PyPI - fusion-bench - Versions diffs - 0.2.9__py3-none-any.whl - Mend

fusion-bench 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (727) hide show

fusion_bench/method/opcm/weight_average.py ADDED Viewed

@@ -0,0 +1,120 @@
+import os
+import random
+import time
+from collections import defaultdict
+from copy import deepcopy
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, cast
+import lightning as L
+import numpy as np
+import torch
+from omegaconf import DictConfig
+from torch import Tensor, nn
+from tqdm.auto import tqdm
+from transformers import CLIPVisionModel
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.taskpool import CLIPVisionModelTaskPool
+from fusion_bench.utils.json import load_from_json, save_to_json
+if TYPE_CHECKING:
+    from torch.utils.tensorboard import SummaryWriter
+class ContinualWeightAverageForCLIP(
+    BaseAlgorithm,
+    LightningFabricMixin,
+):
+    def __init__(
+        self,
+        shuffle_order: bool = True,
+        seed: Optional[int] = None,
+        save_on_every_step: bool = True,
+        evaluate_on_every_step: bool = False,
+        **kwargs,
+    ):
+        """
+        Continual Model Merging via Weight Average.
+        Args:
+            shuffle_order (bool): whether to shuffle the order of the models.
+            seed (Optional[int]): the seed to use.
+            save_on_every_step (bool): whether to save the merged model on every step.
+            evaluate_on_every_step (bool): whether to evaluate the merged model on every step.
+        """
+        self.shuffle_order = shuffle_order
+        self.seed = seed
+        self.save_on_every_step = save_on_every_step
+        self.evaluate_on_every_step = evaluate_on_every_step
+        super().__init__(**kwargs)
+    def run(self, modelpool: BaseModelPool):
+        if self.seed is not None:
+            L.seed_everything(self.seed)
+        model_names = modelpool.model_names
+        if self.shuffle_order:
+            random.shuffle(model_names)
+        self.taskpool = cast(CLIPVisionModelTaskPool, self._program.taskpool)
+        self._test_datasets = deepcopy(self.taskpool._test_datasets)
+        """Configuration for the test datasets"""
+        # log the model names
+        if self.log_dir is not None:
+            save_to_json(model_names, Path(self.log_dir) / "model_names.json")
+            tensorboard_summarywriter: "SummaryWriter" = self.tensorboard_summarywriter
+            tensorboard_summarywriter.add_text(
+                "global/model_names", str(model_names), global_step=0
+            )
+        # get the average model
+        merged_model = modelpool.load_model(model_names[0])
+        if self.evaluate_on_every_step:
+            self.taskpool._is_setup = False
+            self.taskpool._test_datasets = DictConfig(
+                {model_names[0]: self._test_datasets[model_names[0]]}
+            )
+            report = self.taskpool.evaluate(deepcopy(merged_model))
+            save_to_json(report, Path(self.log_dir) / "report_0.json")
+        if self.save_on_every_step:
+            self.save_merged_model(merged_model, 0)
+        for model_idx, model_name in tqdm(
+            enumerate(model_names[1:]), desc="Processing models"
+        ):
+            model_idx += 1
+            task_model = modelpool.load_model(model_name)
+            for param_name, param in task_model.named_parameters():
+                if not param.requires_grad:
+                    continue
+                task_param = param
+                merged_param = merged_model.get_parameter(param_name)
+                new_param = (merged_param * model_idx + task_param) / (model_idx + 1)
+                merged_model.get_parameter(param_name).data = new_param
+            if self.save_on_every_step:
+                self.save_merged_model(merged_model, model_idx)
+            if self.evaluate_on_every_step:
+                self.taskpool._is_setup = False
+                self.taskpool._test_datasets = DictConfig(
+                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
+                )
+                report = self.taskpool.evaluate(deepcopy(merged_model))
+                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")
+        return merged_model
+    def save_merged_model(self, merged_model: CLIPVisionModel, step: int):
+        os.makedirs(Path(self.log_dir) / "checkpoints", exist_ok=True)
+        merged_model.save_pretrained(
+            Path(self.log_dir) / "checkpoints" / f"merged_model_{step}"
+        )

fusion_bench/method/pruning/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# flake8: noqa F401
+from .llama_magnitude_prune import MagnitudePruningForLlama
+from .llama_random_prune import RandomPruningForLlama
+from .llama_wanda_prune import WandaPruningForLlama
+from .magnitude_diff_pruning import MagnitudeDiffPruningAlgorithm

fusion_bench/method/pruning/llama_magnitude_prune.py ADDED Viewed

@@ -0,0 +1,202 @@
+from typing import Dict, Literal, Optional, Union
+import torch
+from torch import nn
+from tqdm.auto import tqdm
+from transformers import LlamaForCausalLM, LlamaModel
+from fusion_bench.method import BaseAlgorithm
+from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
+from fusion_bench.modelpool import CausalLMPool
+from fusion_bench.utils.dtype import parse_dtype
+from . import prune_utils
+def find_layers(module: nn.Module, layers=[nn.Linear], prefix=""):
+    """
+    Recursively find the layers of a certain type in a module.
+    Args:
+        module (nn.Module): PyTorch module.
+        layers (list): List of layer types to find.
+        prefix (str): A prefix to add to the layer names.
+    Returns:
+        dict: Dictionary of layers of the given type(s) within the module.
+    """
+    res = {}
+    for name, submodule in module.named_modules(prefix=prefix):
+        if isinstance(submodule, tuple(layers)):
+            res[name] = submodule
+    return res
+def compute_sparsity(model: Union[LlamaForCausalLM, LlamaModel]):
+    """
+    Compute the sparsity of the model by calculating the ratio of zero weights.
+    sparsity_ratio = number_of_zero_weights / number_of_all_weights
+    Args:
+        model (Union[LlamaForCausalLM, LlamaModel]): The model for which to compute sparsity.
+    Returns:
+        float: The sparsity ratio of the model.
+    """
+    if isinstance(model, LlamaForCausalLM):
+        layers = model.model.layers
+    elif isinstance(model, LlamaModel):
+        layers = model.layers
+    subset: Dict[str, nn.Linear] = find_layers(layers)
+    sparsity = 0
+    total = 0
+    for name in tqdm(subset, desc="Computing sparsity"):
+        sparsity += torch.sum(subset[name].weight == 0).item()
+        total += subset[name].weight.numel()
+    return sparsity / total
+def unstructured_magnitude_prune_(
+    model: Union[LlamaForCausalLM, LlamaModel], sparsity_ratio: float, dtype, device
+):
+    """
+    Apply unstructured magnitude pruning to the model.
+    Args:
+        model (Union[LlamaForCausalLM, LlamaModel]): The model to prune.
+        sparsity_ratio (float): The ratio of weights to prune.
+        dtype: The data type for the pruning process.
+        device: The device to perform the pruning on.
+    Returns:
+        Union[LlamaForCausalLM, LlamaModel]: The pruned model.
+    """
+    if isinstance(model, LlamaForCausalLM):
+        layers = model.model.layers
+    elif isinstance(model, LlamaModel):
+        layers = model.layers
+    subset: Dict[str, nn.Linear] = find_layers(layers)
+    for name in tqdm(subset, desc="Pruning"):
+        prune_utils.unstructured_magnitude_prune_(
+            subset[name].weight,
+            metric_function_or_scores=torch.abs,
+            sparsity_ratio=sparsity_ratio,
+            dtype=dtype,
+            device=device,
+        )
+    return model
+def semistructured_magnitude_prune_(
+    model: Union[LlamaForCausalLM, LlamaModel], n: int, m: int, dtype, device
+):
+    """
+    Apply semi-structured (N:M structured pruning) magnitude pruning to the model.
+    Args:
+        model (Union[LlamaForCausalLM, LlamaModel]): The model to prune.
+        n (int): The number of weights to keep in each group.
+        m (int): The total number of weights in each group.
+        dtype: The data type for the pruning process.
+        device: The device to perform the pruning on.
+    Returns:
+        Union[LlamaForCausalLM, LlamaModel]: The pruned model.
+    """
+    if isinstance(model, LlamaForCausalLM):
+        layers = model.model.layers
+    elif isinstance(model, LlamaModel):
+        layers = model.layers
+    subset: Dict[str, nn.Linear] = find_layers(layers)
+    for name in tqdm(subset, desc="Pruning"):
+        prune_utils.semistructured_magnitude_prune_(
+            subset[name].weight,
+            metric_function_or_scores=torch.abs,
+            n=n,
+            m=m,
+            dtype=dtype,
+            device=device,
+        )
+    return model
+class MagnitudePruningForLlama(BaseAlgorithm, SimpleProfilerMixin):
+    """
+    Implements magnitude-based pruning for LLama models.
+    This class supports both unstructured and semistructured pruning methods.
+    It loads a pre-trained model or the first model in the pool and applies the specified pruning technique.
+    Methods:
+        run(modelpool: LLamaForCausalLMPool) -> nn.Module:
+            Executes the pruning process on the model pool and returns the pruned model.
+    """
+    _config_mapping = BaseAlgorithm._config_mapping | {
+        "prune_type": "prune_type",
+        "device": "device",
+        "dtype": "dtype",
+        "sparsity_ratio": "sparsity_ratio",
+        "n": "n",
+        "m": "m",
+    }
+    def __init__(
+        self,
+        *,
+        prune_type: Literal["unstructured", "semistructured"],
+        device: str,
+        dtype: Optional[str],
+        sparsity_ratio: float,
+        n: int,
+        m: int,
+        **kwargs,
+    ):
+        self.prune_type = prune_type
+        self.device = device
+        self.dtype = dtype
+        self.sparsity_ratio = sparsity_ratio
+        self.n = n
+        self.m = m
+        super().__init__(**kwargs)
+    @torch.no_grad()
+    def run(self, modelpool: CausalLMPool):
+        """
+        Execute the pruning process on the first model from the given model pool.
+        Args:
+            modelpool (CausalLMPool): The model pool containing the models to prune.
+        Returns:
+            nn.Module: The pruned model.
+        """
+        config = self.config
+        # load pre-trained model or the first model in the pool
+        base_model = modelpool.load_pretrained_or_first_model()
+        dtype = parse_dtype(config.dtype)
+        device = torch.device(config.device)
+        if config.prune_type == "unstructured":
+            unstructured_magnitude_prune_(
+                base_model, config.sparsity_ratio, dtype=dtype, device=device
+            )
+        elif config.prune_type == "semistructured":
+            semistructured_magnitude_prune_(
+                base_model, config.n, config.m, dtype=dtype, device=device
+            )
+        else:
+            raise ValueError(
+                f"Invalid pruning type: {config.prune_type}"
+                "Choose from 'unstructured' or 'semistructured'"
+            )
+        return base_model

fusion_bench/method/pruning/llama_random_prune.py ADDED Viewed

@@ -0,0 +1,143 @@
+from typing import Dict, Literal, Optional, Union  # noqa: F401
+import torch
+from torch import nn
+from tqdm.auto import tqdm
+from transformers import LlamaForCausalLM, LlamaModel
+from fusion_bench.method import BaseAlgorithm
+from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
+from fusion_bench.modelpool import CausalLMPool
+from . import prune_utils
+from .prune_utils import PruningType, find_linear_layers
+def unstructured_magnitude_prune_(
+    model: Union[LlamaForCausalLM, LlamaModel], sparsity_ratio: float
+):
+    """
+    Perform unstructured magnitude pruning on the given model.
+    Args:
+        model (Union[LlamaForCausalLM, LlamaModel]): The model to be pruned.
+        sparsity_ratio (float): The ratio of weights to be pruned.
+    Returns:
+        The pruned model.
+    """
+    if isinstance(model, LlamaForCausalLM):
+        layers = model.model.layers
+    elif isinstance(model, LlamaModel):
+        layers = model.layers
+    subset: Dict[str, nn.Linear] = find_linear_layers(layers)
+    for name in tqdm(subset, desc="Pruning"):
+        prune_utils.unstructured_magnitude_prune_(
+            subset[name].weight,
+            metric_function_or_scores=torch.rand_like,
+            sparsity_ratio=sparsity_ratio,
+        )
+    return model
+def semistructured_magnitude_prune_(
+    model: Union[LlamaForCausalLM, LlamaModel], n: int, m: int
+):
+    """
+    Perform semi-structured (N:M structured) magnitude pruning on the given model.
+    Args:
+        model (Union[LlamaForCausalLM, LlamaModel]): The model to be pruned.
+        n (int): The number of weights to be pruned in each group.
+        m (int): The total number of weights in each group.
+    Returns:
+        The pruned model.
+    """
+    if isinstance(model, LlamaForCausalLM):
+        layers = model.model.layers
+    elif isinstance(model, LlamaModel):
+        layers = model.layers
+    subset: Dict[str, nn.Linear] = find_linear_layers(layers)
+    for name in tqdm(subset, desc="Pruning"):
+        prune_utils.semistructured_magnitude_prune_(
+            subset[name].weight,
+            metric_function_or_scores=torch.rand_like,
+            n=n,
+            m=m,
+        )
+    return model
+class RandomPruningForLlama(BaseAlgorithm, SimpleProfilerMixin):
+    """
+    A class to perform random pruning for Llama models.
+    Attributes:
+        prune_type (PruningType): The type of pruning to be performed.
+        sparsity_ratio (float): The ratio of weights to be pruned.
+        n (int): The number of weights to be pruned in each group (for semistructured pruning).
+        m (int): The total number of weights in each group (for semistructured pruning).
+    """
+    _config_mapping = BaseAlgorithm._config_mapping | {
+        "prune_type": "prune_type",
+        "sparsity_ratio": "sparsity_ratio",
+        "n": "n",
+        "m": "m",
+    }
+    def __init__(
+        self,
+        *,
+        prune_type: PruningType,
+        sparsity_ratio: float,
+        n: int,
+        m: int,
+        **kwargs,
+    ):
+        """
+        Initialize the RandomPruningForLlama class.
+        Args:
+            prune_type (PruningType): The type of pruning to be performed.
+            sparsity_ratio (float): The ratio of weights to be pruned.
+            n (int): The number of weights to be pruned in each group (for semistructured pruning).
+            m (int): The total number of weights in each group (for semistructured pruning).
+            **kwargs: Additional keyword arguments.
+        """
+        self.prune_type = prune_type
+        self.sparsity_ratio = sparsity_ratio
+        self.n = n
+        self.m = m
+        super().__init__(**kwargs)
+    @torch.no_grad()
+    def run(self, modelpool: CausalLMPool):
+        """
+        Run the pruning algorithm on the first model from the given model pool.
+        Args:
+            modelpool (CausalLMPool): The pool of models to be pruned.
+        Returns:
+            The pruned model.
+        """
+        # load pre-trained model or the first model in the pool
+        base_model = modelpool.load_pretrained_or_first_model()
+        if self.prune_type == PruningType.UNSTRUCTURED:
+            unstructured_magnitude_prune_(base_model, self.sparsity_ratio)
+        elif self.prune_type == PruningType.SEMISTRUCTURED:
+            semistructured_magnitude_prune_(base_model, self.n, self.m)
+        else:
+            raise ValueError(
+                f"Invalid pruning type: {self.prune_type}"
+                "Choose from 'unstructured' or 'semistructured'"
+            )
+        return base_model