PyPI - fusion-bench - Versions diffs - 0.2.9__py3-none-any.whl - Mend

fusion-bench 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (727) hide show

fusion_bench/method/pruning/wanda_utils/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+This module is modified from https://github.com/locuslab/wanda.
+It contains utility functions and classes for pruning neural network models using the Wanda method.
+The Wanda method is a weight pruning technique that aims to reduce the number of parameters in a neural network
+while maintaining its performance.
+"""

fusion_bench/method/pruning/wanda_utils/ablate.py ADDED Viewed

@@ -0,0 +1,188 @@
+import math
+import time
+import torch
+import torch.nn as nn
+import transformers
+# Import-time side effect: switch off TF32 for CUDA matmuls and for cuDNN, process-wide.
+# NOTE(review): presumably done so the Hessian / Cholesky math in AblateGPT runs in
+# full fp32 precision -- confirm with the upstream Wanda/SparseGPT code.
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+class AblateGPT:
+    """
+    Accumulate input statistics for one layer and prune its weight matrix.
+    ``add_batch`` maintains two running quantities over the inputs it is fed:
+    ``H`` (a ``columns x columns`` matrix built from ``inp @ inp.T``) and
+    ``scaler_row`` (per-input-column squared L2 norm). ``get_wanda_mask`` and
+    ``get_mag_mask`` turn the current weights into a boolean mask (``True`` means
+    "prune this weight"). ``fasterprune`` zeroes the selected weights block by
+    block and updates the not-yet-processed columns using a Cholesky factor
+    derived from ``H``.
+    Attributes:
+        layer: The wrapped module; it must expose ``weight``.
+        dev: Device of ``layer.weight``.
+        rows (int): First dimension of the 2-D weight view.
+        columns (int): Second dimension of the 2-D weight view.
+        H (torch.Tensor): Running ``columns x columns`` statistic; deleted by ``fasterprune``.
+        nsamples (int): Number of samples accumulated so far.
+        scaler_row (torch.Tensor): Running per-column squared input norm.
+    """
+    def __init__(self, layer):
+        """
+        Wrap ``layer`` and allocate zeroed statistics on the layer's device.
+        Args:
+            layer: Module whose ``weight`` will be analysed and pruned.
+        """
+        self.layer = layer
+        self.dev = self.layer.weight.device
+        # Only the 2-D shape is needed here, so view the weight instead of cloning it.
+        W = self._as_matrix(layer.weight.data)
+        self.rows = W.shape[0]
+        self.columns = W.shape[1]
+        self.H = torch.zeros((self.columns, self.columns), device=self.dev)
+        self.nsamples = 0
+        self.scaler_row = torch.zeros((self.columns), device=self.dev)
+    def _as_matrix(self, weight):
+        """Return ``weight`` as a 2-D matrix: Conv2d is flattened, Conv1D is transposed."""
+        if isinstance(self.layer, nn.Conv2d):
+            weight = weight.flatten(1)
+        if isinstance(self.layer, transformers.Conv1D):
+            weight = weight.t()
+        return weight
+    @staticmethod
+    def _nm_mask(W_metric, prunen, prunem):
+        """Mark the ``prunen`` lowest-scoring entries in every group of ``prunem`` columns."""
+        W_mask = torch.zeros_like(W_metric, dtype=torch.bool)
+        for ii in range(0, W_metric.shape[1], prunem):
+            tmp = W_metric[:, ii : (ii + prunem)].float()
+            # topk indices are relative to the group, hence the ``ii`` offset.
+            W_mask.scatter_(
+                1, ii + torch.topk(tmp, prunen, dim=1, largest=False)[1], True
+            )
+        return W_mask
+    @staticmethod
+    def _threshold(scores, sparsity):
+        """
+        Return the score at rank ``int(numel * sparsity)`` of the ascending sort.
+        The rank is clamped to the last element so ``sparsity == 1.0`` selects the
+        maximum instead of indexing out of range. The sort runs on the tensor's own
+        device, so no CUDA device is required.
+        """
+        flat = scores.flatten()
+        if flat.dtype in (torch.float16, torch.bfloat16):
+            # Upcasting is exact and keeps the sort usable on CPU builds that do
+            # not implement half-precision sorting.
+            flat = flat.float()
+        ordered = torch.sort(flat)[0]
+        rank = min(int(ordered.numel() * sparsity), ordered.numel() - 1)
+        return ordered[rank]
+    def _importance(self, weights, start, stop, prune_method):
+        """
+        Score ``weights`` (columns ``start:stop`` of the weight matrix) for pruning.
+        Raises:
+            ValueError: If ``prune_method`` contains neither "wanda" nor "mag".
+        """
+        if "wanda" in prune_method:
+            return torch.abs(weights) * torch.sqrt(
+                self.scaler_row[start:stop].reshape((1, -1))
+            )
+        if "mag" in prune_method:
+            return torch.abs(weights)
+        raise ValueError(
+            f"Unsupported prune_method {prune_method!r}: expected it to contain 'wanda' or 'mag'"
+        )
+    def add_batch(self, inp, out):
+        """
+        Fold one batch of layer inputs into ``H`` and ``scaler_row``.
+        Args:
+            inp (torch.Tensor): Layer input. A 2-D tensor is treated as one sample.
+            out: Layer output; not used.
+        """
+        if len(inp.shape) == 2:
+            inp = inp.unsqueeze(0)
+        tmp = inp.shape[0]
+        if isinstance(self.layer, nn.Linear) or isinstance(
+            self.layer, transformers.Conv1D
+        ):
+            if len(inp.shape) == 3:
+                inp = inp.reshape((-1, inp.shape[-1]))
+            inp = inp.t()
+        # Rescale the old statistics before adding the new batch (running average).
+        self.H *= self.nsamples / (self.nsamples + tmp)
+        self.scaler_row *= self.nsamples / (self.nsamples + tmp)
+        self.nsamples += tmp
+        inp = math.sqrt(2 / self.nsamples) * inp.float()
+        self.H += inp.matmul(inp.t())
+        self.scaler_row += torch.norm(inp, p=2, dim=1) ** 2 / self.nsamples
+    def get_wanda_mask(self, sparsity, prunen, prunem):
+        """
+        Build a prune mask from ``|weight| * sqrt(scaler_row)``.
+        Args:
+            sparsity (float): Fraction of each row to prune; used only when ``prunen == 0``.
+            prunen (int): Entries to prune per group; ``0`` selects unstructured pruning.
+            prunem (int): Group size for N:M pruning.
+        Returns:
+            torch.Tensor: Boolean mask shaped like the layer weight, ``True`` = prune.
+        """
+        W_metric = torch.abs(self.layer.weight.data) * torch.sqrt(
+            self.scaler_row.reshape((1, -1))
+        )
+        if prunen != 0:
+            return self._nm_mask(W_metric, prunen, prunem)
+        W_mask = torch.zeros_like(W_metric, dtype=torch.bool)
+        sort_res = torch.sort(W_metric, dim=-1, stable=True)
+        # Per row, the lowest-scoring ``sparsity`` fraction of columns is pruned.
+        indices = sort_res[1][:, : int(W_metric.shape[1] * sparsity)]
+        W_mask.scatter_(1, indices, True)
+        return W_mask
+    def get_mag_mask(self, sparsity, prunen, prunem):
+        """
+        Build a prune mask from weight magnitude alone.
+        Args:
+            sparsity (float): Fraction used to pick the global magnitude threshold;
+                used only when ``prunen == 0``.
+            prunen (int): Entries to prune per group; ``0`` selects unstructured pruning.
+            prunem (int): Group size for N:M pruning.
+        Returns:
+            torch.Tensor: Boolean mask shaped like the layer weight, ``True`` = prune.
+        """
+        W_metric = torch.abs(self.layer.weight.data)
+        if prunen != 0:
+            return self._nm_mask(W_metric, prunen, prunem)
+        thresh = self._threshold(W_metric, sparsity)
+        return W_metric <= thresh
+    def fasterprune(
+        self,
+        args,
+        sparsity,
+        mask=None,
+        prune_n=0,
+        prune_m=0,
+        blocksize=128,
+        percdamp=0.01,
+    ):
+        """
+        Prune the layer in place, processing ``blocksize`` columns at a time.
+        Consumes ``self.H`` (the attribute is deleted) and overwrites
+        ``self.layer.weight.data`` with the pruned weights in the original dtype.
+        Args:
+            args: Object with a ``prune_method`` string; read only when ``mask`` is None.
+            sparsity (float): Fraction pruned per block when ``prune_n == 0`` and no mask is given.
+            mask (torch.Tensor, optional): Precomputed boolean prune mask over the 2-D weight.
+            prune_n (int): Entries to prune per group for N:M pruning; ``0`` disables it.
+            prune_m (int): Group size for N:M pruning.
+            blocksize (int): Number of columns handled per block.
+            percdamp (float): Damping added to the diagonal of ``H``, as a fraction of its mean.
+        Raises:
+            ValueError: If a mask has to be computed and ``args.prune_method``
+                contains neither "wanda" nor "mag".
+        """
+        W = self._as_matrix(self.layer.weight.data.clone()).float()
+        H = self.H
+        del self.H
+        # Columns that never received input: neutralise them in H and zero the weights.
+        dead = torch.diag(H) == 0
+        H[dead, dead] = 1
+        W[:, dead] = 0
+        # Per-row loss is accumulated below but not returned.
+        Losses = torch.zeros(self.rows, device=self.dev)
+        damp = percdamp * torch.mean(torch.diag(H))
+        diag = torch.arange(self.columns, device=self.dev)
+        H[diag, diag] += damp
+        H = torch.linalg.cholesky(H)
+        H = torch.cholesky_inverse(H)
+        H = torch.linalg.cholesky(H, upper=True)
+        Hinv = H
+        for i1 in range(0, self.columns, blocksize):
+            i2 = min(i1 + blocksize, self.columns)
+            count = i2 - i1
+            W1 = W[:, i1:i2].clone()
+            Q1 = torch.zeros_like(W1)
+            Err1 = torch.zeros_like(W1)
+            Losses1 = torch.zeros_like(W1)
+            Hinv1 = Hinv[i1:i2, i1:i2]
+            if mask is not None:
+                mask1 = mask[:, i1:i2]
+            elif prune_n == 0:
+                tmp = self._importance(W1, i1, i2, args.prune_method)
+                mask1 = tmp <= self._threshold(tmp, sparsity)
+            else:
+                # N:M pruning fills this mask group by group inside the column loop.
+                mask1 = torch.zeros_like(W1, dtype=torch.bool)
+            for i in range(count):
+                w = W1[:, i]
+                d = Hinv1[i, i]
+                if prune_n != 0 and i % prune_m == 0 and mask is None:
+                    tmp = self._importance(
+                        W1[:, i : (i + prune_m)],
+                        i + i1,
+                        i + i1 + prune_m,
+                        args.prune_method,
+                    )
+                    mask1.scatter_(
+                        1, i + torch.topk(tmp, prune_n, dim=1, largest=False)[1], True
+                    )
+                q = w.clone()
+                q[mask1[:, i]] = 0
+                Q1[:, i] = q
+                Losses1[:, i] = (w - q) ** 2 / d**2
+                err1 = (w - q) / d
+                # Spread the pruning error over the remaining columns of this block.
+                W1[:, i:] -= err1.unsqueeze(1).matmul(Hinv1[i, i:].unsqueeze(0))
+                Err1[:, i] = err1
+            W[:, i1:i2] = Q1
+            Losses += torch.sum(Losses1, 1) / 2
+            # Spread the block's accumulated error over all later columns.
+            W[:, i2:] -= Err1.matmul(Hinv[i1:i2, i2:])
+        if torch.cuda.is_available():
+            # Synchronising is only possible (and only meaningful) when CUDA exists.
+            torch.cuda.synchronize()
+        if isinstance(self.layer, transformers.Conv1D):
+            W = W.t()
+        self.layer.weight.data = W.reshape(self.layer.weight.shape).to(
+            self.layer.weight.data.dtype
+        )
+    def free(self):
+        """Drop the reference to ``H`` and release cached CUDA memory."""
+        self.H = None
+        torch.cuda.empty_cache()

fusion_bench/method/pruning/wanda_utils/data.py ADDED Viewed

@@ -0,0 +1,135 @@
+# Code adapted from https://github.com/IST-DASLab/sparsegpt/blob/master/datautils.py
+import random
+from typing import List, Optional, Tuple, cast  # noqa: F401
+from datasets import load_dataset
+from torch import Tensor
+from tqdm.auto import tqdm
+from transformers import PreTrainedTokenizer
+# Wrapper for tokenized input IDs
+class TokenizerWrapper:
+    """Minimal holder exposing token IDs through an ``input_ids`` attribute."""
+    def __init__(self, input_ids) -> None:
+        """Store ``input_ids`` unchanged on the instance."""
+        self.input_ids = input_ids
+# Load and process wikitext2 dataset
+def get_wikitext2(
+    nsamples: int,
+    seed: int,
+    seqlen: int,
+    tokenizer: PreTrainedTokenizer,
+    data_path: str = "wikitext",
+):
+    """
+    Build random fixed-length windows from Wikitext-2 and tokenize its test split.
+    Args:
+        nsamples (int): How many training windows to draw.
+        seed (int): Seed passed to ``random.seed`` before sampling.
+        seqlen (int): Number of tokens in each window.
+        tokenizer (PreTrainedTokenizer): Tokenizer applied to the joined text.
+        data_path (str, optional): Dataset path given to ``load_dataset``. Defaults to "wikitext".
+    Returns:
+        A ``(trainloader, testenc)`` pair: ``trainloader`` is a list of
+        ``(input_ids, targets)`` tensors in which every target except the last
+        position is ``-100``; ``testenc`` is the tokenizer output for the test split.
+    """
+    config_name = "wikitext-2-raw-v1"
+    train_split = load_dataset(data_path, config_name, split="train")
+    test_split = load_dataset(data_path, config_name, split="test")
+    # The train text is joined with spaces, the test text with blank lines.
+    trainenc = tokenizer(" ".join(train_split["text"]), return_tensors="pt")
+    testenc = tokenizer("\n\n".join(test_split["text"]), return_tensors="pt")
+    random.seed(seed)
+    samples: List[Tuple[Tensor, Tensor]] = []
+    for _ in tqdm(range(nsamples), desc="Generating samples"):
+        start = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
+        window: Tensor = trainenc.input_ids[:, start : start + seqlen]
+        targets = window.clone()
+        targets[:, :-1] = -100
+        samples.append((window, targets))
+    return samples, testenc
+# Load and process c4 dataset
+def get_c4(
+    nsamples: int,
+    seed: int,
+    seqlen: int,
+    tokenizer,
+    data_path: str = "allenai/c4",
+) -> Tuple[List[Tuple[Tensor, Tensor]], TokenizerWrapper]:
+    """
+    Build random fixed-length windows from one C4 train shard plus a validation slice.
+    Args:
+        nsamples (int): How many training windows to draw.
+        seed (int): Seed passed to ``random.seed``; skipped when ``None``.
+        seqlen (int): Number of tokens in each window.
+        tokenizer: Callable tokenizer returning an object with ``input_ids``.
+        data_path (str, optional): Dataset path given to ``load_dataset``. Defaults to "allenai/c4".
+    Returns:
+        tuple (Tuple[List[Tuple[Tensor, Tensor]], TokenizerWrapper]): The sampled
+        ``(input_ids, targets)`` pairs and the wrapped validation token IDs,
+        truncated to ``256 * seqlen`` tokens.
+    """
+    # The "allenai--c4" config name is left out; see
+    # https://github.com/huggingface/datasets/issues/6559
+    train_split = load_dataset(
+        data_path,
+        data_files={"train": "en/c4-train.00000-of-01024.json.gz"},
+        split="train",
+    )
+    val_split = load_dataset(
+        data_path,
+        data_files={"validation": "en/c4-validation.00000-of-00008.json.gz"},
+        split="validation",
+    )
+    if seed is not None:
+        random.seed(seed)
+    def _encode_random_document():
+        """Tokenize one uniformly chosen training document."""
+        doc_index = random.randint(0, len(train_split) - 1)
+        return tokenizer(train_split[doc_index]["text"], return_tensors="pt")
+    samples = []
+    for _ in tqdm(range(nsamples), desc="Generating samples"):
+        # Redraw until the document is strictly longer than one window.
+        encoded = _encode_random_document()
+        while encoded.input_ids.shape[1] <= seqlen:
+            encoded = _encode_random_document()
+        start = random.randint(0, encoded.input_ids.shape[1] - seqlen - 1)
+        window = encoded.input_ids[:, start : start + seqlen]
+        targets = window.clone()
+        targets[:, :-1] = -100
+        samples.append((window, targets))
+    # Validation: first 1100 documents joined by spaces, cut to 256 windows of tokens.
+    val_ids = tokenizer(" ".join(val_split[:1100]["text"]), return_tensors="pt").input_ids
+    return samples, TokenizerWrapper(val_ids[:, : (256 * seqlen)])
+# Function to select the appropriate loader based on dataset name
+def get_loaders(
+    name: str, nsamples: int = 128, seed: int = 0, seqlen: int = 2048, tokenizer=None
+):
+    """
+    Dispatch to the dataset-specific loader selected by ``name``.
+    Args:
+        name (str): Dataset name; must contain "wikitext2" or "c4" ("wikitext2" wins if both match).
+        nsamples (int, optional): Number of samples to draw. Defaults to 128.
+        seed (int, optional): Random seed forwarded to the loader. Defaults to 0.
+        seqlen (int, optional): Sequence length of each sample. Defaults to 2048.
+        tokenizer (optional): Tokenizer forwarded to the loader. Defaults to None.
+    Returns:
+        Whatever the selected loader returns.
+    Raises:
+        ValueError: If ``name`` matches neither supported dataset.
+    """
+    is_wikitext2 = "wikitext2" in name
+    if not is_wikitext2 and "c4" not in name:
+        raise ValueError(f"Unknown dataset: {name}")
+    loader = get_wikitext2 if is_wikitext2 else get_c4
+    return loader(nsamples, seed, seqlen, tokenizer)

fusion_bench/method/pruning/wanda_utils/eval.py ADDED Viewed

@@ -0,0 +1,245 @@
+# Import necessary modules
+import fnmatch
+import time
+from collections import defaultdict
+import torch
+import torch.nn as nn
+# Import get_loaders function from data module within the same directory
+from .data import get_loaders
+# Function to evaluate perplexity (ppl) on a specified model and tokenizer
+def eval_ppl(model, tokenizer, device=torch.device("cuda:0")):
+    """
+    Compute the wikitext-2 test perplexity of ``model``.
+    Args:
+        model: Model to score; its ``seqlen`` attribute sets the window length.
+        tokenizer: Tokenizer used to encode the dataset.
+        device: Device the inputs are moved to.
+    Returns:
+        ppl_test: Perplexity on the wikitext-2 test split.
+    """
+    dataset = "wikitext2"
+    print(f"evaluating on {dataset}")
+    # Only the test encoding is needed; the sampled training windows are discarded.
+    _unused_train, test_encoding = get_loaders(
+        dataset, seed=0, seqlen=model.seqlen, tokenizer=tokenizer
+    )
+    with torch.no_grad():
+        return eval_ppl_wikitext(model, test_encoding, 1, device)
+# Function to evaluate perplexity (ppl) specifically on the wikitext dataset
+def eval_ppl_wikitext_train(model, trainloader, bs=1, device=None):
+    """
+    Compute perplexity over the sampled training windows.
+    Args:
+        model: Model to score; needs ``seqlen`` and must return an object with ``logits``.
+        trainloader: Indexable sequence of ``(input_ids, targets)`` pairs; only the
+            ``input_ids`` (``model.seqlen`` tokens each) are used.
+        bs: Batch size.
+        device: The device the inputs are moved to.
+    Returns:
+        ppl: The perplexity of the model on the given windows.
+    """
+    nsamples = len(trainloader)
+    nlls = []
+    print(f"nsamples {nsamples}")
+    loss_fct = nn.CrossEntropyLoss()
+    # Evaluation only: do not build autograd graphs for every batch.
+    with torch.no_grad():
+        for i in range(0, nsamples, bs):
+            if i % 50 == 0:
+                print(f"sample {i}")
+            j = min(i + bs, nsamples)
+            # Gather all samples i..j-1. Taking only sample i (as before) made the
+            # reshape below fail whenever bs > 1.
+            inputs = torch.cat([trainloader[k][0] for k in range(i, j)], dim=0)
+            inputs = inputs.to(device).reshape(j - i, model.seqlen)
+            lm_logits = model(inputs).logits
+            # Position t predicts token t + 1.
+            shift_logits = lm_logits[:, :-1, :].contiguous()
+            shift_labels = inputs[:, 1:]
+            loss = loss_fct(
+                shift_logits.reshape(-1, shift_logits.size(-1)),
+                shift_labels.reshape(-1),
+            )
+            # The mean loss is scaled back to a per-batch negative log likelihood.
+            nlls.append(loss.float() * model.seqlen * (j - i))
+    ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * model.seqlen))
+    torch.cuda.empty_cache()
+    return ppl.item()
+# Function to evaluate perplexity (ppl) specifically on the wikitext dataset
+def eval_ppl_wikitext(model, testenc, bs: int = 1, device=None):
+    """
+    Compute perplexity over consecutive ``model.seqlen``-token windows of ``testenc``.
+    Args:
+        model: Model to score; needs ``seqlen`` and must return an object with ``logits``.
+        testenc: Object exposing the token IDs as ``input_ids``.
+        bs: Batch size.
+        device: The device the inputs are moved to.
+    Returns:
+        ppl: The perplexity of the model on the test tokens.
+    """
+    token_ids = testenc.input_ids
+    # Trailing tokens that do not fill a whole window are ignored.
+    nsamples = token_ids.numel() // model.seqlen
+    print(f"nsamples {nsamples}")
+    criterion = nn.CrossEntropyLoss()
+    batch_nlls = []
+    for first in range(0, nsamples, bs):
+        if first % 50 == 0:
+            print(f"sample {first}")
+        last = min(first + bs, nsamples)
+        n_rows = last - first
+        window = token_ids[:, (first * model.seqlen) : (last * model.seqlen)]
+        batch = window.to(device).reshape(n_rows, model.seqlen)
+        logits = model(batch).logits
+        # Position t predicts token t + 1.
+        predictions = logits[:, :-1, :].contiguous()
+        targets = batch[:, 1:]
+        mean_loss = criterion(
+            predictions.reshape(-1, predictions.size(-1)), targets.reshape(-1)
+        )
+        # The mean loss is scaled back to a per-batch negative log likelihood.
+        batch_nlls.append(mean_loss.float() * model.seqlen * n_rows)
+    total_nll = torch.stack(batch_nlls).sum()
+    ppl = torch.exp(total_nll / (nsamples * model.seqlen))
+    torch.cuda.empty_cache()
+    return ppl.item()
+def eval_zero_shot(
+    model_name,
+    model,
+    tokenizer,
+    task_list=[
+        "boolq",
+        "rte",
+        "hellaswag",
+        "winogrande",
+        "arc_challenge",
+        "arc_easy",
+        "openbookqa",
+    ],
+    num_fewshot=0,
+    use_accelerate=False,
+    add_special_tokens=False,
+):
+    """
+    Run ``lm_eval``'s ``simple_evaluate`` on the tasks matching ``task_list``.
+    Args:
+        model_name: Model name; embedded in ``model_args`` and checked for "70b"/"65b".
+        model: The already-loaded model, passed as ``pretrained_model``.
+        tokenizer: The tokenizer to use.
+        task_list: Task names or ``fnmatch`` patterns (the default list is never mutated here).
+        num_fewshot: Number of few-shot examples.
+        use_accelerate: Whether to append ``use_accelerate=True`` to ``model_args``.
+        add_special_tokens: Forwarded to the evaluator.
+    Returns:
+        results: Whatever ``evaluator.simple_evaluate`` returns.
+    """
+    # Imported lazily so the module can be loaded without lm_eval installed.
+    from lm_eval import evaluator, tasks
+    def pattern_match(patterns, source_list):
+        """
+        Collect every entry of ``source_list`` matching at least one pattern.
+        Args:
+            patterns: Iterable of ``fnmatch`` patterns.
+            source_list: Candidate names.
+        Returns:
+            task_names: De-duplicated list of matching names.
+        """
+        matched = {
+            candidate
+            for pattern in patterns
+            for candidate in fnmatch.filter(source_list, pattern)
+        }
+        return list(matched)
+    task_names = pattern_match(task_list, tasks.ALL_TASKS)
+    model_args = (
+        f"pretrained={model_name},cache_dir=./llm_weights,use_accelerate=True"
+        if use_accelerate
+        else f"pretrained={model_name},cache_dir=./llm_weights"
+    )
+    # Model names containing "70b" or "65b" are capped at 2000 examples.
+    limit = 2000 if ("70b" in model_name or "65b" in model_name) else None
+    return evaluator.simple_evaluate(
+        model="hf-causal-experimental",
+        model_args=model_args,
+        tasks=task_names,
+        num_fewshot=num_fewshot,
+        batch_size=None,
+        device=None,
+        no_cache=True,
+        limit=limit,
+        description_dict={},
+        decontamination_ngrams_path=None,
+        check_integrity=False,
+        pretrained_model=model,
+        tokenizer=tokenizer,
+        add_special_tokens=add_special_tokens,
+    )

fusion_bench/method/pruning/wanda_utils/layerwrapper.py ADDED Viewed

@@ -0,0 +1,61 @@
+import torch
+import torch.nn as nn
+# Define WrappedGPT class
+class WrappedGPT:
+    """
+    Track a running per-input-column squared L2 norm of a layer's inputs.
+    Attributes:
+        layer (nn.Linear | nn.Module): The wrapped layer.
+        dev (torch.device): Device of the layer's weight.
+        rows (int): Size of the weight's first dimension.
+        columns (int): Size of the weight's second dimension.
+        scaler_row (torch.Tensor): Running statistic with one entry per weight column.
+        nsamples (int): Number of samples folded in so far.
+        layer_id (int): Caller-supplied layer index.
+        layer_name (str): Caller-supplied layer name.
+    """
+    def __init__(self, layer: nn.Linear | nn.Module, layer_id=0, layer_name="none"):
+        """
+        Wrap ``layer`` and start with zeroed statistics on the layer's device.
+        Args:
+            layer (nn.Linear | nn.Module): The layer to wrap; must expose a 2-D ``weight``.
+            layer_id (int, optional): Index stored on the wrapper. Defaults to 0.
+            layer_name (str, optional): Name stored on the wrapper. Defaults to "none".
+        """
+        self.layer = layer
+        self.layer_id = layer_id
+        self.layer_name = layer_name
+        weight = self.layer.weight
+        self.dev = weight.device
+        self.rows, self.columns = weight.data.shape[0], weight.data.shape[1]
+        self.nsamples = 0
+        self.scaler_row = torch.zeros((self.columns), device=self.dev)
+    def add_batch(self, inp: torch.Tensor, out: torch.Tensor):
+        """
+        Fold one batch of layer inputs into ``scaler_row``.
+        Args:
+            inp (torch.Tensor): The layer input; a 2-D tensor counts as one sample.
+            out (torch.Tensor): The layer output; not used.
+        """
+        if inp.dim() == 2:
+            inp = inp.unsqueeze(0)
+        batch_count = inp.shape[0]
+        if isinstance(self.layer, nn.Linear):
+            if inp.dim() == 3:
+                inp = inp.reshape((-1, inp.shape[-1]))
+            inp = inp.t()
+        seen_before = self.nsamples
+        self.nsamples = seen_before + batch_count
+        # Running average: shrink the old value, then add this batch's share.
+        self.scaler_row *= seen_before / self.nsamples
+        squared_norm = torch.norm(inp.type(torch.float32), p=2, dim=1) ** 2
+        self.scaler_row += squared_norm / self.nsamples