PyPI - fusion-bench - Versions diffs - 0.2.9__py3-none-any.whl - Mend

fusion-bench 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (727) hide show

fusion_bench/metrics/continual_learning/backward_transfer.py ADDED Viewed

@@ -0,0 +1,22 @@
+from typing import Dict
+import numpy as np
+def compute_backward_transfer(
+    acc_Ti: Dict[str, float], acc_ii: Dict[str, float]
+) -> float:
+    R"""
+    Compute the backward transfer (BWT) of a model on a set of tasks.
+    Equation:
+        BWT = \frac{1}{n} \sum_{k=1}^{n} (acc_{Ti}[k] - acc_{ii}[k])
+    Args:
+        acc_Ti (Dict[str, float]): Accuracy per task name; the minuend in the equation above
+            (by its name, presumably the accuracy after the final task -- confirm with callers).
+        acc_ii (Dict[str, float]): Accuracy per task name; the subtrahend in the equation above
+            (by its name, presumably the accuracy right after learning that task -- confirm with callers).
+    Returns:
+        float: The backward transfer of the model.
+    Raises:
+        ValueError: If the two mappings do not cover exactly the same tasks, or if they are empty.
+    """
+    # Explicit checks instead of `assert`: assertions are stripped under `python -O`.
+    if set(acc_ii.keys()) != set(acc_Ti.keys()):
+        raise ValueError("acc_Ti and acc_ii must contain exactly the same task names")
+    if not acc_ii:
+        raise ValueError("cannot compute backward transfer over an empty set of tasks")
+    differences = (acc_Ti[task_name] - acc_ii[task_name] for task_name in acc_ii)
+    return sum(differences) / len(acc_ii)

fusion_bench/metrics/nyuv2/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from .depth import DepthMetric
+from .noise import NoiseMetric
+from .normal import NormalMetric
+from .segmentation import SegmentationMertic
+# Registry mapping each NYUv2 task name to the metric class imported above for that task.
+# NOTE: "SegmentationMertic" (sic) is the class name exactly as it is spelled in `.segmentation`.
+metric_classes = {
+    "segmentation": SegmentationMertic,
+    "depth": DepthMetric,
+    "normal": NormalMetric,
+    "noise": NoiseMetric,
+}

fusion_bench/metrics/nyuv2/depth.py ADDED Viewed

@@ -0,0 +1,45 @@
+from typing import List, cast
+import numpy as np
+import torch
+from torch import Tensor, nn
+from torchmetrics import Metric
+class DepthMetric(Metric):
+    """
+    Absolute and relative depth error, accumulated one record per `update` call.
+    Each `update` stores the mean absolute error and the mean relative error over the
+    values kept by the validity mask, together with how many values were kept.
+    `compute` returns the averages of the stored errors weighted by those counts.
+    """
+    metric_names = ["abs_err", "rel_err"]
+    def __init__(self):
+        super().__init__()
+        self.add_state("abs_record", default=[], dist_reduce_fx="cat")
+        self.add_state("rel_record", default=[], dist_reduce_fx="cat")
+        self.add_state("batch_size", default=[], dist_reduce_fx="cat")
+    def reset(self):
+        """Drop all accumulated records and reset the base-class bookkeeping."""
+        self.abs_record = []
+        self.rel_record = []
+        self.batch_size = []
+        # Rebinding the lists alone leaves the base `Metric` bookkeeping (update counter,
+        # cached `compute` result) untouched, so delegate to the base reset as well.
+        super().reset()
+    def update(self, preds: Tensor, target: Tensor):
+        """
+        Record the masked absolute / relative error of one batch.
+        Args:
+            preds (Tensor): Predicted depth; assumed to have the same layout as `target` -- TODO confirm.
+            target (Tensor): Ground-truth depth. Positions whose sum over dim 1 equals 0 are ignored.
+        """
+        binary_mask = (torch.sum(target, dim=1) != 0).unsqueeze(1)
+        # Number of True entries in the mask; computed once and shared by both errors.
+        num_valid = torch.nonzero(binary_mask, as_tuple=False).size(0)
+        if num_valid == 0:
+            # Nothing to measure: recording 0 / 0 = NaN would poison the weighted average.
+            return
+        preds = preds.masked_select(binary_mask)
+        target = target.masked_select(binary_mask)
+        abs_diff = torch.abs(preds - target)
+        abs_err = torch.sum(abs_diff) / num_valid
+        rel_err = torch.sum(abs_diff / target) / num_valid
+        self.abs_record.append(abs_err)
+        self.rel_record.append(rel_err)
+        # NOTE: despite the state name, this is the number of values kept by the mask
+        # (`preds` has already been masked above); it is the weight used in `compute`.
+        self.batch_size.append(torch.asarray(preds.size(0), device=preds.device))
+    def compute(self):
+        """
+        Returns:
+            list: `[abs_err, rel_err]`, each the count-weighted average of the recorded errors.
+        """
+        records = torch.stack(
+            [torch.stack(self.abs_record), torch.stack(self.rel_record)]
+        )
+        batch_size = torch.stack(self.batch_size)
+        return [(records[i] * batch_size).sum() / batch_size.sum() for i in range(2)]

fusion_bench/metrics/nyuv2/loss.py ADDED Viewed

@@ -0,0 +1,31 @@
+import torch
+from torch import Tensor, nn
+def segmentation_loss(pred: Tensor, gt: Tensor):
+    """
+    Cross-entropy between segmentation logits `pred` and class labels `gt`.
+    `gt` is cast to integer labels first; labels equal to -1 are ignored.
+    """
+    labels = gt.long()
+    return nn.functional.cross_entropy(pred, labels, ignore_index=-1)
+def depth_loss(pred: Tensor, gt: Tensor):
+    """
+    Mean absolute error between `pred` and `gt` over the valid positions.
+    A position is valid when the sum of `gt` over dim 1 is non-zero; the summed
+    error is divided by the number of valid positions.
+    """
+    is_valid = torch.sum(gt, dim=1) != 0
+    binary_mask = is_valid.float().unsqueeze(1).to(pred.device)
+    num_valid = torch.nonzero(binary_mask, as_tuple=False).size(0)
+    masked_abs_err = torch.abs(pred - gt) * binary_mask
+    return torch.sum(masked_abs_err) / num_valid
+def normal_loss(pred: Tensor, gt: Tensor):
+    """
+    One minus the mean dot product between the L2-normalized `pred` and `gt`.
+    Only positions where the sum of `gt` over dim 1 is non-zero contribute; the
+    summed dot product is divided by the number of such positions.
+    """
+    # gt has been normalized on the NYUv2 dataset
+    unit_pred = pred / torch.norm(pred, p=2, dim=1, keepdim=True)
+    is_valid = torch.sum(gt, dim=1) != 0
+    binary_mask = is_valid.float().unsqueeze(1).to(unit_pred.device)
+    num_valid = torch.nonzero(binary_mask, as_tuple=False).size(0)
+    masked_dot = (unit_pred * gt) * binary_mask
+    return 1 - torch.sum(masked_dot) / num_valid
+# Registry mapping each task name to the loss function defined above for that task.
+loss_fn = {
+    "segmentation": segmentation_loss,
+    "depth": depth_loss,
+    "normal": normal_loss,
+}

fusion_bench/metrics/nyuv2/noise.py ADDED Viewed

@@ -0,0 +1,16 @@
+from typing import List, cast
+import torch
+from torch import Tensor, nn
+from torchmetrics import Metric
+class NoiseMetric(Metric):
+    """Placeholder metric: ignores every update and always reports `[1]`."""
+    def __init__(self):
+        super(NoiseMetric, self).__init__()
+    def update(self, preds: Tensor, target: Tensor):
+        """Accept a batch and deliberately do nothing with it."""
+        return None
+    def compute(self):
+        """Return the constant result `[1]`."""
+        constant_result = [1]
+        return constant_result

fusion_bench/metrics/nyuv2/normal.py ADDED Viewed

@@ -0,0 +1,48 @@
+from typing import List, cast
+import numpy as np
+import torch
+from torch import Tensor, nn
+from torchmetrics import Metric
+class NormalMetric(Metric):
+    """
+    Angular error (in degrees) between predicted and ground-truth surface normals.
+    `update` stores the per-position angular errors of the valid positions; `compute`
+    summarizes all stored errors (see `metric_names` for the order of the results).
+    """
+    metric_names = ["mean", "median", "<11.25", "<22.5", "<30"]
+    def __init__(self):
+        super(NormalMetric, self).__init__()
+        self.add_state("record", default=[], dist_reduce_fx="cat")
+    def update(self, preds, target):
+        """
+        Store the angular errors of one batch.
+        Args:
+            preds: Predicted normals; L2-normalized here along dim 1.
+            target: Ground-truth normals. Positions whose sum over dim 1 equals 0 are skipped.
+        """
+        # gt has been normalized on the NYUv2 dataset
+        preds = preds / torch.norm(preds, p=2, dim=1, keepdim=True)
+        binary_mask = torch.sum(target, dim=1) != 0
+        cosine = torch.sum(preds * target, 1).masked_select(binary_mask)
+        # Clamp to [-1, 1] so rounding error cannot push `acos` out of its domain.
+        error = torch.acos(torch.clamp(cosine, -1, 1)).detach().cpu().numpy()
+        error = np.degrees(error)
+        self.record.append(torch.from_numpy(error))
+    def compute(self):
+        """
+        returns mean, median, and fraction of pixels with error less than 11.25, 22.5, and 30 degrees ("mean", "median", "<11.25", "<22.5", "<30")
+        Returns a tensor of five zeros when no error has been recorded.
+        """
+        # The state defaults to an empty list (never `None`), so "no data" must be detected
+        # by emptiness; concatenating an empty list would raise.
+        if self.record is None or (
+            isinstance(self.record, (list, tuple)) and len(self.record) == 0
+        ):
+            return torch.asarray([0.0, 0.0, 0.0, 0.0, 0.0])
+        records = torch.concatenate(self.record)
+        if records.numel() == 0:
+            # Every recorded batch was fully masked out: mean / median are undefined.
+            return torch.asarray([0.0, 0.0, 0.0, 0.0, 0.0])
+        return [
+            torch.mean(records),
+            torch.median(records),
+            torch.mean((records < 11.25) * 1.0),
+            torch.mean((records < 22.5) * 1.0),
+            torch.mean((records < 30) * 1.0),
+        ]

fusion_bench/metrics/nyuv2/segmentation.py ADDED Viewed

@@ -0,0 +1,43 @@
+from typing import List, cast
+import torch
+from torch import Tensor, nn
+from torchmetrics import Metric
+class SegmentationMertic(Metric):
+    """
+    Mean IoU and pixel accuracy computed from an accumulated confusion matrix.
+    `record[t, p]` counts the pixels with target class `t` that were predicted as class `p`.
+    """
+    metric_names = ["mIoU", "pixAcc"]
+    def __init__(self, num_classes=13):
+        super().__init__()
+        self.num_classes = num_classes
+        self.add_state(
+            "record",
+            default=torch.zeros(
+                (self.num_classes, self.num_classes), dtype=torch.int64
+            ),
+            dist_reduce_fx="sum",
+        )
+    def reset(self):
+        """Reset the confusion matrix to zeros via the base-class reset."""
+        # The base reset restores `record` to its registered all-zero default and also
+        # clears the base `Metric` bookkeeping, which a bare `record.zero_()` leaves stale.
+        super().reset()
+    def update(self, preds: Tensor, target: Tensor):
+        """
+        Add one batch to the confusion matrix.
+        Args:
+            preds (Tensor): Class scores with the class axis at dim 1.
+            target (Tensor): Class labels; values outside `[0, num_classes)` are ignored.
+        """
+        # argmax is unchanged by softmax (a monotonic map), so the scores are used directly.
+        preds = preds.argmax(1).flatten()
+        target = target.long().flatten()
+        k = (target >= 0) & (target < self.num_classes)
+        # Encode each (target, pred) pair as one flat index into the confusion matrix.
+        inds = self.num_classes * target[k].to(torch.int64) + preds[k]
+        self.record += torch.bincount(inds, minlength=self.num_classes**2).reshape(
+            self.num_classes, self.num_classes
+        )
+    def compute(self):
+        """
+        return mIoU and pixel accuracy
+        """
+        h = cast(Tensor, self.record).float()
+        # NOTE: a class absent from both predictions and targets gives 0 / 0 = NaN here,
+        # which makes the mean IoU NaN as well.
+        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))
+        acc = torch.diag(h).sum() / h.sum()
+        return [torch.mean(iu), acc]

fusion_bench/metrics/text_to_image_generation/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""
+This module implements metrics for text-to-image generation tasks,
+including reward functions for alignment and Reinforcement Learning from Human Feedback (RLHF) training.
+"""
+# flake8: noqa F401
+from .aesthetic_scorer import aesthetic_scorer
+from .compressibility import jpeg_compressibility_scorer, jpeg_incompressibility_scorer
+from .pickscore_scorer import pickscore_scorer

fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py ADDED Viewed

@@ -0,0 +1,123 @@
+import os
+from typing import cast
+import torch
+from huggingface_hub import hf_hub_download
+from huggingface_hub.utils import EntryNotFoundError
+from torch import Tensor, nn
+from transformers import CLIPModel, CLIPProcessor
+from trl.import_utils import is_npu_available, is_xpu_available
+class MLP(nn.Module):
+    """Stack of linear layers (with dropout) mapping a 768-dim embedding to a single value."""
+    def __init__(self):
+        super(MLP, self).__init__()
+        stack = [
+            nn.Linear(768, 1024),
+            nn.Dropout(0.2),
+            nn.Linear(1024, 128),
+            nn.Dropout(0.2),
+            nn.Linear(128, 64),
+            nn.Dropout(0.1),
+            nn.Linear(64, 16),
+            nn.Linear(16, 1),
+        ]
+        self.layers = nn.Sequential(*stack)
+    @torch.no_grad()
+    def forward(self, embed: Tensor) -> Tensor:
+        """
+        Run the embedding through the layer stack with gradients disabled.
+        The output has one value per embedding (last dimension of size 1).
+        """
+        scores = self.layers(embed)
+        return scores
+class AestheticScorer(torch.nn.Module):
+    """
+    This model attempts to predict the aesthetic score of an image. The aesthetic score
+    is a numerical approximation of how much a specific image is liked by humans on average.
+    This is from https://github.com/christophschuhmann/improved-aesthetic-predictor
+    Note for `model_id` and `model_filename`:
+        In some implementations the MLP checkpoint is named 'sac+logos+ava1-l14-linearMSE.pth'.
+        It is the same file as the default 'aesthetic-model.pth' used here; it was simply renamed.
+        see https://huggingface.co/trl-lib/ddpo-aesthetic-predictor/commit/7f639699bec8126062148a47ecb1a4312d8e6688
+    """
+    def __init__(
+        self,
+        *,
+        dtype: torch.dtype,
+        model_id: str = "trl-lib/ddpo-aesthetic-predictor",
+        model_filename: str = "aesthetic-model.pth",
+    ):
+        """
+        Initialize the AestheticScorer class.
+        Args:
+            dtype (torch.dtype): The data type of the tensors.
+            model_id (str, optional): The ID of the model to download. Defaults to "trl-lib/ddpo-aesthetic-predictor".
+                If the file is not found on the hub, `model_id` is used as a local directory instead.
+            model_filename (str, optional): The filename of the model to download. Defaults to "aesthetic-model.pth". This is the same as 'sac+logos+ava1-l14-linearMSE.pth' in some implementations.
+        """
+        super().__init__()
+        self.clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
+        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+        self.mlp = MLP()
+        try:
+            cached_path = hf_hub_download(model_id, model_filename)
+        except EntryNotFoundError:
+            # Not on the hub: fall back to a local file at <model_id>/<model_filename>.
+            cached_path = os.path.join(model_id, model_filename)
+        # The checkpoint is a downloaded file: `weights_only=True` restricts unpickling to
+        # tensors and plain containers instead of executing arbitrary pickled code.
+        state_dict = torch.load(
+            cached_path, map_location=torch.device("cpu"), weights_only=True
+        )
+        self.mlp.load_state_dict(state_dict)
+        self.dtype = dtype
+        self.eval()
+    @torch.no_grad()
+    def __call__(self, images: Tensor) -> Tensor:
+        """
+        Process the given images and return their aesthetic scores.
+        This method processes the images using the CLIP model, normalizes the embeddings,
+        and then passes them through a MLP to get the aesthetic scores.
+        Args:
+            images (torch.Tensor): A batch of images to process.
+        Returns:
+            Tensor: The aesthetic scores of the images. Return shape is (batch_size,).
+        """
+        device = next(self.parameters()).device
+        inputs = self.processor(images=images, return_tensors="pt")
+        inputs = {
+            k: cast(Tensor, v).to(self.dtype).to(device) for k, v in inputs.items()
+        }
+        embed = self.clip.get_image_features(**inputs)
+        # normalize embedding
+        embed = embed / torch.linalg.vector_norm(embed, dim=-1, keepdim=True)
+        return self.mlp(embed).squeeze(1)
+def aesthetic_scorer(
+    dtype: torch.dtype = torch.float32,
+    hub_model_id: str = "trl-lib/ddpo-aesthetic-predictor",
+    model_filename: str = "aesthetic-model.pth",
+):
+    """
+    Create a reward function that scores images with `AestheticScorer`.
+    Args:
+        dtype (torch.dtype, optional): The data type passed to the scorer. Defaults to torch.float32.
+        hub_model_id (str, optional): The id of the model to download. Defaults to "trl-lib/ddpo-aesthetic-predictor".
+        model_filename (str, optional): The checkpoint filename. Defaults to "aesthetic-model.pth".
+    Returns:
+        _fn (function): `_fn(images, prompts, metadata)` returning `(scores, {})`;
+            `prompts` and `metadata` are accepted but not used.
+    """
+    scorer = AestheticScorer(
+        dtype=dtype,
+        model_id=hub_model_id,
+        model_filename=model_filename,
+    )
+    if is_npu_available():
+        scorer = scorer.npu()
+    elif is_xpu_available():
+        scorer = scorer.xpu()
+    elif torch.cuda.is_available():
+        scorer = scorer.cuda()
+    # Otherwise no accelerator is available: keep the scorer on the CPU rather than
+    # failing inside `.cuda()`.
+    def _fn(images: Tensor, prompts, metadata):
+        # Scale by 255 and quantize to uint8 (presumably the inputs are in [0, 1] -- confirm with callers).
+        images = (images * 255).round().clamp(0, 255).to(torch.uint8)
+        scores: Tensor = scorer(images)
+        return scores, {}
+    return _fn

fusion_bench/metrics/text_to_image_generation/compressibility.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+In this script, two reward functions are defined:
+- compressibility, in which the file size of the image after JPEG compression is minimized
+- incompressibility, in which the same measure is maximized.
+"""
+import io
+from typing import List
+import numpy as np
+import torch
+from PIL import Image
+def jpeg_incompressibility_scorer():
+    """
+    Function to calculate the incompressibility score of an image.
+    The score is calculated based on the size of the image after JPEG compression.
+    The larger the size, the higher the incompressibility score.
+    Returns:
+        _fn (function): `_fn(images, prompts, metadata)` returning `(sizes, {})`, where `sizes`
+            holds the JPEG-encoded size of each image in bytes / 1000. `prompts` and
+            `metadata` are accepted but not used.
+    """
+    def _fn(images: torch.Tensor, prompts, metadata):
+        if isinstance(images, torch.Tensor):
+            images = (images * 255).round().clamp(0, 255).to(torch.uint8).cpu().numpy()
+            # `images` is a NumPy array at this point, which has `transpose` but no `permute`.
+            images = images.transpose(0, 2, 3, 1)  # NCHW -> NHWC
+        images: List[Image.Image] = [Image.fromarray(image) for image in images]
+        buffers = [io.BytesIO() for _ in images]
+        for image, buffer in zip(images, buffers):
+            image.save(buffer, format="JPEG", quality=95)
+        # After saving, the buffer position equals the number of bytes written.
+        sizes = [buffer.tell() / 1000 for buffer in buffers]
+        return torch.asarray(sizes), {}
+    return _fn
+def jpeg_compressibility_scorer():
+    """
+    Build a reward function that favours images which compress well as JPEG.
+    It is the negation of the incompressibility score: the smaller the compressed
+    image, the higher the returned score.
+    """
+    incompressibility_fn = jpeg_incompressibility_scorer()
+    def _fn(images: torch.Tensor, prompts, metadata):
+        size_score, info = incompressibility_fn(images, prompts, metadata)
+        return -size_score, info
+    return _fn

fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py ADDED Viewed

@@ -0,0 +1,95 @@
+import os
+from typing import List, cast
+import torch
+from transformers import AutoModel, CLIPModel, CLIPProcessor
+from trl.import_utils import is_npu_available, is_xpu_available
+class PickScoreScorer(torch.nn.Module):
+    """
+    Scores how well each image matches its prompt using a pretrained PickScore model.
+    References:
+        - Pick-a-Pic: An Open Dataset of User Preferences for Text-to-Image Generation.
+            http://arxiv.org/abs/2305.01569
+    """
+    def __init__(
+        self,
+        *,
+        dtype: torch.dtype,
+        model_id: str = "yuvalkirstain/PickScore_v1",
+        processor_name_or_path: str = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
+    ):
+        """
+        Load the processor and the scoring model.
+        Args:
+            dtype (torch.dtype): Data type the model and the image inputs are cast to.
+            model_id (str, optional): Pretrained model to load. Defaults to "yuvalkirstain/PickScore_v1".
+            processor_name_or_path (str, optional): Processor to load. Defaults to "laion/CLIP-ViT-H-14-laion2B-s32B-b79K".
+        """
+        super().__init__()
+        self.dtype = dtype
+        self.processor = CLIPProcessor.from_pretrained(processor_name_or_path)
+        pretrained = cast(CLIPModel, AutoModel.from_pretrained(model_id))
+        pretrained = pretrained.eval()
+        self.model = pretrained.to(dtype=self.dtype)
+    @torch.no_grad()
+    def __call__(self, images: torch.Tensor, prompts: List[str]):
+        """
+        Scores the given images based on their relevance to the given prompts.
+        Image `i` is scored against prompt `i`.
+        Args:
+            images (torch.Tensor): The images to score.
+            prompts (List[str]): The prompts to score the images against.
+        Returns:
+            scores (torch.Tensor): The scores of the images.
+        """
+        device = next(self.parameters()).device
+        processed_images = self.processor(images=images, return_tensors="pt")
+        image_inputs = {}
+        for name, value in processed_images.items():
+            image_inputs[name] = value.to(self.dtype).to(device)
+        text_inputs = self.processor(
+            text=prompts,
+            padding=True,
+            truncation=True,
+            max_length=77,
+            return_tensors="pt",
+        )
+        text_inputs = text_inputs.to(device)
+        image_embeds = self.model.get_image_features(**image_inputs)
+        image_norm = torch.norm(image_embeds, dim=-1, keepdim=True)
+        image_embeds = image_embeds / image_norm
+        text_embeds = self.model.get_text_features(**text_inputs)
+        text_norm = torch.norm(text_embeds, dim=-1, keepdim=True)
+        text_embeds = text_embeds / text_norm
+        # Full image-by-prompt similarity matrix; only the matching pairs (diagonal) are kept.
+        logits_per_image = image_embeds @ text_embeds.T
+        return torch.diagonal(logits_per_image)
+def pickscore_scorer(
+    dtype: torch.dtype = torch.float32,
+    hub_model_id: str = "yuvalkirstain/PickScore_v1",
+):
+    """
+    Creates a scoring function that scores images based on their relevance to a set of prompts.
+    Args:
+        dtype (torch.dtype, optional): The data type to use for the computations. Defaults to torch.float32.
+        hub_model_id (str, optional): The id of the pretrained model to use. Defaults to "yuvalkirstain/PickScore_v1".
+    Returns:
+        _fn (function): The scoring function. `_fn(images, prompts, metadata)` returns
+            `(scores, {})`; `metadata` is accepted but not used.
+    """
+    scorer = PickScoreScorer(
+        dtype=dtype,
+        model_id=hub_model_id,
+    )
+    if is_npu_available():
+        scorer = scorer.npu()
+    elif is_xpu_available():
+        scorer = scorer.xpu()
+    elif torch.cuda.is_available():
+        scorer = scorer.cuda()
+    # Otherwise no accelerator is available: keep the scorer on the CPU rather than
+    # failing inside `.cuda()`.
+    def _fn(images: torch.Tensor, prompts, metadata):
+        # Scale by 255 and quantize to uint8 (presumably the inputs are in [0, 1] -- confirm with callers).
+        images = (images * 255).round().clamp(0, 255).to(torch.uint8)
+        scores: torch.Tensor = scorer(images, prompts)
+        return scores, {}
+    return _fn

fusion_bench/mixins/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+# flake8: noqa F401
+import sys
+from typing_extensions import TYPE_CHECKING
+from fusion_bench.utils.lazy_imports import LazyImporter
+# Maps each submodule of this package to the public names it provides. It is handed to
+# LazyImporter below (presumably so a submodule is imported only when one of its names is
+# first accessed -- confirm against fusion_bench.utils.lazy_imports).
+_import_structure = {
+    "lightning_fabric": ["LightningFabricMixin"],
+    "serialization": ["YAMLSerializationMixin", "BaseYAMLSerializableModel"],
+    "simple_profiler": ["SimpleProfilerMixin"],
+    "clip_classification": ["CLIPClassificationMixin"],
+    "fabric_training": ["FabricTrainingMixin"],
+}
+# Static type checkers take the eager-import branch so the names resolve for them;
+# at runtime this module's entry in sys.modules is replaced by a LazyImporter instance.
+if TYPE_CHECKING:
+    from .clip_classification import CLIPClassificationMixin
+    from .fabric_training import FabricTrainingMixin
+    from .lightning_fabric import LightningFabricMixin
+    from .serialization import BaseYAMLSerializableModel, YAMLSerializationMixin
+    from .simple_profiler import SimpleProfilerMixin
+else:
+    sys.modules[__name__] = LazyImporter(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+    )