PyPI - dataeval - Versions diffs - 0.61.0__py3-none-any.whl - Mend

dataeval 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

dataeval/__init__.py +18 -0
dataeval/_internal/detectors/__init__.py +0 -0
dataeval/_internal/detectors/clusterer.py +469 -0
dataeval/_internal/detectors/drift/__init__.py +0 -0
dataeval/_internal/detectors/drift/base.py +265 -0
dataeval/_internal/detectors/drift/cvm.py +97 -0
dataeval/_internal/detectors/drift/ks.py +100 -0
dataeval/_internal/detectors/drift/mmd.py +166 -0
dataeval/_internal/detectors/drift/torch.py +310 -0
dataeval/_internal/detectors/drift/uncertainty.py +149 -0
dataeval/_internal/detectors/duplicates.py +49 -0
dataeval/_internal/detectors/linter.py +78 -0
dataeval/_internal/detectors/ood/__init__.py +0 -0
dataeval/_internal/detectors/ood/ae.py +77 -0
dataeval/_internal/detectors/ood/aegmm.py +69 -0
dataeval/_internal/detectors/ood/base.py +199 -0
dataeval/_internal/detectors/ood/llr.py +284 -0
dataeval/_internal/detectors/ood/vae.py +86 -0
dataeval/_internal/detectors/ood/vaegmm.py +79 -0
dataeval/_internal/flags.py +47 -0
dataeval/_internal/metrics/__init__.py +0 -0
dataeval/_internal/metrics/base.py +92 -0
dataeval/_internal/metrics/ber.py +124 -0
dataeval/_internal/metrics/coverage.py +80 -0
dataeval/_internal/metrics/divergence.py +94 -0
dataeval/_internal/metrics/hash.py +79 -0
dataeval/_internal/metrics/parity.py +180 -0
dataeval/_internal/metrics/stats.py +332 -0
dataeval/_internal/metrics/uap.py +45 -0
dataeval/_internal/metrics/utils.py +158 -0
dataeval/_internal/models/__init__.py +0 -0
dataeval/_internal/models/pytorch/__init__.py +0 -0
dataeval/_internal/models/pytorch/autoencoder.py +202 -0
dataeval/_internal/models/pytorch/blocks.py +46 -0
dataeval/_internal/models/pytorch/utils.py +67 -0
dataeval/_internal/models/tensorflow/__init__.py +0 -0
dataeval/_internal/models/tensorflow/autoencoder.py +317 -0
dataeval/_internal/models/tensorflow/gmm.py +115 -0
dataeval/_internal/models/tensorflow/losses.py +107 -0
dataeval/_internal/models/tensorflow/pixelcnn.py +1106 -0
dataeval/_internal/models/tensorflow/trainer.py +102 -0
dataeval/_internal/models/tensorflow/utils.py +254 -0
dataeval/_internal/workflows/sufficiency.py +555 -0
dataeval/detectors/__init__.py +29 -0
dataeval/flags/__init__.py +3 -0
dataeval/metrics/__init__.py +7 -0
dataeval/models/__init__.py +15 -0
dataeval/models/tensorflow/__init__.py +6 -0
dataeval/models/torch/__init__.py +8 -0
dataeval/py.typed +0 -0
dataeval/workflows/__init__.py +8 -0
dataeval-0.61.0.dist-info/LICENSE.txt +21 -0
dataeval-0.61.0.dist-info/METADATA +114 -0
dataeval-0.61.0.dist-info/RECORD +55 -0
dataeval-0.61.0.dist-info/WHEEL +4 -0

dataeval/_internal/metrics/utils.py ADDED Viewed

@@ -0,0 +1,158 @@
+from typing import Any, Literal, NamedTuple, Tuple, Union
+import numpy as np
+from scipy.signal import convolve2d
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import minimum_spanning_tree as mst
+from scipy.spatial.distance import pdist, squareform
+from sklearn.neighbors import NearestNeighbors
+# Constant added to every pairwise distance in minimum_spanning_tree so that no
+# entry of the dense distance matrix is exactly zero.
+EPSILON = 1e-5
+# 3x3 kernel applied by edge_filter: centre weight 8, surrounding weights -1.
+EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
+# Candidate bit depths tested in ascending order by get_bitdepth.
+BIT_DEPTH = (1, 8, 12, 16, 32)
+def minimum_spanning_tree(X: np.ndarray) -> Any:
+    """
+    Builds the minimum spanning tree over the samples of a NumPy image array.
+    Parameters
+    ----------
+    X: np.ndarray
+        Numpy image array; the first axis indexes samples and all remaining
+        axes are flattened into one feature axis
+    Returns
+    -------
+        Data representing the minimum spanning tree of the pairwise
+        Euclidean distances between samples
+    """
+    n_samples = X.shape[0]
+    flattened = X.reshape((n_samples, -1))
+    pairwise = squareform(pdist(flattened))
+    # Shifting every distance by a small constant makes scipy treat the graph
+    # as fully-connected: zero distances would otherwise read as missing edges.
+    graph = csr_matrix(pairwise + EPSILON)
+    return mst(graph)
+def get_classes_counts(labels: np.ndarray) -> Tuple[int, int]:
+    """
+    Returns the number of unique classes and the total number of labels
+    Parameters
+    ----------
+    labels: np.ndarray
+        Numpy labels array
+    Returns
+    -------
+    Tuple[int, int]
+        Number of unique classes (M) and total number of labels (N)
+    Raises
+    ------
+    ValueError
+        If the number of unique classes is less than 2
+    """
+    classes, counts = np.unique(labels, return_counts=True)
+    M = len(classes)
+    if M < 2:
+        raise ValueError("Label vector contains less than 2 classes!")
+    # Convert to a built-in int so the result matches the declared return type
+    N = int(counts.sum())
+    return M, N
+def compute_neighbors(
+    A: np.ndarray,
+    B: np.ndarray,
+    k: int = 1,
+    algorithm: Literal["auto", "ball_tree", "kd_tree"] = "auto",
+) -> np.ndarray:
+    """
+    For each sample in A, find the indices of its nearest neighbors in B
+    Parameters
+    ----------
+    A : np.ndarray
+        Samples to find neighbors for
+    B : np.ndarray
+        Samples the neighbor search is fitted on
+    k : int
+        The number of neighbors to keep for each sample in A
+    algorithm : Literal
+        Tree method for nearest neighbor (auto, ball_tree or kd_tree)
+    Note
+    ----
+        Do not use kd_tree if n_features > 20
+        k + 1 neighbors are requested and the closest one is discarded
+    Returns
+    -------
+    np.ndarray
+        Indices into B of the kept neighbors, with length-1 axes squeezed out
+    See Also
+    --------
+    :func:`sklearn.neighbors.NearestNeighbors`
+    """
+    finder = NearestNeighbors(n_neighbors=k + 1, algorithm=algorithm)
+    finder = finder.fit(B)
+    _, indices = finder.kneighbors(A)
+    # Column 0 holds the closest match, which is dropped
+    return indices[:, 1:].squeeze()
+class BitDepth(NamedTuple):
+    """Estimated bit depth of an image together with its pixel value range."""
+    # Estimated bit depth; get_bitdepth reports 0 for images with negative values
+    depth: int
+    # Lower bound of the pixel value range
+    pmin: Union[float, int]
+    # Upper bound of the pixel value range
+    pmax: Union[float, int]
+def get_bitdepth(image: np.ndarray) -> BitDepth:
+    """
+    Estimates the bit depth of the image from its smallest and largest
+    pixel values.
+    Images containing negative values get depth 0 and their observed min/max;
+    otherwise the first entry of BIT_DEPTH whose range exceeds the maximum is
+    used (falling back to the largest entry) with the range 0 to 2**depth - 1.
+    """
+    lowest, highest = np.min(image), np.max(image)
+    if lowest < 0:
+        return BitDepth(0, lowest, highest)
+    depth = next((bits for bits in BIT_DEPTH if 2**bits > highest), max(BIT_DEPTH))
+    return BitDepth(depth, 0, 2**depth - 1)
+def rescale(image: np.ndarray, depth: int = 1) -> np.ndarray:
+    """
+    Rescales the image to the value range of the bit depth provided.
+    Parameters
+    ----------
+    image: np.ndarray
+        Image array to rescale
+    depth: int, default 1
+        Target bit depth; rescaled values span 0 to 2**depth - 1
+    Returns
+    -------
+    np.ndarray
+        The input itself when get_bitdepth already reports the requested
+        depth, otherwise a new min-max rescaled array
+    """
+    bitdepth = get_bitdepth(image)
+    if bitdepth.depth == depth:
+        return image
+    # Min-max normalization subtracts the lower bound. pmin is only non-zero
+    # for images with negative pixels; adding it would shift those further
+    # below zero instead of mapping them onto [0, 1].
+    normalized = (image - bitdepth.pmin) / (bitdepth.pmax - bitdepth.pmin)
+    return normalized * (2**depth - 1)
+def normalize_image_shape(image: np.ndarray) -> np.ndarray:
+    """
+    Brings an image array into three dimensions, (C,H,W).
+    Two-dimensional input gains a leading axis, three-dimensional input is
+    returned untouched, and higher-dimensional input is reduced by taking
+    index 0 along every leading axis beyond the last three.
+    Raises
+    ------
+    ValueError
+        If the image has fewer than 2 dimensions
+    """
+    rank = image.ndim
+    if rank < 2:
+        raise ValueError("Images must have 2 or more dimensions.")
+    if rank == 2:
+        return np.expand_dims(image, axis=0)
+    if rank == 3:
+        return image
+    # One zero index per surplus leading axis keeps only the last 3 dimensions
+    leading = (0,) * (rank - 3)
+    return image[leading]
+def edge_filter(image: np.ndarray, offset: float = 0.5) -> np.ndarray:
+    """
+    Convolves the image with a 3x3 edge detection kernel, adds the offset
+    and clips the result to the range 0 to 255. The kernel is:
+    [[ -1, -1, -1 ],
+     [ -1,  8, -1 ],
+     [ -1, -1, -1 ]]
+    """
+    response = convolve2d(image, EDGE_KERNEL, mode="same", boundary="symm")
+    shifted = response + offset
+    # Clip in place so no additional array is allocated
+    np.clip(shifted, 0, 255, out=shifted)
+    return shifted

dataeval/_internal/models/__init__.py ADDED Viewed

File without changes

dataeval/_internal/models/pytorch/__init__.py ADDED Viewed

File without changes

dataeval/_internal/models/pytorch/autoencoder.py ADDED Viewed

@@ -0,0 +1,202 @@
+from typing import Any, List, Union
+import torch
+import torch.nn as nn
+from torch.optim import Adam
+from torch.utils.data import DataLoader, Dataset
+# Seeds torch's global random number generator as a side effect of importing this module
+torch.manual_seed(0)
+def get_images_from_batch(batch: Any) -> Any:
+    """
+    Returns the first element when the collated batch is a list or tuple,
+    otherwise returns the batch unchanged
+    """
+    if isinstance(batch, (list, tuple)):
+        return batch[0]
+    return batch
+class AETrainer:
+    """
+    Trains, evaluates and encodes with an autoencoder-style model, feeding it
+    batches drawn from a torch Dataset.
+    """
+    def __init__(
+        self,
+        model: nn.Module,
+        device: Union[str, torch.device] = "auto",
+        batch_size: int = 8,
+    ):
+        """
+        Parameters
+        ----------
+        model : nn.Module
+            Model to be trained
+        device : str | torch.device, default "auto"
+            Hardware device for model, optimizer, and data to run on.
+            "auto" picks "cuda" when it is available and "cpu" otherwise
+        batch_size : int, default 8
+            Number of images to group together in `torch.utils.data.DataLoader`
+        """
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = device
+        self.model = model.to(device)
+        self.batch_size = batch_size
+    def train(self, dataset: Dataset, epochs: int = 25) -> List[float]:
+        """
+        Basic training function for Autoencoder models for reconstruction tasks
+        Uses `torch.optim.Adam` and `torch.nn.MSELoss` as default hyperparameters
+        Parameters
+        ----------
+        dataset : Dataset
+            Torch Dataset containing images in the first return position
+        epochs : int, default 25
+            Number of full training loops
+        Returns
+        -------
+        List[float]
+            Mean batch loss of every epoch, in order
+        Note
+        ----
+        To replace this function with a custom function, do
+            AETrainer.train = custom_function
+        """
+        # Setup training
+        self.model.train()
+        dataloader = DataLoader(dataset, batch_size=self.batch_size)
+        opt = Adam(self.model.parameters(), lr=0.001)
+        criterion = nn.MSELoss().to(self.device)
+        # Record loss
+        loss_history: List[float] = []
+        for _ in range(epochs):
+            epoch_loss: float = 0
+            for batch in dataloader:
+                imgs = get_images_from_batch(batch)
+                imgs = imgs.to(self.device)
+                # Zero your gradients for every batch!
+                opt.zero_grad()
+                # Make predictions for this batch
+                pred = self.model(imgs)
+                # Compute the loss and its gradients
+                loss = criterion(pred, imgs)
+                loss.backward()
+                # Adjust learning weights
+                opt.step()
+                # Gather data and report
+                epoch_loss += loss.item()
+            # Will take the average from all batches
+            epoch_loss /= len(dataloader)
+            loss_history.append(epoch_loss)
+        return loss_history
+    # Called form of the decorator: works on every PyTorch release, while the
+    # bare `@torch.no_grad` spelling is only accepted by newer ones
+    @torch.no_grad()
+    def eval(self, dataset: Dataset) -> float:
+        """
+        Basic evaluation function for Autoencoder models for reconstruction tasks
+        Uses `torch.nn.MSELoss` as the reconstruction criterion
+        Parameters
+        ----------
+        dataset : Dataset
+            Torch Dataset containing images in the first return position
+        Returns
+        -------
+        float
+            Reconstruction loss averaged over all batches
+        Note
+        ----
+        To replace this function with a custom function, do
+            AETrainer.eval = custom_function
+        """
+        self.model.eval()
+        dataloader = DataLoader(dataset, batch_size=self.batch_size)
+        criterion = nn.MSELoss().to(self.device)
+        total_loss: float = 0.0
+        for batch in dataloader:
+            imgs = get_images_from_batch(batch)
+            imgs = imgs.to(self.device)
+            pred = self.model(imgs)
+            loss = criterion(pred, imgs)
+            total_loss += loss.item()
+        return total_loss / len(dataloader)
+    @torch.no_grad()
+    def encode(self, dataset: Dataset) -> torch.Tensor:
+        """
+        Encode data through model if it has an encode attribute,
+        otherwise passes data through model.forward
+        Parameters
+        ----------
+        dataset: Dataset
+            Dataset containing images to be encoded by the model
+        Returns
+        -------
+        torch.Tensor
+            Data encoded by the model, moved to the CPU; an empty tensor when
+            the dataset yields no batches
+        """
+        self.model.eval()
+        dl = DataLoader(dataset, batch_size=self.batch_size)
+        # Get encode function if defined
+        encode_func = self.model.encode if getattr(self.model, "encode", None) else self.model.forward
+        # Collect the per-batch results and stack them once at the end rather
+        # than re-copying the growing tensor on every batch
+        chunks: List[torch.Tensor] = []
+        for batch in dl:
+            imgs = get_images_from_batch(batch)
+            imgs = imgs.to(self.device)
+            chunks.append(encode_func(imgs).to("cpu"))
+        if not chunks:
+            return torch.Tensor([])
+        # A single batch is returned as-is so its shape is left untouched
+        return chunks[0] if len(chunks) == 1 else torch.vstack(chunks)
+class AriaAutoencoder(nn.Module):
+    """
+    Autoencoder that chains the Encoder and Decoder modules defined in this file
+    """
+    def __init__(self, channels=3):
+        super().__init__()
+        self.encoder = Encoder(channels)
+        self.decoder = Decoder(channels)
+    def forward(self, x):
+        """Encodes x and decodes the result"""
+        return self.decoder(self.encoder(x))
+    def encode(self, x):
+        """Runs only the encoder on x"""
+        latent = self.encoder(x)
+        return latent
+class Encoder(nn.Module):
+    """
+    Convolutional encoder: three 2x2 convolutions producing 256, 128 and 64
+    channels, with ReLU and 2x2 max pooling after each of the first two
+    """
+    def __init__(self, channels=3):
+        super().__init__()
+        layers = [
+            nn.Conv2d(in_channels=channels, out_channels=256, kernel_size=2, stride=1, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2),
+            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=2, stride=1, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2),
+            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=2, stride=1),
+        ]
+        self.encoder = nn.Sequential(*layers)
+    def forward(self, x):
+        """Passes x through the layer stack"""
+        encoded = self.encoder(x)
+        return encoded
+class Decoder(nn.Module):
+    """
+    Transposed-convolution decoder: maps 64 input channels through 128 and
+    256 channels to `channels` outputs, ending in a sigmoid
+    """
+    def __init__(self, channels):
+        super().__init__()
+        layers = [
+            nn.ConvTranspose2d(in_channels=64, out_channels=128, kernel_size=2, stride=1),
+            nn.ReLU(),
+            nn.ConvTranspose2d(in_channels=128, out_channels=256, kernel_size=2, stride=2),
+            nn.ReLU(),
+            nn.ConvTranspose2d(in_channels=256, out_channels=channels, kernel_size=2, stride=2),
+            nn.Sigmoid(),
+        ]
+        self.decoder = nn.Sequential(*layers)
+    def forward(self, x):
+        """Passes x through the layer stack"""
+        decoded = self.decoder(x)
+        return decoded

dataeval/_internal/models/pytorch/blocks.py ADDED Viewed

@@ -0,0 +1,46 @@
+import torch.nn as nn
+class Conv(nn.Module):
+    """
+    Bundles a 2D convolution with a normalization layer and an activation, so
+    call sites only spell out what differs from the defaults
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        k=1,
+        s=1,
+        p=0,
+        activation="relu",
+        norm="instance",
+    ):
+        super().__init__()
+        layers = [
+            nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p),
+            self.get_norm_func(norm=norm, out_channels=out_channels),
+            self.get_activation_func(activation=activation),
+        ]
+        self.module = nn.Sequential(*layers)
+    def get_norm_func(self, norm: str, out_channels) -> nn.Module:
+        """Returns the normalization layer named by norm, or nn.Identity for any other value"""
+        # NOTE(review): nn.LayerNorm(out_channels) normalizes over the trailing
+        # axis, not the channel axis of the convolution output - confirm that
+        # "layer" behaves as intended
+        known = (
+            ("batch", nn.BatchNorm2d),
+            ("instance", nn.InstanceNorm2d),
+            ("layer", nn.LayerNorm),
+        )
+        for label, layer_cls in known:
+            if norm == label:
+                return layer_cls(out_channels)
+        return nn.Identity()
+    def get_activation_func(self, activation: str) -> nn.Module:
+        """Returns the activation named by activation, or nn.Identity for any other value"""
+        known = (
+            ("selu", nn.SELU),
+            ("relu", nn.ReLU),
+            ("leaky", nn.LeakyReLU),
+            ("tanh", nn.Tanh),
+        )
+        for label, act_cls in known:
+            if activation == label:
+                return act_cls()
+        return nn.Identity()
+    def forward(self, x):
+        """Applies convolution, normalization and activation in that order"""
+        out = self.module(x)
+        return out

dataeval/_internal/models/pytorch/utils.py ADDED Viewed

@@ -0,0 +1,67 @@
+from numpy import float32, ndarray
+from torch import Tensor, from_numpy
+def torch_to_numpy(tensor: Tensor) -> ndarray:
+    """
+    Returns the NumPy equivalent of a PyTorch tensor; an ndarray argument is
+    handed back unchanged
+    Raises
+    ------
+    TypeError
+        If the argument is neither an ndarray nor a Tensor
+    """
+    if isinstance(tensor, ndarray):
+        return tensor
+    if isinstance(tensor, Tensor):
+        # Drop the autograd history and move to host memory before converting
+        return tensor.detach().cpu().numpy()
+    raise TypeError("Tensor is not of type Tensor")
+def numpy_to_torch(array: ndarray) -> Tensor:
+    """
+    Returns a float32 PyTorch tensor built from a NumPy array; a Tensor
+    argument is handed back unchanged
+    Raises
+    ------
+    TypeError
+        If the argument is neither a Tensor nor an ndarray
+    """
+    if isinstance(array, Tensor):
+        return array
+    if isinstance(array, ndarray):
+        as_float = array.astype(float32)
+        return from_numpy(as_float)
+    raise TypeError("Array is not of type numpy.ndarray")
+def permute_to_torch(array: ndarray) -> Tensor:
+    """
+    Turns a NumPy image array into a PyTorch image tensor and moves the
+    channel axis ahead of the spatial axes.
+    Parameters
+    ----------
+    array: ndarray
+        Array containing image data in the format NHWC
+    Returns
+    -------
+    Tensor
+        Tensor containing image data in the format NCHW
+    """
+    nhwc = numpy_to_torch(array)
+    return nhwc.permute(0, 3, 1, 2)  # NHWC -> NCHW
+def permute_to_numpy(tensor: Tensor) -> ndarray:
+    """
+    Converts and permutes a PyTorch image tensor into a NumPy image array.
+    Does not permute if given ndarray
+    Parameters
+    ----------
+    tensor: Tensor
+        Tensor containing image data in the format NCHW
+    Returns
+    -------
+    ndarray
+        Array containing image data in the format NHWC, or the input itself
+        when it already is an ndarray
+    """
+    # ndarray has no `permute` method, so the documented pass-through has to
+    # happen before the permutation is attempted
+    if isinstance(tensor, ndarray):
+        return tensor
+    x = tensor.permute(0, 2, 3, 1)  # NCHW -> NHWC
+    return torch_to_numpy(x)

dataeval/_internal/models/tensorflow/__init__.py ADDED Viewed

File without changes