dataeval 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. dataeval/__init__.py +18 -0
  2. dataeval/_internal/detectors/__init__.py +0 -0
  3. dataeval/_internal/detectors/clusterer.py +469 -0
  4. dataeval/_internal/detectors/drift/__init__.py +0 -0
  5. dataeval/_internal/detectors/drift/base.py +265 -0
  6. dataeval/_internal/detectors/drift/cvm.py +97 -0
  7. dataeval/_internal/detectors/drift/ks.py +100 -0
  8. dataeval/_internal/detectors/drift/mmd.py +166 -0
  9. dataeval/_internal/detectors/drift/torch.py +310 -0
  10. dataeval/_internal/detectors/drift/uncertainty.py +149 -0
  11. dataeval/_internal/detectors/duplicates.py +49 -0
  12. dataeval/_internal/detectors/linter.py +78 -0
  13. dataeval/_internal/detectors/ood/__init__.py +0 -0
  14. dataeval/_internal/detectors/ood/ae.py +77 -0
  15. dataeval/_internal/detectors/ood/aegmm.py +69 -0
  16. dataeval/_internal/detectors/ood/base.py +199 -0
  17. dataeval/_internal/detectors/ood/llr.py +284 -0
  18. dataeval/_internal/detectors/ood/vae.py +86 -0
  19. dataeval/_internal/detectors/ood/vaegmm.py +79 -0
  20. dataeval/_internal/flags.py +47 -0
  21. dataeval/_internal/metrics/__init__.py +0 -0
  22. dataeval/_internal/metrics/base.py +92 -0
  23. dataeval/_internal/metrics/ber.py +124 -0
  24. dataeval/_internal/metrics/coverage.py +80 -0
  25. dataeval/_internal/metrics/divergence.py +94 -0
  26. dataeval/_internal/metrics/hash.py +79 -0
  27. dataeval/_internal/metrics/parity.py +180 -0
  28. dataeval/_internal/metrics/stats.py +332 -0
  29. dataeval/_internal/metrics/uap.py +45 -0
  30. dataeval/_internal/metrics/utils.py +158 -0
  31. dataeval/_internal/models/__init__.py +0 -0
  32. dataeval/_internal/models/pytorch/__init__.py +0 -0
  33. dataeval/_internal/models/pytorch/autoencoder.py +202 -0
  34. dataeval/_internal/models/pytorch/blocks.py +46 -0
  35. dataeval/_internal/models/pytorch/utils.py +67 -0
  36. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  37. dataeval/_internal/models/tensorflow/autoencoder.py +317 -0
  38. dataeval/_internal/models/tensorflow/gmm.py +115 -0
  39. dataeval/_internal/models/tensorflow/losses.py +107 -0
  40. dataeval/_internal/models/tensorflow/pixelcnn.py +1106 -0
  41. dataeval/_internal/models/tensorflow/trainer.py +102 -0
  42. dataeval/_internal/models/tensorflow/utils.py +254 -0
  43. dataeval/_internal/workflows/sufficiency.py +555 -0
  44. dataeval/detectors/__init__.py +29 -0
  45. dataeval/flags/__init__.py +3 -0
  46. dataeval/metrics/__init__.py +7 -0
  47. dataeval/models/__init__.py +15 -0
  48. dataeval/models/tensorflow/__init__.py +6 -0
  49. dataeval/models/torch/__init__.py +8 -0
  50. dataeval/py.typed +0 -0
  51. dataeval/workflows/__init__.py +8 -0
  52. dataeval-0.61.0.dist-info/LICENSE.txt +21 -0
  53. dataeval-0.61.0.dist-info/METADATA +114 -0
  54. dataeval-0.61.0.dist-info/RECORD +55 -0
  55. dataeval-0.61.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,317 @@
+ """
+ Source code derived from Alibi-Detect 0.11.4
+ https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
+
+ Original code Copyright (c) 2023 Seldon Technologies Ltd
+ Licensed under Apache Software License (Apache 2.0)
+ """
+
+ # pyright: reportIncompatibleMethodOverride=false
+
+ from typing import Callable, Tuple, cast
+
+ import keras
+ import tensorflow as tf
+ from keras.layers import (
+     Dense,
+     Flatten,
+     Layer,
+ )
+
+
+ def relative_euclidean_distance(x: tf.Tensor, y: tf.Tensor, eps: float = 1e-12, axis: int = -1) -> tf.Tensor:
+     """
+     Relative Euclidean distance.
+
+     Parameters
+     ----------
+     x
+         Tensor used in distance computation.
+     y
+         Tensor used in distance computation.
+     eps
+         Epsilon added to denominator for numerical stability.
+     axis
+         Axis used to compute distance.
+
+     Returns
+     -------
+     Tensor with relative Euclidean distance across specified axis.
+     """
+     denom = tf.concat(
+         [
+             tf.reshape(tf.norm(x, ord=2, axis=axis), (-1, 1)),  # type: ignore
+             tf.reshape(tf.norm(y, ord=2, axis=axis), (-1, 1)),  # type: ignore
+         ],
+         axis=1,
+     )
+     dist = tf.norm(tf.math.subtract(x, y), ord=2, axis=axis) / (tf.reduce_min(denom, axis=axis) + eps)  # type: ignore
+     return dist
+
+
+ def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf.Tensor:
+     """
+     Compute features extracted from the reconstructed instance using the
+     relative Euclidean distance and cosine similarity between 2 tensors.
+
+     Parameters
+     ----------
+     x
+         Tensor used in feature computation.
+     y
+         Tensor used in feature computation.
+     max_eucl
+         Maximum value to clip relative Euclidean distance by.
+
+     Returns
+     -------
+     Tensor concatenating the relative Euclidean distance and cosine similarity features.
+     """
+     if len(x.shape) > 2 or len(y.shape) > 2:
+         x = cast(tf.Tensor, Flatten()(x))
+         y = cast(tf.Tensor, Flatten()(y))
+     rec_cos = tf.reshape(keras.losses.cosine_similarity(y, x, -1), (-1, 1))
+     rec_euc = tf.reshape(relative_euclidean_distance(y, x, axis=-1), (-1, 1))
+     # rec_euc could become very large so should be clipped
+     rec_euc = tf.clip_by_value(rec_euc, 0, max_eucl)
+     return cast(tf.Tensor, tf.concat([rec_cos, rec_euc], -1))
+
+
+ class Sampling(Layer):
+     """Reparametrization trick. Uses (z_mean, z_log_var) to sample the latent vector z."""
+
+     def call(self, inputs: Tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
+         """
+         Sample z.
+
+         Parameters
+         ----------
+         inputs
+             Tuple with mean and log variance.
+
+         Returns
+         -------
+         Sampled vector z.
+         """
+         z_mean, z_log_var = inputs
+         batch, dim = tuple(tf.shape(z_mean).numpy().ravel()[:2])  # type: ignore
+         epsilon = cast(tf.Tensor, keras.backend.random_normal(shape=(batch, dim)))
+         return z_mean + tf.exp(tf.math.multiply(0.5, z_log_var)) * epsilon
+
+
+ class EncoderAE(Layer):
+     def __init__(self, encoder_net: keras.Model) -> None:
+         """
+         Encoder of AE.
+
+         Parameters
+         ----------
+         encoder_net
+             Layers for the encoder wrapped in a keras.Sequential class.
+         """
+         super().__init__(name="encoder_ae")
+         self.encoder_net = encoder_net
+
+     def call(self, x: tf.Tensor) -> tf.Tensor:
+         return cast(tf.Tensor, self.encoder_net(x))
+
+
+ class EncoderVAE(Layer):
+     def __init__(self, encoder_net: keras.Model, latent_dim: int) -> None:
+         """
+         Encoder of VAE.
+
+         Parameters
+         ----------
+         encoder_net
+             Layers for the encoder wrapped in a keras.Sequential class.
+         latent_dim
+             Dimensionality of the latent space.
+         """
+         super().__init__(name="encoder_vae")
+         self.encoder_net = encoder_net
+         self.fc_mean = Dense(latent_dim, activation=None)
+         self.fc_log_var = Dense(latent_dim, activation=None)
+         self.sampling = Sampling()
+
+     def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+         x = cast(tf.Tensor, self.encoder_net(x))
+         if len(x.shape) > 2:
+             x = cast(tf.Tensor, Flatten()(x))
+         z_mean = cast(tf.Tensor, self.fc_mean(x))
+         z_log_var = cast(tf.Tensor, self.fc_log_var(x))
+         z = cast(tf.Tensor, self.sampling((z_mean, z_log_var)))
+         return z_mean, z_log_var, z
+
+
+ class Decoder(Layer):
+     def __init__(self, decoder_net: keras.Model) -> None:
+         """
+         Decoder of AE and VAE.
+
+         Parameters
+         ----------
+         decoder_net
+             Layers for the decoder wrapped in a keras.Sequential class.
+         """
+         super().__init__(name="decoder")
+         self.decoder_net = decoder_net
+
+     def call(self, x: tf.Tensor) -> tf.Tensor:
+         return cast(tf.Tensor, self.decoder_net(x))
+
+
+ class AE(keras.Model):
+     """
+     Combine encoder and decoder in AE.
+
+     Parameters
+     ----------
+     encoder_net
+         Layers for the encoder wrapped in a keras.Sequential class.
+     decoder_net
+         Layers for the decoder wrapped in a keras.Sequential class.
+     """
+
+     def __init__(self, encoder_net: keras.Model, decoder_net: keras.Model) -> None:
+         super().__init__(name="ae")
+         self.encoder = EncoderAE(encoder_net)
+         self.decoder = Decoder(decoder_net)
+
+     def call(self, x: tf.Tensor) -> tf.Tensor:
+         z = cast(tf.Tensor, self.encoder(x))
+         x_recon = cast(tf.Tensor, self.decoder(z))
+         return x_recon
+
+
+ class VAE(keras.Model):
+     """
+     Combine encoder and decoder in VAE.
+
+     Parameters
+     ----------
+     encoder_net
+         Layers for the encoder wrapped in a keras.Sequential class.
+     decoder_net
+         Layers for the decoder wrapped in a keras.Sequential class.
+     latent_dim
+         Dimensionality of the latent space.
+     beta
+         Beta parameter for KL-divergence loss term.
+     """
+
+     def __init__(self, encoder_net: keras.Model, decoder_net: keras.Model, latent_dim: int, beta: float = 1.0) -> None:
+         super().__init__(name="vae_model")
+         self.encoder = EncoderVAE(encoder_net, latent_dim)
+         self.decoder = Decoder(decoder_net)
+         self.beta = beta
+         self.latent_dim = latent_dim
+
+     def call(self, x: tf.Tensor) -> tf.Tensor:
+         z_mean, z_log_var, z = cast(Tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+         x_recon = self.decoder(z)
+         # add KL divergence loss term
+         kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
+         self.add_loss(self.beta * kl_loss)
+         return cast(tf.Tensor, x_recon)
+
+
+ class AEGMM(keras.Model):
+     """
+     Deep Autoencoding Gaussian Mixture Model.
+
+     Parameters
+     ----------
+     encoder_net
+         Layers for the encoder wrapped in a keras.Sequential class.
+     decoder_net
+         Layers for the decoder wrapped in a keras.Sequential class.
+     gmm_density_net
+         Layers for the GMM network wrapped in a keras.Sequential class.
+     n_gmm
+         Number of components in GMM.
+     recon_features
+         Function to extract features from the reconstructed instance by the decoder.
+     """
+
+     def __init__(
+         self,
+         encoder_net: keras.Model,
+         decoder_net: keras.Model,
+         gmm_density_net: keras.Model,
+         n_gmm: int,
+         recon_features: Callable = eucl_cosim_features,
+     ) -> None:
+         super().__init__(name="aegmm")
+         self.encoder = encoder_net
+         self.decoder = decoder_net
+         self.gmm_density = gmm_density_net
+         self.n_gmm = n_gmm
+         self.recon_features = recon_features
+
+     def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+         enc = self.encoder(x)
+         x_recon = cast(tf.Tensor, self.decoder(enc))
+         recon_features = self.recon_features(x, x_recon)
+         z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
+         gamma = cast(tf.Tensor, self.gmm_density(z))
+         return x_recon, z, gamma
+
+
+ class VAEGMM(keras.Model):
+     """
+     Variational Autoencoding Gaussian Mixture Model.
+
+     Parameters
+     ----------
+     encoder_net
+         Layers for the encoder wrapped in a keras.Sequential class.
+     decoder_net
+         Layers for the decoder wrapped in a keras.Sequential class.
+     gmm_density_net
+         Layers for the GMM network wrapped in a keras.Sequential class.
+     n_gmm
+         Number of components in GMM.
+     latent_dim
+         Dimensionality of the latent space.
+     recon_features
+         Function to extract features from the reconstructed instance by the decoder.
+     beta
+         Beta parameter for KL-divergence loss term.
+     """
+
+     def __init__(
+         self,
+         encoder_net: keras.Model,
+         decoder_net: keras.Model,
+         gmm_density_net: keras.Model,
+         n_gmm: int,
+         latent_dim: int,
+         recon_features: Callable = eucl_cosim_features,
+         beta: float = 1.0,
+     ) -> None:
+         super().__init__(name="vaegmm")
+         self.encoder = EncoderVAE(encoder_net, latent_dim)
+         self.decoder = decoder_net
+         self.gmm_density = gmm_density_net
+         self.n_gmm = n_gmm
+         self.latent_dim = latent_dim
+         self.recon_features = recon_features
+         self.beta = beta
+
+     def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+         enc_mean, enc_log_var, enc = cast(Tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+         x_recon = cast(tf.Tensor, self.decoder(enc))
+         recon_features = self.recon_features(x, x_recon)
+         z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
+         gamma = cast(tf.Tensor, self.gmm_density(z))
+         # add KL divergence loss term
+         kl_loss = -0.5 * tf.reduce_mean(enc_log_var - tf.square(enc_mean) - tf.exp(enc_log_var) + 1)
+         self.add_loss(self.beta * kl_loss)
+         return x_recon, z, gamma
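
For orientation before the next file, here is a minimal usage sketch of the VAE class above. It is not part of the package; the toy encoder/decoder architectures, the (32, 32, 3) input shape, and the latent size of 8 are illustrative assumptions, and it relies on TF2 eager execution (Sampling calls .numpy()).

import keras
import tensorflow as tf

input_shape = (32, 32, 3)  # assumed for this sketch only
latent_dim = 8

# toy encoder: maps images to a flat feature vector
encoder_net = keras.Sequential(
    [
        keras.layers.InputLayer(input_shape=input_shape),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation="relu"),
    ]
)
# toy decoder: maps a latent vector back to image shape
decoder_net = keras.Sequential(
    [
        keras.layers.InputLayer(input_shape=(latent_dim,)),
        keras.layers.Dense(32 * 32 * 3),
        keras.layers.Reshape(input_shape),
    ]
)

vae = VAE(encoder_net, decoder_net, latent_dim=latent_dim, beta=1.0)
x = tf.random.normal((16,) + input_shape)  # dummy batch
x_recon = vae(x)  # forward pass; the KL term is registered via add_loss
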
@@ -0,0 +1,115 @@
+ """
+ Source code derived from Alibi-Detect 0.11.4
+ https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
+
+ Original code Copyright (c) 2023 Seldon Technologies Ltd
+ Licensed under Apache Software License (Apache 2.0)
+ """
+
+ from typing import NamedTuple, Tuple
+
+ import numpy as np
+ import tensorflow as tf
+
+
+ class GaussianMixtureModelParams(NamedTuple):
+     """
+     phi : tf.Tensor
+         Mixture component distribution weights.
+     mu : tf.Tensor
+         Mixture means.
+     cov : tf.Tensor
+         Mixture covariance.
+     L : tf.Tensor
+         Cholesky decomposition of `cov`.
+     log_det_cov : tf.Tensor
+         Log of the determinant of `cov`.
+     """
+
+     phi: tf.Tensor
+     mu: tf.Tensor
+     cov: tf.Tensor
+     L: tf.Tensor
+     log_det_cov: tf.Tensor
+
+
+ def gmm_params(z: tf.Tensor, gamma: tf.Tensor) -> GaussianMixtureModelParams:
+     """
+     Compute parameters of Gaussian Mixture Model.
+
+     Parameters
+     ----------
+     z : tf.Tensor
+         Observations.
+     gamma : tf.Tensor
+         Mixture probabilities to derive mixture distribution weights from.
+
+     Returns
+     -------
+     GaussianMixtureModelParams(phi, mu, cov, L, log_det_cov)
+         The parameters used to calculate energy.
+     """
+     # compute gmm parameters phi, mu and cov
+     N = gamma.shape[0]  # number of samples in batch
+     sum_gamma = tf.reduce_sum(gamma, 0)  # K
+     phi = sum_gamma / N  # K
+     mu = tf.reduce_sum(tf.expand_dims(gamma, -1) * tf.expand_dims(z, 1), 0) / tf.expand_dims(
+         sum_gamma, -1
+     )  # K x D (D = latent_dim)
+     z_mu = tf.expand_dims(z, 1) - tf.expand_dims(mu, 0)  # N x K x D
+     z_mu_outer = tf.expand_dims(z_mu, -1) * tf.expand_dims(z_mu, -2)  # N x K x D x D
+     cov = tf.reduce_sum(tf.expand_dims(tf.expand_dims(gamma, -1), -1) * z_mu_outer, 0) / tf.expand_dims(
+         tf.expand_dims(sum_gamma, -1), -1
+     )  # K x D x D
+
+     # cholesky decomposition of covariance and determinant derivation
+     D = tf.shape(cov)[1]  # type: ignore
+     eps = 1e-6
+     L = tf.linalg.cholesky(cov + tf.eye(D) * eps)  # K x D x D
+     log_det_cov = 2.0 * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L)), 1)  # K
+
+     return GaussianMixtureModelParams(phi, mu, cov, L, log_det_cov)
+
+
+ def gmm_energy(
+     z: tf.Tensor,
+     params: GaussianMixtureModelParams,
+     return_mean: bool = True,
+ ) -> Tuple[tf.Tensor, tf.Tensor]:
+     """
+     Compute sample energy from Gaussian Mixture Model.
+
+     Parameters
+     ----------
+     z : tf.Tensor
+         Observations.
+     params : GaussianMixtureModelParams
+         The Gaussian mixture model parameters.
+     return_mean : bool, default True
+         Take mean across all sample energies in a batch.
+
+     Returns
+     -------
+     sample_energy
+         The sample energy of the GMM.
+     cov_diag
+         The inverse sum of the diagonal components of the covariance matrix.
+     """
+     D = tf.shape(params.cov)[1]  # type: ignore
+     z_mu = tf.expand_dims(z, 1) - tf.expand_dims(params.mu, 0)  # N x K x D
+     z_mu_T = tf.transpose(z_mu, perm=[1, 2, 0])  # K x D x N
+     v = tf.linalg.triangular_solve(params.L, z_mu_T, lower=True)  # K x D x N
+
+     # rewrite sample energy in logsumexp format for numerical stability
+     logits = tf.math.log(tf.expand_dims(params.phi, -1)) - 0.5 * (
+         tf.reduce_sum(tf.square(v), 1)
+         + tf.cast(D, tf.float32) * tf.math.log(2.0 * np.pi)  # type: ignore py38
+         + tf.expand_dims(params.log_det_cov, -1)
+     )  # K x N
+     sample_energy = -tf.reduce_logsumexp(logits, axis=0)  # N
+
+     if return_mean:
+         sample_energy = tf.reduce_mean(sample_energy)
+
+     # inverse sum of variances
+     cov_diag = tf.reduce_sum(tf.divide(1, tf.linalg.diag_part(params.cov)))
+
+     return sample_energy, cov_diag
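
A quick shape-check sketch for gmm_params and gmm_energy above (not part of the package; random inputs, with N=256 samples, K=4 components, and D=10 latent dimensions as made-up values):

import tensorflow as tf

N, K, D = 256, 4, 10
z = tf.random.normal((N, D))  # observations in latent space
gamma = tf.nn.softmax(tf.random.normal((N, K)), axis=-1)  # soft mixture memberships

params = gmm_params(z, gamma)  # phi: (K,), mu: (K, D), cov and L: (K, D, D)
sample_energy, cov_diag = gmm_energy(z, params, return_mean=True)  # both scalars
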
@@ -0,0 +1,107 @@
+ """
+ Source code derived from Alibi-Detect 0.11.4
+ https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
+
+ Original code Copyright (c) 2023 Seldon Technologies Ltd
+ Licensed under Apache Software License (Apache 2.0)
+ """
+
+ from typing import Literal, Optional, Union, cast
+
+ import numpy as np
+ import tensorflow as tf
+ from keras.layers import Flatten
+ from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
+ from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
+ from tensorflow_probability.python.stats import covariance
+
+ from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
+
+
+ class Elbo:
+     """
+     Compute ELBO loss. The covariance matrix can be specified by passing the full covariance matrix, the matrix
+     diagonal, or a scale identity multiplier. Only one of these should be specified. If none are specified, the
+     identity matrix is used.
+
+     Parameters
+     ----------
+     cov_type
+         Full covariance matrix ("cov_full"), diagonal variance matrix ("cov_diag"), or scale identity multiplier.
+     x
+         Dataset used to calculate the covariance matrix. Required for full and diagonal covariance matrix types.
+     """
+
+     def __init__(
+         self,
+         cov_type: Union[Literal["cov_full", "cov_diag"], float] = 1.0,
+         x: Optional[Union[tf.Tensor, np.ndarray]] = None,
+     ):
+         if isinstance(cov_type, float):
+             self.cov = ("sim", cov_type)
+         elif cov_type in ["cov_full", "cov_diag"]:
+             x_np: np.ndarray = x.numpy() if tf.is_tensor(x) else x  # type: ignore
+             cov = covariance(x_np.reshape(x_np.shape[0], -1))  # type: ignore py38
+             if cov_type == "cov_diag":  # infer standard deviation from covariance matrix
+                 cov = tf.math.sqrt(tf.linalg.diag_part(cov))
+             self.cov = (cov_type, cov)
+         else:
+             raise ValueError("Only 'cov_full', 'cov_diag' or a scale identity multiplier should be specified.")
+
+     def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
+         y_pred_flat = cast(tf.Tensor, Flatten()(y_pred))
+
+         if self.cov[0] == "cov_full":
+             y_mn = MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self.cov[1]))
+         else:  # cov_diag and sim
+             cov_diag = self.cov[1] if self.cov[0] == "cov_diag" else self.cov[1] * tf.ones(y_pred_flat.shape[-1])
+             y_mn = MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
+
+         loss = -tf.reduce_mean(y_mn.log_prob(Flatten()(y_true)))
+         return loss
+
+
+ class LossGMM:
+     """
+     Loss function used for AE and VAE with GMM.
+
+     Parameters
+     ----------
+     w_recon
+         Weight on the ELBO reconstruction loss term.
+     w_energy
+         Weight on the sample energy loss term.
+     w_cov_diag
+         Weight on the covariance regularizing loss term.
+     elbo
+         ELBO loss function used to calculate the reconstruction term.
+     """
+
+     def __init__(
+         self,
+         w_recon: float = 1e-7,
+         w_energy: float = 0.1,
+         w_cov_diag: float = 0.005,
+         elbo: Optional[Elbo] = None,
+     ):
+         self.w_recon = w_recon
+         self.w_energy = w_energy
+         self.w_cov_diag = w_cov_diag
+         self.elbo = elbo
+
+     def __call__(
+         self,
+         x_true: tf.Tensor,
+         x_pred: tf.Tensor,
+         z: tf.Tensor,
+         gamma: tf.Tensor,
+     ) -> tf.Tensor:
+         w_recon = (
+             tf.reduce_mean(tf.subtract(x_true, x_pred) ** 2)
+             if self.elbo is None
+             else tf.multiply(self.w_recon, self.elbo(x_true, x_pred))
+         )
+         sample_energy, cov_diag = gmm_energy(z, gmm_params(z, gamma))
+         w_energy = tf.multiply(self.w_energy, sample_energy)
+         w_cov_diag = tf.multiply(self.w_cov_diag, cov_diag)
+         return w_recon + w_energy + w_cov_diag
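
Finally, a hedged sketch of Elbo and LossGMM in isolation (not part of the package; the 16-feature data, batch size of 8, and the 5-dimensional z with 4-component gamma are illustrative assumptions):

import numpy as np
import tensorflow as tf

x = np.random.rand(64, 16).astype(np.float32)  # data used to estimate the covariance
elbo = Elbo(cov_type="cov_diag", x=x)  # diagonal scale inferred from x

x_true = tf.constant(x[:8])
x_pred = x_true + 0.01 * tf.random.normal((8, 16))
recon = elbo(x_true, x_pred)  # negative mean log-probability under the Gaussian

loss_fn = LossGMM(elbo=elbo)
z = tf.random.normal((8, 5))  # latent vector plus reconstruction features
gamma = tf.nn.softmax(tf.random.normal((8, 4)), axis=-1)  # mixture memberships
loss = loss_fn(x_true, x_pred, z, gamma)  # weighted sum of recon, energy, cov terms
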