PyPI - dataeval - Versions diffs - 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl - Mend

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

dataeval/__init__.py +13 -9
dataeval/_internal/detectors/clusterer.py +24 -22
dataeval/_internal/detectors/drift/base.py +206 -26
dataeval/_internal/detectors/drift/cvm.py +25 -23
dataeval/_internal/detectors/drift/ks.py +28 -25
dataeval/_internal/detectors/drift/mmd.py +30 -29
dataeval/_internal/detectors/drift/torch.py +66 -58
dataeval/_internal/detectors/drift/uncertainty.py +28 -28
dataeval/_internal/detectors/duplicates.py +28 -18
dataeval/_internal/detectors/ood/ae.py +15 -29
dataeval/_internal/detectors/ood/aegmm.py +33 -27
dataeval/_internal/detectors/ood/base.py +61 -43
dataeval/_internal/detectors/ood/llr.py +27 -24
dataeval/_internal/detectors/ood/vae.py +32 -31
dataeval/_internal/detectors/ood/vaegmm.py +34 -28
dataeval/_internal/detectors/{linter.py → outliers.py} +33 -27
dataeval/_internal/flags.py +5 -3
dataeval/_internal/interop.py +4 -2
dataeval/_internal/metrics/balance.py +33 -4
dataeval/_internal/metrics/ber.py +6 -4
dataeval/_internal/metrics/diversity.py +45 -12
dataeval/_internal/metrics/parity.py +114 -26
dataeval/_internal/metrics/stats.py +154 -16
dataeval/_internal/metrics/uap.py +28 -2
dataeval/_internal/metrics/utils.py +20 -18
dataeval/_internal/models/pytorch/autoencoder.py +127 -22
dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
dataeval/_internal/models/tensorflow/gmm.py +4 -2
dataeval/_internal/models/tensorflow/losses.py +15 -11
dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
dataeval/_internal/models/tensorflow/trainer.py +8 -6
dataeval/_internal/models/tensorflow/utils.py +21 -19
dataeval/_internal/output.py +13 -10
dataeval/_internal/utils.py +5 -3
dataeval/_internal/workflows/sufficiency.py +42 -30
dataeval/detectors/__init__.py +6 -25
dataeval/detectors/drift/__init__.py +16 -0
dataeval/detectors/drift/kernels/__init__.py +6 -0
dataeval/detectors/drift/updates/__init__.py +3 -0
dataeval/detectors/linters/__init__.py +5 -0
dataeval/detectors/ood/__init__.py +11 -0
dataeval/metrics/__init__.py +2 -26
dataeval/metrics/bias/__init__.py +14 -0
dataeval/metrics/estimators/__init__.py +9 -0
dataeval/metrics/stats/__init__.py +6 -0
dataeval/tensorflow/__init__.py +3 -0
dataeval/tensorflow/loss/__init__.py +3 -0
dataeval/tensorflow/models/__init__.py +5 -0
dataeval/tensorflow/recon/__init__.py +3 -0
dataeval/torch/__init__.py +3 -0
dataeval/{models/torch → torch/models}/__init__.py +1 -2
dataeval/torch/trainer/__init__.py +3 -0
dataeval/utils/__init__.py +3 -6
dataeval/workflows/__init__.py +2 -4
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
dataeval-0.66.0.dist-info/RECORD +72 -0
dataeval/models/__init__.py +0 -15
dataeval/models/tensorflow/__init__.py +0 -6
dataeval-0.65.0.dist-info/RECORD +0 -60
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0

dataeval/_internal/detectors/drift/ks.py CHANGED Viewed

@@ -6,7 +6,9 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
-from typing import Callable, Literal, Optional, Tuple
+from __future__ import annotations
+from typing import Callable, Literal
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
@@ -19,38 +21,38 @@ from .base import BaseDriftUnivariate, UpdateStrategy, preprocess_x
 class DriftKS(BaseDriftUnivariate):
     """
-    Kolmogorov-Smirnov (K-S) data drift detector with Bonferroni or False Discovery
-    Rate (FDR) correction for multivariate data.
+    Drift detector employing the Kolmogorov-Smirnov (KS) distribution test.
+    The KS test detects changes in the maximum distance between two data
+    distributions with Bonferroni or False Discovery Rate (FDR) correction
+    for multivariate data.
     Parameters
     ----------
-    x_ref : NDArray
+    x_ref : ArrayLike
         Data used as reference distribution.
-    p_val : float, default 0.05
+    p_val : float | None, default 0.05
         p-value used for significance of the statistical test for each feature.
         If the FDR correction method is used, this corresponds to the acceptable
         q-value.
     x_ref_preprocessed : bool, default False
-        Whether the given reference data `x_ref` has been preprocessed yet. If
-        `x_ref_preprocessed=True`, only the test data `x` will be preprocessed at
-        prediction time. If `x_ref_preprocessed=False`, the reference data will also
-        be preprocessed.
-    update_x_ref : Optional[UpdateStrategy], default None
+        Whether the given reference data ``x_ref`` has been preprocessed yet.
+        If ``True``, only the test data ``x`` will be preprocessed at prediction time.
+        If ``False``, the reference data will also be preprocessed.
+    update_x_ref : UpdateStrategy | None, default None
         Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with
-        :py:class:`dataeval.detectors.LastSeenUpdateStrategy`
-        or via reservoir sampling with
-        :py:class:`dataeval.detectors.ReservoirSamplingUpdateStrategy`.
-    preprocess_fn : Optional[Callable[[NDArray], NDArray]], default None
+        using the last n instances seen by the detector with LastSeenUpdateStrategy
+        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+    preprocess_fn : Callable | None, default None
         Function to preprocess the data before computing the data drift metrics.
         Typically a dimensionality reduction technique.
-    correction : Literal["bonferroni", "fdr"], default "bonferroni"
+    correction : "bonferroni" | "fdr", default "bonferroni"
         Correction type for multivariate data. Either 'bonferroni' or 'fdr' (False
         Discovery Rate).
-    alternative : Literal["two-sided", "less", "greater"], default "two-sided"
+    alternative : "two-sided" | "less" | "greater", default "two-sided"
         Defines the alternative hypothesis. Options are 'two-sided', 'less' or
         'greater'.
-    n_features
+    n_features : int | None, default None
         Number of features used in the statistical test. No need to pass it if no
         preprocessing takes place. In case of a preprocessing step, this can also
         be inferred automatically but could be more expensive to compute.
@@ -61,11 +63,11 @@ class DriftKS(BaseDriftUnivariate):
         x_ref: ArrayLike,
         p_val: float = 0.05,
         x_ref_preprocessed: bool = False,
-        update_x_ref: Optional[UpdateStrategy] = None,
-        preprocess_fn: Optional[Callable[[ArrayLike], ArrayLike]] = None,
+        update_x_ref: UpdateStrategy | None = None,
+        preprocess_fn: Callable[[ArrayLike], ArrayLike] | None = None,
         correction: Literal["bonferroni", "fdr"] = "bonferroni",
         alternative: Literal["two-sided", "less", "greater"] = "two-sided",
-        n_features: Optional[int] = None,
+        n_features: int | None = None,
     ) -> None:
         super().__init__(
             x_ref=x_ref,
@@ -81,18 +83,19 @@ class DriftKS(BaseDriftUnivariate):
         self.alternative = alternative
     @preprocess_x
-    def score(self, x: ArrayLike) -> Tuple[NDArray[np.float32], NDArray[np.float32]]:
+    def score(self, x: ArrayLike) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
         """
-        Compute K-S scores and statistics per feature.
+        Compute KS scores and statistics per feature.
         Parameters
         ----------
-        x
+        x : ArrayLike
             Batch of instances.
         Returns
         -------
-        Feature level p-values and K-S statistics.
+        tuple[NDArray, NDArray]
+            Feature level p-values and KS statistic
         """
         x = to_numpy(x)
         x = x.reshape(x.shape[0], -1)

dataeval/_internal/detectors/drift/mmd.py CHANGED Viewed

@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from dataclasses import dataclass
-from typing import Callable, Optional, Tuple
+from typing import Callable
 import torch
 from numpy.typing import ArrayLike
@@ -15,13 +17,15 @@ from numpy.typing import ArrayLike
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.output import set_metadata
-from .base import BaseDrift, DriftOutput, UpdateStrategy, preprocess_x, update_x_ref
+from .base import BaseDrift, DriftBaseOutput, UpdateStrategy, preprocess_x, update_x_ref
 from .torch import GaussianRBF, get_device, mmd2_from_kernel_matrix
 @dataclass(frozen=True)
-class DriftMMDOutput(DriftOutput):
+class DriftMMDOutput(DriftBaseOutput):
     """
+    Output class for DriftMMD
     Attributes
     ----------
     is_drift : bool
@@ -51,28 +55,24 @@ class DriftMMD(BaseDrift):
     ----------
     x_ref : ArrayLike
         Data used as reference distribution.
-    p_val : float, default 0.05
-        p-value used for the significance of the permutation test.
+    p_val : float | None, default 0.05
+        p-value used for significance of the statistical test for each feature.
+        If the FDR correction method is used, this corresponds to the acceptable
+        q-value.
     x_ref_preprocessed : bool, default False
-        Whether the given reference data `x_ref` has been preprocessed yet. If
-        `x_ref_preprocessed=True`, only the test data `x` will be preprocessed
-        at prediction time. If `x_ref_preprocessed=False`, the reference data
-        will also be preprocessed.
-    preprocess_at_init : bool, default True
-        Whether to preprocess the reference data when the detector is instantiated.
-        Otherwise, the reference data will be preprocessed at prediction time. Only
-        applies if `x_ref_preprocessed=False`.
-    update_x_ref : Optional[UpdateStrategy], default None
+        Whether the given reference data ``x_ref`` has been preprocessed yet.
+        If ``True``, only the test data ``x`` will be preprocessed at prediction time.
+        If ``False``, the reference data will also be preprocessed.
+    update_x_ref : UpdateStrategy | None, default None
         Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with
-        :py:class:`dataeval.detectors.LastSeenUpdateStrategy`
-        or via reservoir sampling with
-        :py:class:`dataeval.detectors.ReservoirSamplingUpdateStrategy`.
-    preprocess_fn : Optional[Callable], default None
+        using the last n instances seen by the detector with LastSeenUpdateStrategy
+        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+    preprocess_fn : Callable | None, default None
         Function to preprocess the data before computing the data drift metrics.
-    kernel : Callable, default :py:class:`dataeval.detectors.GaussianRBF`
+        Typically a dimensionality reduction technique.
+    kernel : Callable, default GaussianRBF
         Kernel used for the MMD computation, defaults to Gaussian RBF kernel.
-    sigma : Optional[ArrayLike], default None
+    sigma : ArrayLike | None, default None
         Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple
         bandwidth values as an array. The kernel evaluation is then averaged over
         those bandwidths.
@@ -80,7 +80,7 @@ class DriftMMD(BaseDrift):
         Whether to already configure the kernel bandwidth from the reference data.
     n_permutations : int, default 100
         Number of permutations used in the permutation test.
-    device : Optional[str], default None
+    device : str | None, default None
         Device type used. The default None uses the GPU and falls back on CPU.
         Can be specified by passing either 'cuda', 'gpu' or 'cpu'.
     """
@@ -90,13 +90,13 @@ class DriftMMD(BaseDrift):
         x_ref: ArrayLike,
         p_val: float = 0.05,
         x_ref_preprocessed: bool = False,
-        update_x_ref: Optional[UpdateStrategy] = None,
-        preprocess_fn: Optional[Callable[[ArrayLike], ArrayLike]] = None,
+        update_x_ref: UpdateStrategy | None = None,
+        preprocess_fn: Callable[[ArrayLike], ArrayLike] | None = None,
         kernel: Callable = GaussianRBF,
-        sigma: Optional[ArrayLike] = None,
+        sigma: ArrayLike | None = None,
         configure_kernel_from_x_ref: bool = True,
         n_permutations: int = 100,
-        device: Optional[str] = None,
+        device: str | None = None,
     ) -> None:
         super().__init__(x_ref, p_val, x_ref_preprocessed, update_x_ref, preprocess_fn)
@@ -130,7 +130,7 @@ class DriftMMD(BaseDrift):
         return kernel_mat
     @preprocess_x
-    def score(self, x: ArrayLike) -> Tuple[float, float, float]:
+    def score(self, x: ArrayLike) -> tuple[float, float, float]:
         """
         Compute the p-value resulting from a permutation test using the maximum mean
         discrepancy as a distance measure between the reference data and the data to
@@ -143,8 +143,9 @@ class DriftMMD(BaseDrift):
         Returns
         -------
-        p-value obtained from the permutation test, the MMD^2 between the reference and
-        test set, and the MMD^2 threshold above which drift is flagged.
+        tuple(float, float, float)
+            p-value obtained from the permutation test, MMD^2 between the reference and test set,
+            and MMD^2 threshold above which drift is flagged
         """
         x = to_numpy(x)
         x_ref = torch.from_numpy(self.x_ref).to(self.device)

dataeval/_internal/detectors/drift/torch.py CHANGED Viewed

@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from functools import partial
-from typing import Callable, Optional, Type, Union
+from typing import Callable
 import numpy as np
 import torch
@@ -15,15 +17,15 @@ import torch.nn as nn
 from numpy.typing import NDArray
-def get_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
+def get_device(device: str | torch.device | None = None) -> torch.device:
     """
     Instantiates a PyTorch device object.
     Parameters
     ----------
-    device
-        Either `None`, a str ('gpu' or 'cpu') indicating the device to choose, or an
-        already instantiated device object. If `None`, the GPU is selected if it is
+    device : str | torch.device | None, default None
+        Either ``None``, a str ('gpu' or 'cpu') indicating the device to choose, or an
+        already instantiated device object. If ``None``, the GPU is selected if it is
         detected, otherwise the CPU is used as a fallback.
     Returns
@@ -49,18 +51,19 @@ def mmd2_from_kernel_matrix(
     Parameters
     ----------
-    kernel_mat
+    kernel_mat : torch.Tensor
         Kernel matrix between samples x and y.
-    m
+    m : int
         Number of instances in y.
-    permute
+    permute : bool, default False
         Whether to permute the row indices. Used for permutation tests.
-    zero_diag
+    zero_diag : bool, default True
         Whether to zero out the diagonal of the kernel matrix.
     Returns
     -------
-    MMD^2 between the samples from the kernel matrix.
+    torch.Tensor
+        MMD^2 between the samples from the kernel matrix.
     """
     n = kernel_mat.shape[0] - m
     if zero_diag:
@@ -75,35 +78,36 @@ def mmd2_from_kernel_matrix(
 def predict_batch(
-    x: Union[NDArray, torch.Tensor],
-    model: Union[Callable, nn.Module, nn.Sequential],
-    device: Optional[torch.device] = None,
+    x: NDArray | torch.Tensor,
+    model: Callable | nn.Module | nn.Sequential,
+    device: torch.device | None = None,
     batch_size: int = int(1e10),
-    preprocess_fn: Optional[Callable] = None,
-    dtype: Union[Type[np.generic], torch.dtype] = np.float32,
-) -> Union[NDArray, torch.Tensor, tuple]:
+    preprocess_fn: Callable | None = None,
+    dtype: type[np.generic] | torch.dtype = np.float32,
+) -> NDArray | torch.Tensor | tuple:
     """
     Make batch predictions on a model.
     Parameters
     ----------
-    x
+    x : np.ndarray | torch.Tensor
         Batch of instances.
-    model
+    model : Callable | nn.Module | nn.Sequential
         PyTorch model.
-    device
+    device : torch.device | None, default None
         Device type used. The default None tries to use the GPU and falls back on CPU.
         Can be specified by passing either torch.device('cuda') or torch.device('cpu').
-    batch_size
+    batch_size : int, default 1e10
         Batch size used during prediction.
-    preprocess_fn
+    preprocess_fn : Callable | None, default None
         Optional preprocessing function for each batch.
-    dtype
-        Model output type, e.g. np.float32 or torch.float32.
+    dtype : np.dtype | torch.dtype, default np.float32
+        Model output type, either a numpy or torch dtype, e.g. np.float32 or torch.float32.
     Returns
     -------
-    Numpy array, torch tensor or tuples of those with model outputs.
+    NDArray | torch.Tensor | tuple
+        Numpy array, torch tensor or tuples of those with model outputs.
     """
     device = get_device(device)
     if isinstance(x, np.ndarray):
@@ -143,7 +147,7 @@ def predict_batch(
                     torch.Tensor."
                 )
     concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
-    out: Union[tuple, np.ndarray, torch.Tensor] = (
+    out: tuple | np.ndarray | torch.Tensor = (
         tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds)  # type: ignore
     )
     return out
@@ -152,34 +156,35 @@ def predict_batch(
 def preprocess_drift(
     x: NDArray,
     model: nn.Module,
-    device: Optional[torch.device] = None,
-    preprocess_batch_fn: Optional[Callable] = None,
+    device: torch.device | None = None,
+    preprocess_batch_fn: Callable | None = None,
     batch_size: int = int(1e10),
-    dtype: Union[Type[np.generic], torch.dtype] = np.float32,
-) -> Union[NDArray, torch.Tensor, tuple]:
+    dtype: type[np.generic] | torch.dtype = np.float32,
+) -> NDArray | torch.Tensor | tuple:
     """
     Prediction function used for preprocessing step of drift detector.
     Parameters
     ----------
-    x
+    x : NDArray
         Batch of instances.
-    model
+    model : nn.Module
         Model used for preprocessing.
-    device
+    device : torch.device | None, default None
         Device type used. The default None tries to use the GPU and falls back on CPU.
         Can be specified by passing either torch.device('cuda') or torch.device('cpu').
-    preprocess_batch_fn
+    preprocess_batch_fn : Callable | None, default None
         Optional batch preprocessing function. For example to convert a list of objects
         to a batch which can be processed by the PyTorch model.
-    batch_size
+    batch_size : int, default 1e10
         Batch size used during prediction.
-    dtype
-        Model output type, e.g. np.float32 or torch.float32.
+    dtype : np.dtype | torch.dtype, default np.float32
+        Model output type, either a numpy or torch dtype, e.g. np.float32 or torch.float32.
     Returns
     -------
-    Numpy array or torch tensor with predictions.
+    NDArray | torch.Tensor | tuple
+        Numpy array, torch tensor or tuples of those with model outputs.
     """
     return predict_batch(
         x,
@@ -200,15 +205,17 @@ def squared_pairwise_distance(
     Parameters
     ----------
-    x
+    x : torch.Tensor
         Batch of instances of shape [Nx, features].
-    y
+    y : torch.Tensor
         Batch of instances of shape [Ny, features].
-    a_min
+    a_min : float
         Lower bound to clip distance values.
     Returns
     -------
-    Pairwise squared Euclidean distance [Nx, Ny].
+    torch.Tensor
+        Pairwise squared Euclidean distance [Nx, Ny].
     """
     x2 = x.pow(2).sum(dim=-1, keepdim=True)
     y2 = y.pow(2).sum(dim=-1, keepdim=True)
@@ -222,17 +229,18 @@ def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.
     Parameters
     ----------
-    x
+    x : torch.Tensor
         Tensor of instances with dimension [Nx, features].
-    y
+    y : torch.Tensor
         Tensor of instances with dimension [Ny, features].
-    dist
+    dist : torch.Tensor
         Tensor with dimensions [Nx, Ny], containing the pairwise distances
         between `x` and `y`.
     Returns
     -------
-    The computed bandwidth, `sigma`.
+    torch.Tensor
+        The computed bandwidth, `sigma`.
     """
     n = min(x.shape[0], y.shape[0])
     n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
@@ -243,28 +251,28 @@ def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.
 class GaussianRBF(nn.Module):
     """
-    Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2)||x-y||^2). A forward pass
-    takes a batch of instances x [Nx, features] and y [Ny, features] and returns
-    the kernel matrix [Nx, Ny].
+    Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2))*||x-y||^2).
+    A forward pass takes a batch of instances x [Nx, features] and
+    y [Ny, features] and returns the kernel matrix [Nx, Ny].
     Parameters
     ----------
-    sigma : Optional[torch.Tensor], default None
+    sigma : torch.Tensor | None, default None
         Bandwidth used for the kernel. Needn't be specified if being inferred or
         trained. Can pass multiple values to eval kernel with and then average.
-    init_sigma_fn : Optional[Callable], default None
-        Function used to compute the bandwidth `sigma`. Used when `sigma` is to be
-        inferred. The function's signature should take in the tensors `x`, `y` and
-        `dist` and return `sigma`. If `None`, it is set to
-        :func:`~dataeval._internal.detectors.drift.torch.sigma_median`.
+    init_sigma_fn : Callable | None, default None
+        Function used to compute the bandwidth ``sigma``. Used when ``sigma`` is to be
+        inferred. The function's signature should take in the tensors ``x``, ``y`` and
+        ``dist`` and return ``sigma``. If ``None``, it is set to ``sigma_median``.
     trainable : bool, default False
         Whether or not to track gradients w.r.t. `sigma` to allow it to be trained.
     """
     def __init__(
         self,
-        sigma: Optional[torch.Tensor] = None,
-        init_sigma_fn: Optional[Callable] = None,
+        sigma: torch.Tensor | None = None,
+        init_sigma_fn: Callable | None = None,
         trainable: bool = False,
     ) -> None:
         super().__init__()
@@ -290,8 +298,8 @@ class GaussianRBF(nn.Module):
     def forward(
         self,
-        x: Union[np.ndarray, torch.Tensor],
-        y: Union[np.ndarray, torch.Tensor],
+        x: np.ndarray | torch.Tensor,
+        y: np.ndarray | torch.Tensor,
         infer_sigma: bool = False,
     ) -> torch.Tensor:
         x, y = torch.as_tensor(x), torch.as_tensor(y)

dataeval/_internal/detectors/drift/uncertainty.py CHANGED Viewed

@@ -6,15 +6,17 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from functools import partial
-from typing import Callable, Literal, Optional
+from typing import Callable, Literal
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.special import softmax
 from scipy.stats import entropy
-from .base import DriftUnivariateOutput, UpdateStrategy
+from .base import DriftOutput, UpdateStrategy
 from .ks import DriftKS
 from .torch import get_device, preprocess_drift
@@ -29,18 +31,19 @@ def classifier_uncertainty(
     Parameters
     ----------
-    x
+    x : np.ndarray
         Batch of instances.
-    model_fn
+    model_fn : Callable
         Function that evaluates a classification model on x in a single call (contains
         batching logic if necessary).
-    preds_type
+    preds_type : "probs" | "logits", default "probs"
         Type of prediction output by the model. Options are 'probs' (in [0,1]) or
         'logits' (in [-inf,inf]).
     Returns
     -------
-    A scalar indication of uncertainty of the model on each instance in x.
+    NDArray
+        A scalar indication of uncertainty of the model on each instance in x.
     """
     preds = model_fn(x)
@@ -61,42 +64,38 @@ def classifier_uncertainty(
 class DriftUncertainty:
     """
     Test for a change in the number of instances falling into regions on which the
-    model is uncertain. Performs a K-S test on prediction entropies.
+    model is uncertain.
+    Performs a K-S test on prediction entropies.
     Parameters
     ----------
     x_ref : ArrayLike
-        Data used as reference distribution. Should be disjoint from the data the
-        model was trained on for accurate p-values.
+        Data used as reference distribution.
     model : Callable
         Classification model outputting class probabilities (or logits)
     p_val : float, default 0.05
         p-value used for the significance of the test.
     x_ref_preprocessed : bool, default False
-        Whether the given reference data `x_ref` has been preprocessed yet. If
-        `x_ref_preprocessed=True`, only the test data `x` will be preprocessed at
-        prediction time. If `x_ref_preprocessed=False`, the reference data will
-        also be preprocessed.
-    update_x_ref : Optional[UpdateStrategy], default None
+        Whether the given reference data ``x_ref`` has been preprocessed yet.
+        If ``True``, only the test data ``x`` will be preprocessed at prediction time.
+        If ``False``, the reference data will also be preprocessed.
+    update_x_ref : UpdateStrategy | None, default None
         Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with
-        :py:class:`dataeval.detectors.LastSeenUpdateStrategy`
-        or via reservoir sampling with
-        :py:class:`dataeval.detectors.ReservoirSamplingUpdateStrategy`.
-    preds_type : Literal["probs", "logits"], default "logits"
+        using the last n instances seen by the detector with LastSeenUpdateStrategy
+        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+    preds_type : "probs" | "logits", default "probs"
         Type of prediction output by the model. Options are 'probs' (in [0,1]) or
         'logits' (in [-inf,inf]).
     batch_size : int, default 32
         Batch size used to evaluate model. Only relevant when backend has been
         specified for batch prediction.
-    preprocess_batch_fn : Optional[Callable], default None
+    preprocess_batch_fn : Callable | None, default None
         Optional batch preprocessing function. For example to convert a list of
         objects to a batch which can be processed by the model.
-    device : Optional[str], default None
+    device : str | None, default None
         Device type used. The default None tries to use the GPU and falls back on
         CPU if needed. Can be specified by passing either 'cuda', 'gpu' or 'cpu'.
-    input_shape : Optional[tuple], default None
-        Shape of input data.
     """
     def __init__(
@@ -105,11 +104,11 @@ class DriftUncertainty:
         model: Callable,
         p_val: float = 0.05,
         x_ref_preprocessed: bool = False,
-        update_x_ref: Optional[UpdateStrategy] = None,
+        update_x_ref: UpdateStrategy | None = None,
         preds_type: Literal["probs", "logits"] = "probs",
         batch_size: int = 32,
-        preprocess_batch_fn: Optional[Callable] = None,
-        device: Optional[str] = None,
+        preprocess_batch_fn: Callable | None = None,
+        device: str | None = None,
     ) -> None:
         def model_fn(x: NDArray) -> NDArray:
             return preprocess_drift(
@@ -134,7 +133,7 @@ class DriftUncertainty:
             preprocess_fn=preprocess_fn,  # type: ignore
         )
-    def predict(self, x: ArrayLike) -> DriftUnivariateOutput:
+    def predict(self, x: ArrayLike) -> DriftOutput:
         """
         Predict whether a batch of data has drifted from the reference data.
@@ -145,6 +144,7 @@ class DriftUncertainty:
         Returns
         -------
-        Dictionary containing the drift prediction, p-value, and threshold statistics.
+        DriftOutput
+            Dictionary containing the drift prediction, p-value, and threshold statistics.
         """
         return self._detector.predict(x)

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl