PyPI - dataeval - Versions diffs - 0.88.0__py3-none-any.whl → 0.89.0__py3-none-any.whl - Mend

dataeval 0.88.0__py3-none-any.whl → 0.89.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

dataeval/_version.py +2 -2
dataeval/data/_embeddings.py +2 -2
dataeval/data/_metadata.py +2 -1
dataeval/detectors/drift/_base.py +152 -27
dataeval/detectors/drift/_cvm.py +44 -25
dataeval/detectors/drift/_ks.py +56 -28
dataeval/detectors/drift/_mmd.py +44 -18
dataeval/detectors/drift/_uncertainty.py +119 -45
dataeval/outputs/_drift.py +67 -29
dataeval/outputs/_workflows.py +81 -17
dataeval/typing.py +23 -4
dataeval/workflows/sufficiency.py +1 -2
{dataeval-0.88.0.dist-info → dataeval-0.89.0.dist-info}/METADATA +1 -1
{dataeval-0.88.0.dist-info → dataeval-0.89.0.dist-info}/RECORD +16 -16
{dataeval-0.88.0.dist-info → dataeval-0.89.0.dist-info}/WHEEL +0 -0
{dataeval-0.88.0.dist-info → dataeval-0.89.0.dist-info}/licenses/LICENSE +0 -0

dataeval/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.88.0'
-__version_tuple__ = version_tuple = (0, 88, 0)
+__version__ = version = '0.89.0'
+__version_tuple__ = version_tuple = (0, 89, 0)

dataeval/data/_embeddings.py CHANGED Viewed

@@ -5,7 +5,7 @@ __all__ = []
 import logging
 import math
 import os
-from collections.abc import Iterator, Sequence
+from collections.abc import Iterable, Iterator, Sequence
 from pathlib import Path
 from typing import Any, cast
@@ -80,7 +80,7 @@ class Embeddings:
         # Technically more permissive than ImageClassificationDataset or ObjectDetectionDataset
         dataset: Dataset[tuple[ArrayLike, Any, Any]] | Dataset[ArrayLike],
         batch_size: int,
-        transforms: Transform[torch.Tensor] | Sequence[Transform[torch.Tensor]] | None = None,
+        transforms: Transform[torch.Tensor] | Iterable[Transform[torch.Tensor]] | None = None,
         model: torch.nn.Module | None = None,
         device: DeviceLike | None = None,
         cache: Path | str | bool = False,

dataeval/data/_metadata.py CHANGED Viewed

@@ -15,6 +15,7 @@ from tqdm.auto import tqdm
 from dataeval.typing import (
     AnnotatedDataset,
     Array,
+    DatumMetadata,
     ObjectDetectionTarget,
 )
 from dataeval.utils._array import as_numpy
@@ -76,7 +77,7 @@ class Metadata:
     def __init__(
         self,
-        dataset: AnnotatedDataset[tuple[Any, Any, Mapping[str, Any]]],
+        dataset: AnnotatedDataset[tuple[Any, Any, DatumMetadata]],
         *,
         continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
         auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",

dataeval/detectors/drift/_base.py CHANGED Viewed

@@ -55,6 +55,42 @@ def update_strategy(fn: Callable[..., R]) -> Callable[..., R]:
 class BaseDrift:
+    """Base class for drift detection algorithms.
+    Provides common functionality for drift detectors including reference data
+    management, encoding of input data, and statistical correction methods.
+    Subclasses implement specific drift detection algorithms.
+    Parameters
+    ----------
+    data : Embeddings or Array
+        Reference dataset used as baseline for drift detection.
+        Can be image embeddings or raw arrays.
+    p_val : float, default 0.05
+        Significance threshold for drift detection, between 0 and 1.
+        Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
+    update_strategy : UpdateStrategy or None, default None
+        Strategy for updating reference data when new data arrives.
+        When None, reference data remains fixed throughout detection.
+        Default None maintains stable baseline for consistent comparison.
+    correction : {"bonferroni", "fdr"}, default "bonferroni"
+        Multiple testing correction method for multivariate drift detection.
+        "bonferroni" provides conservative family-wise error control.
+        "fdr" (False Discovery Rate) offers less conservative control.
+        Default "bonferroni" minimizes false positive drift detections.
+    Attributes
+    ----------
+    p_val : float
+        Significance threshold for statistical tests.
+    update_strategy : UpdateStrategy or None
+        Reference data update strategy.
+    correction : {"bonferroni", "fdr"}
+        Multiple testing correction method.
+    n : int
+        Number of samples in the reference dataset.
+    """
     p_val: float
     update_strategy: UpdateStrategy | None
     correction: Literal["bonferroni", "fdr"]
@@ -83,19 +119,43 @@ class BaseDrift:
     @property
     def x_ref(self) -> NDArray[np.float32]:
-        """
-        Retrieve the reference data of the drift detector.
+        """Reference data for drift detection.
+        Lazily encodes the reference dataset on first access.
+        Data is flattened and converted to 32-bit floating point for
+        consistent numerical processing across different input types.
         Returns
         -------
         NDArray[np.float32]
-            The reference data as a 32-bit floating point numpy array.
+            Reference data as flattened 32-bit floating point array.
+            Shape is (n_samples, n_features_flattened).
+        Notes
+        -----
+        Data is cached after first access to avoid repeated encoding overhead.
         """
         if self._x_ref is None:
             self._x_ref = self._encode(self._data)
         return self._x_ref
     def _encode(self, data: Embeddings | Array) -> NDArray[np.float32]:
+        """
+        Encode input data to consistent numpy format.
+        Handles different input types (Embeddings, Arrays) and converts
+        them to flattened 32-bit floating point arrays for drift detection.
+        Parameters
+        ----------
+        data : Embeddings or Array
+            Input data to encode.
+        Returns
+        -------
+        NDArray[np.float32]
+            Encoded data as flattened 32-bit floating point array.
+        """
         array = (
             data.to_numpy().astype(np.float32)
             if isinstance(data, Embeddings)
@@ -107,6 +167,46 @@ class BaseDrift:
 class BaseDriftUnivariate(BaseDrift):
+    """
+    Base class for univariate drift detection algorithms.
+    Extends BaseDrift with feature-wise drift detection capabilities.
+    Applies statistical tests independently to each feature (pixel) and
+    uses multiple testing correction to control false discovery rates.
+    Parameters
+    ----------
+    data : Embeddings or Array
+        Reference dataset used as baseline for drift detection.
+    p_val : float, default 0.05
+        Significance threshold for drift detection, between 0 and 1.
+        Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
+    update_strategy : UpdateStrategy or None, default None
+        Strategy for updating reference data when new data arrives.
+        When None, reference data remains fixed throughout detection.
+        Default None maintains stable baseline for consistent comparison.
+    correction : {"bonferroni", "fdr"}, default "bonferroni"
+        Multiple testing correction method for controlling false positives
+        across multiple features. "bonferroni" divides significance level
+        by number of features. "fdr" uses Benjamini-Hochberg procedure.
+        Default "bonferroni" provides conservative family-wise error control.
+    n_features : int or None, default None
+        Number of features to analyze. When None, automatically inferred
+        from the first sample's flattened shape. Default None enables
+        automatic feature detection for flexible input handling.
+    Attributes
+    ----------
+    p_val : float
+        Significance threshold for statistical tests.
+    update_strategy : UpdateStrategy or None
+        Reference data update strategy.
+    correction : {"bonferroni", "fdr"}
+        Multiple testing correction method.
+    n : int
+        Number of samples in the reference dataset.
+    """
     def __init__(
         self,
         data: Embeddings | Array,
@@ -121,16 +221,22 @@ class BaseDriftUnivariate(BaseDrift):
     @property
     def n_features(self) -> int:
-        """
-        Get the number of features in the reference data.
+        """Number of features in the reference data.
-        If the number of features is not provided during initialization, it will be inferred
-        from the reference data (``x_ref``).
+        Lazily computes the number of features from the first data sample
+        if not provided during initialization. Features correspond to the
+        flattened dimensionality of the input data (e.g., pixels for images).
         Returns
         -------
         int
-            Number of features in the reference data.
+            Number of features (flattened dimensions) in the reference data.
+            Always > 0 for valid datasets.
+        Notes
+        -----
+        For image data, this equals C x H x W.
+        Computed once and cached for efficiency.
         """
         # lazy process n_features as needed
         if self._n_features is None:
@@ -139,18 +245,27 @@ class BaseDriftUnivariate(BaseDrift):
         return self._n_features
     def score(self, data: Embeddings | Array) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
-        """
-        Calculates p-values and test statistics per feature.
+        """Calculate feature-wise p-values and test statistics.
+        Applies the detector's statistical test independently to each feature,
+        comparing the distribution of each feature between reference and test data.
         Parameters
         ----------
         data : Embeddings or Array
-            Batch of instances to score.
+            Test dataset to compare against reference data.
         Returns
         -------
-        tuple[NDArray, NDArray]
-            Feature level p-values and test statistics
+        tuple[NDArray[np.float32], NDArray[np.float32]]
+            First array contains p-values for each feature (all between 0 and 1).
+            Second array contains test statistics for each feature (all >= 0).
+            Both arrays have shape (n_features,).
+        Notes
+        -----
+        Lower p-values indicate stronger evidence of drift for that feature.
+        Higher test statistics indicate greater distributional differences.
         """
         x_np = self._encode(data)
         p_val = np.zeros(self.n_features, dtype=np.float32)
@@ -164,22 +279,29 @@ class BaseDriftUnivariate(BaseDrift):
     def _apply_correction(self, p_vals: NDArray[np.float32]) -> tuple[bool, float]:
         """
-        Apply the specified correction method (Bonferroni or FDR) to the p-values.
+        Apply multiple testing correction to feature-wise p-values.
-        If the correction method is Bonferroni, the threshold for detecting :term:`drift<Drift>`
-        is divided by the number of features. For FDR, the correction is applied
-        using the Benjamini-Hochberg procedure.
+        Corrects for multiple comparisons across features to control
+        false positive rates. Bonferroni correction divides the significance
+        threshold by the number of features. FDR correction uses the
+        Benjamini-Hochberg procedure for less conservative control.
         Parameters
         ----------
-        p_vals : NDArray
-            Array of p-values from the univariate tests for each feature.
+        p_vals : NDArray[np.float32]
+            Array of p-values from univariate tests for each feature.
+            All values should be between 0 and 1.
         Returns
         -------
         tuple[bool, float]
-            A tuple containing a boolean indicating if drift was detected and the
-            threshold after correction.
+            Boolean indicating whether drift was detected after correction.
+            Float is the effective threshold used for detection.
+        Notes
+        -----
+        Bonferroni correction: threshold = p_val / n_features
+        FDR correction: Uses Benjamini-Hochberg step-up procedure
         """
         if self.correction == "bonferroni":
             threshold = self.p_val / self.n_features
@@ -201,21 +323,24 @@ class BaseDriftUnivariate(BaseDrift):
     @set_metadata
     @update_strategy
     def predict(self, data: Embeddings | Array) -> DriftOutput:
-        """
-        Predict whether a batch of data has drifted from the reference data and update
-        reference data using specified update strategy.
+        """Predict drift and update reference data using specified strategy.
+        Performs feature-wise drift detection, applies multiple testing
+        correction, and optionally updates the reference dataset based
+        on the configured update strategy.
         Parameters
         ----------
         data : Embeddings or Array
-            Batch of instances to predict drift on.
+            Test dataset to analyze for drift against reference data.
         Returns
         -------
         DriftOutput
-            Dictionary containing the :term:`drift<Drift>` prediction and optionally the feature level
-            p-values, threshold after multivariate correction if needed and test :term:`statistics<Statistics>`.
+            Complete drift detection results including overall :term:`drift<Drift>` prediction,
+            corrected thresholds, feature-level analysis, and summary :term:`statistics<Statistics>`.
         """
         # compute drift scores
         p_vals, dist = self.score(data)

dataeval/detectors/drift/_cvm.py CHANGED Viewed

@@ -22,47 +22,66 @@ from dataeval.typing import Array
 class DriftCVM(BaseDriftUnivariate):
-    """
-    :term:`Drift` detector employing the :term:`Cramér-von Mises (CVM) Drift Detection` test.
+    """:term:`Drift` detector using the :term:`Cramér-von Mises (CVM) Test`.
+    Detects distributional changes in continuous data by comparing empirical
+    cumulative distribution functions between reference and test datasets.
+    For multivariate data, applies CVM test independently to each feature
+    and aggregates results using either the Bonferroni or
+    :term:`False Discovery Rate (FDR)` correction.
-    The CVM test detects changes in the distribution of continuous
-    univariate data. For multivariate data, a separate CVM test is applied to each
-    feature, and the obtained p-values are aggregated via the Bonferroni or
-    :term:`False Discovery Rate (FDR)` corrections.
+    The CVM test is particularly effective at detecting subtle
+    distributional shifts throughout the entire domain, providing higher
+    power than Kolmogorov-Smirnov for many types of drift.
     Parameters
     ----------
     data : Embeddings or Array
-        Data used as reference distribution.
-    p_val : float or None, default 0.05
-        :term:`p-value<P-Value>` used for significance of the statistical test for each feature.
-        If the FDR correction method is used, this corresponds to the acceptable
-        q-value.
+        Reference dataset used as baseline distribution for drift detection.
+        Should represent the expected data distribution.
+    p_val : float, default 0.05
+        Significance threshold for drift detection, between 0 and 1.
+        Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
     update_strategy : UpdateStrategy or None, default None
-        Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with LastSeenUpdateStrategy
-        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+        Strategy for updating reference data when new data arrives.
+        When None, reference data remains fixed throughout detection.
     correction : "bonferroni" or "fdr", default "bonferroni"
-        Correction type for multivariate data. Either 'bonferroni' or 'fdr' (False
-        Discovery Rate).
+        Multiple testing correction method for multivariate drift detection.
+        "bonferroni" provides conservative family-wise error control by
+        dividing significance threshold by number of features.
+        "fdr" uses Benjamini-Hochberg procedure for less conservative control.
+        Default "bonferroni" minimizes false positive drift detections.
     n_features : int or None, default None
-        Number of features used in the univariate drift tests. If not provided, it will
-        be inferred from the data.
+        Number of features to analyze in univariate tests.
+        When None, automatically inferred from the flattened shape of first data sample.
     Example
     -------
+    Basic drift detection with image embeddings
     >>> from dataeval.data import Embeddings
+    >>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
+    >>> drift_detector = DriftCVM(train_emb)
-    Use Embeddings to encode images before testing for drift
+    Test incoming images for distributional drift
-    >>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
-    >>> drift = DriftCVM(train_emb)
+    >>> result = drift_detector.predict(test_images)
+    >>> print(f"Drift detected: {result.drifted}")
+    Drift detected: True
+    >>> print(f"Mean CVM statistic: {result.distance:.4f}")
+    Mean CVM statistic: 24.1325
+    Using different correction methods
+    >>> drift_fdr = DriftCVM(train_emb, correction="fdr", p_val=0.1)
+    >>> result = drift_fdr.predict(test_images)
-    Test incoming images for drift
+    Access feature level results
-    >>> drift.predict(test_images).drifted
-    True
+    >>> n_features = result.feature_drift
+    >>> print(f"Features showing drift: {n_features.sum()} / {len(n_features)}")
+    Features showing drift: 576 / 576
     """
     def __init__(

dataeval/detectors/drift/_ks.py CHANGED Viewed

@@ -22,49 +22,77 @@ from dataeval.typing import Array
 class DriftKS(BaseDriftUnivariate):
-    """
-    :term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
+    """:term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
     distribution<Kolmogorov-Smirnov (K-S) test>` test.
-    The KS test detects changes in the maximum distance between two data
-    distributions with Bonferroni or :term:`False Discovery Rate (FDR)` correction
-    for multivariate data.
+    Detects distributional changes by measuring the maximum distance between
+    empirical cumulative distribution functions of reference and test datasets.
+    For multivariate data, applies KS test independently to each feature
+    and aggregates results using multiple testing correction.
+    The Kolmogorov-Smirnov test is particularly sensitive to differences in
+    the middle portions of distributions but has reduced power in the tails
+    where cumulative distribution functions are constrained near 0 and 1.
     Parameters
     ----------
     data : Embeddings or Array
-        Data used as reference distribution.
-    p_val : float or None, default 0.05
-        :term:`p-value<P-Value>` used for significance of the statistical test for each feature.
-        If the FDR correction method is used, this corresponds to the acceptable
-        q-value.
+        Reference dataset used as baseline distribution for drift detection.
+        Should represent the expected data distribution.
+    p_val : float, default 0.05
+        Significance threshold for drift detection, between 0 and 1.
+        Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
     update_strategy : UpdateStrategy or None, default None
-        Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with LastSeenUpdateStrategy
-        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+        Strategy for updating reference data when new data arrives.
+        When None, reference data remains fixed throughout detection.
     correction : "bonferroni" or "fdr", default "bonferroni"
-        Correction type for multivariate data. Either 'bonferroni' or 'fdr' (False
-        Discovery Rate).
+        Multiple testing correction method for multivariate drift detection.
+        "bonferroni" provides conservative family-wise error control by
+        dividing significance threshold by number of features.
+        "fdr" uses Benjamini-Hochberg procedure for less conservative control.
+        Default "bonferroni" minimizes false positive drift detections.
     alternative : "two-sided", "less" or "greater", default "two-sided"
-        Defines the alternative hypothesis. Options are 'two-sided', 'less' or
-        'greater'.
+        Alternative hypothesis for the statistical test. "two-sided" detects
+        any distributional difference. "less" tests if test distribution is
+        stochastically smaller. "greater" tests if test distribution is
+        stochastically larger. Default "two-sided" provides most general
+        drift detection without directional assumptions.
     n_features : int | None, default None
-        Number of features used in the univariate drift tests. If not provided, it will
-        be inferred from the data.
+        Number of features to analyze in univariate tests.
+        When None, automatically inferred from the flattened shape of first data sample.
     Example
     -------
-    >>> from dataeval.data import Embeddings
-    Use Embeddings to encode images before testing for drift
+    Basic drift detection with image embeddings:
+    >>> from dataeval.data import Embeddings
     >>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
-    >>> drift = DriftKS(train_emb)
-    Test incoming images for drift
-    >>> drift.predict(test_images).drifted
-    True
+    >>> drift_detector = DriftKS(train_emb)
+    Test incoming images for distributional drift
+    >>> result = drift_detector.predict(test_images)
+    >>> print(f"Drift detected: {result.drifted}")
+    Drift detected: True
+    >>> print(f"Mean KS statistic: {result.distance:.4f}")
+    Mean KS statistic: 0.8750
+    Detect if test data has systematically higher values
+    >>> drift_greater = DriftKS(train_emb, alternative="greater")
+    >>> result = drift_greater.predict(test_images)
+    Using different correction methods
+    >>> drift_fdr = DriftKS(train_emb, correction="fdr", p_val=0.1)
+    >>> result = drift_fdr.predict(test_images)
+    Access feature-level results
+    >>> n_features = result.feature_drift
+    >>> print(f"Features showing drift: {n_features.sum()} / {len(n_features)}")
+    Features showing drift: 576 / 576
     """
     def __init__(

dataeval/detectors/drift/_mmd.py CHANGED Viewed

@@ -24,31 +24,57 @@ from dataeval.typing import Array
 class DriftMMD(BaseDrift):
-    """
-    :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm \
-    using a permutation test.
+    """Drift detector using :term:`Maximum Mean Discrepancy (MMD) Drift Detection` with permutation test.
+    Detects distributional differences by comparing kernel embeddings of reference
+    and test datasets in a reproducing kernel Hilbert space (RKHS). Uses permutation
+    testing to assess statistical significance of the observed MMD^2 statistic.
+    MMD is particularly effective for high-dimensional data like images as it can
+    capture complex distributional differences that univariate tests might miss.
+    The kernel-based approach enables detection of both marginal and dependency
+    changes between features.
     Parameters
     ----------
     data : Embeddings or Array
-        Data used as reference distribution.
-    p_val : float or None, default 0.05
-        :term:`P-value` used for significance of the statistical test for each feature.
-        If the FDR correction method is used, this corresponds to the acceptable
-        q-value.
+        Reference dataset used as baseline distribution for drift detection.
+        Should represent the expected data distribution.
+    p_val : float, default 0.05
+        Significance threshold for statistical tests, between 0 and 1.
+        For FDR correction, this represents the acceptable false discovery rate.
+        Default 0.05 provides 95% confidence level for drift detection.
     update_strategy : UpdateStrategy or None, default None
-        Reference data can optionally be updated using an UpdateStrategy class. Update
-        using the last n instances seen by the detector with LastSeenUpdateStrategy
-        or via reservoir sampling with ReservoirSamplingUpdateStrategy.
+        Strategy for updating reference data when new data arrives.
+        When None, reference data remains fixed throughout detection.
     sigma : Array or None, default None
-        Optionally set the internal GaussianRBF kernel bandwidth. Can also pass multiple
-        bandwidth values as an array. The kernel evaluation is then averaged over
-        those bandwidths.
+        Bandwidth parameter(s) for the Gaussian RBF kernel. Controls the
+        kernel's sensitivity to distance between data points. When None,
+        automatically selects bandwidth using median heuristic. Can provide
+        multiple values as array to average over different scales.
     n_permutations : int, default 100
-        Number of permutations used in the permutation test.
+        Number of random permutations used in the permutation test to estimate
+        the null distribution of MMD² under no drift. Higher values provide
+        more accurate p-value estimates but increase computation time.
+        Default 100 balances statistical accuracy with computational efficiency.
     device : DeviceLike or None, default None
-        The hardware device to use if specified, otherwise uses the DataEval
-        default or torch default.
+        Hardware device for computation. When None, automatically selects
+        DataEval's configured device, falling back to PyTorch's default.
+    Attributes
+    ----------
+    p_val : float
+        Significance threshold for statistical tests.
+    update_strategy : UpdateStrategy or None
+        Reference data update strategy.
+    n : int
+        Number of samples in the reference dataset.
+    sigma : Array or None
+        Gaussian RBF kernel bandwidth parameter(s).
+    n_permutations : int
+        Number of permutations for statistical testing.
+    device : torch.device
+        Hardware device used for computations.
     Example
     -------
@@ -56,7 +82,7 @@ class DriftMMD(BaseDrift):
     Use Embeddings to encode images before testing for drift
-    >>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
+    >>> train_emb = Embeddings(train_images, model=encoder, batch_size=16)
     >>> drift = DriftMMD(train_emb)
     Test incoming images for drift

dataeval 0.88.0__py3-none-any.whl → 0.89.0__py3-none-any.whl

dataeval 0.88.0__py3-none-any.whl → 0.89.0__py3-none-any.whl