dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff compares the content of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- dataeval/__init__.py +13 -9
- dataeval/_internal/detectors/clusterer.py +63 -49
- dataeval/_internal/detectors/drift/base.py +248 -51
- dataeval/_internal/detectors/drift/cvm.py +28 -26
- dataeval/_internal/detectors/drift/ks.py +31 -28
- dataeval/_internal/detectors/drift/mmd.py +62 -42
- dataeval/_internal/detectors/drift/torch.py +69 -60
- dataeval/_internal/detectors/drift/uncertainty.py +32 -32
- dataeval/_internal/detectors/duplicates.py +67 -31
- dataeval/_internal/detectors/ood/ae.py +15 -29
- dataeval/_internal/detectors/ood/aegmm.py +33 -27
- dataeval/_internal/detectors/ood/base.py +86 -47
- dataeval/_internal/detectors/ood/llr.py +34 -31
- dataeval/_internal/detectors/ood/vae.py +32 -31
- dataeval/_internal/detectors/ood/vaegmm.py +34 -28
- dataeval/_internal/detectors/{linter.py → outliers.py} +60 -38
- dataeval/_internal/flags.py +44 -21
- dataeval/_internal/interop.py +5 -3
- dataeval/_internal/metrics/balance.py +42 -5
- dataeval/_internal/metrics/ber.py +11 -8
- dataeval/_internal/metrics/coverage.py +15 -8
- dataeval/_internal/metrics/divergence.py +41 -7
- dataeval/_internal/metrics/diversity.py +57 -19
- dataeval/_internal/metrics/parity.py +141 -66
- dataeval/_internal/metrics/stats.py +330 -313
- dataeval/_internal/metrics/uap.py +33 -4
- dataeval/_internal/metrics/utils.py +79 -40
- dataeval/_internal/models/pytorch/autoencoder.py +127 -22
- dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
- dataeval/_internal/models/tensorflow/gmm.py +4 -2
- dataeval/_internal/models/tensorflow/losses.py +17 -13
- dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
- dataeval/_internal/models/tensorflow/trainer.py +10 -7
- dataeval/_internal/models/tensorflow/utils.py +23 -20
- dataeval/_internal/output.py +85 -0
- dataeval/_internal/utils.py +5 -3
- dataeval/_internal/workflows/sufficiency.py +122 -121
- dataeval/detectors/__init__.py +6 -25
- dataeval/detectors/drift/__init__.py +16 -0
- dataeval/detectors/drift/kernels/__init__.py +6 -0
- dataeval/detectors/drift/updates/__init__.py +3 -0
- dataeval/detectors/linters/__init__.py +5 -0
- dataeval/detectors/ood/__init__.py +11 -0
- dataeval/flags/__init__.py +2 -2
- dataeval/metrics/__init__.py +2 -26
- dataeval/metrics/bias/__init__.py +14 -0
- dataeval/metrics/estimators/__init__.py +9 -0
- dataeval/metrics/stats/__init__.py +6 -0
- dataeval/tensorflow/__init__.py +3 -0
- dataeval/tensorflow/loss/__init__.py +3 -0
- dataeval/tensorflow/models/__init__.py +5 -0
- dataeval/tensorflow/recon/__init__.py +3 -0
- dataeval/torch/__init__.py +3 -0
- dataeval/{models/torch → torch/models}/__init__.py +1 -2
- dataeval/torch/trainer/__init__.py +3 -0
- dataeval/utils/__init__.py +3 -6
- dataeval/workflows/__init__.py +2 -4
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
- dataeval-0.66.0.dist-info/RECORD +72 -0
- dataeval/_internal/metrics/base.py +0 -10
- dataeval/models/__init__.py +0 -15
- dataeval/models/tensorflow/__init__.py +0 -6
- dataeval-0.64.0.dist-info/RECORD +0 -60
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
dataeval/_internal/detectors/ood/llr.py
CHANGED
@@ -6,15 +6,17 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
 
+from __future__ import annotations
+
 from functools import partial
-from typing import Callable
+from typing import Callable
 
 import keras
 import numpy as np
 import tensorflow as tf
 from keras.layers import Input
 from keras.models import Model
-from numpy.typing import ArrayLike
+from numpy.typing import ArrayLike, NDArray
 
 from dataeval._internal.detectors.ood.base import OODBase, OODScore
 from dataeval._internal.interop import to_numpy
@@ -24,8 +26,8 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 
 
 def build_model(
-    dist: PixelCNN, input_shape:
-) ->
+    dist: PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
+) -> tuple[keras.Model, PixelCNN]:
     """
     Create keras.Model from TF distribution.
 
@@ -52,7 +54,7 @@ def build_model(
 
 
 def mutate_categorical(
-    X:
+    X: NDArray,
     rate: float,
     seed: int = 0,
     feature_range: tuple = (0, 255),
@@ -87,28 +89,29 @@ def mutate_categorical(
 
 
 class OOD_LLR(OODBase):
+    """
+    Likelihood Ratios based outlier detector.
+
+    Parameters
+    ----------
+    model : PixelCNN
+        Generative distribution model.
+    model_background : Optional[PixelCNN], default None
+        Optional model for the background. Only needed if it is different from `model`.
+    log_prob : Optional[Callable], default None
+        Function used to evaluate log probabilities under the model
+        if the model does not have a `log_prob` function.
+    sequential : bool, default False
+        Whether the data is sequential. Used to create targets during training.
+    """
+
     def __init__(
         self,
         model: PixelCNN,
-        model_background:
-        log_prob:
+        model_background: PixelCNN | None = None,
+        log_prob: Callable | None = None,
         sequential: bool = False,
     ) -> None:
-        """
-        Likelihood Ratios based outlier detector.
-
-        Parameters
-        ----------
-        model : PixelCNN
-            Generative distribution model.
-        model_background : Optional[PixelCNN], default None
-            Optional model for the background. Only needed if it is different from `model`.
-        log_prob : Optional[Callable], default None
-            Function used to evaluate log probabilities under the model
-            if the model does not have a `log_prob` function.
-        sequential : bool, default False
-            Whether the data is sequential. Used to create targets during training.
-        """
         self.dist_s = model
         self.dist_b = (
             model.copy()
@@ -123,13 +126,13 @@ class OOD_LLR(OODBase):
 
         self._ref_score: OODScore
         self._threshold_perc: float
-        self._data_info:
+        self._data_info: tuple[tuple, type] | None = None
 
     def fit(
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn:
+        loss_fn: Callable | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
@@ -144,10 +147,10 @@ class OOD_LLR(OODBase):
         Parameters
         ----------
         x_ref : ArrayLike
-            Training
+            Training data.
         threshold_perc : float, default 100.0
             Percentage of reference data that is normal.
-        loss_fn :
+        loss_fn : Callable | None, default None
             Loss function used for training.
         optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
             Optimizer used for training.
@@ -221,10 +224,10 @@ class OOD_LLR(OODBase):
     def _logp(
         self,
        dist,
-        X:
+        X: NDArray,
        return_per_feature: bool = False,
        batch_size: int = int(1e10),
-    ) ->
+    ) -> NDArray:
        """
        Compute log probability of a batch of instances under the generative model.
        """
@@ -235,10 +238,10 @@ class OOD_LLR(OODBase):
    def _logp_alt(
        self,
        model: keras.Model,
-        X:
+        X: NDArray,
        return_per_feature: bool = False,
        batch_size: int = int(1e10),
-    ) ->
+    ) -> NDArray:
        """
        Compute log probability of a batch of instances with the user defined log_prob function.
        """
@@ -254,7 +257,7 @@ class OOD_LLR(OODBase):
        axis = tuple(np.arange(len(logp.shape))[1:])
        return np.mean(logp, axis=axis)
 
-    def _llr(self, X:
+    def _llr(self, X: NDArray, return_per_feature: bool, batch_size: int = int(1e10)) -> NDArray:
        """
        Compute likelihood ratios.
 
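For reference, a minimal sketch of constructing and fitting the detector with the signatures shown above; the internal import path is inferred from the file layout, and `pixelcnn_model` and `train_images` are illustrative placeholders, not objects defined in this diff.

from dataeval._internal.detectors.ood.llr import OOD_LLR

# pixelcnn_model: placeholder for an already-built PixelCNN distribution model
detector = OOD_LLR(model=pixelcnn_model)  # model_background and log_prob default to None
# train_images: placeholder reference dataset; only arguments shown in the diff are used
detector.fit(train_images, threshold_perc=95.0, epochs=20, batch_size=64)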
dataeval/_internal/detectors/ood/vae.py
CHANGED
@@ -6,10 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
 
+from __future__ import annotations
+
 from typing import Callable
 
 import keras
 import numpy as np
+import tensorflow as tf
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODBase, OODScore
@@ -20,17 +23,33 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 
 
 class OOD_VAE(OODBase):
+    """
+    VAE based outlier detector.
+
+    Parameters
+    ----------
+    model : VAE
+        A VAE model.
+    samples : int, default 10
+        Number of samples sampled to evaluate each instance.
+
+    Examples
+    --------
+    Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
+
+    >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
+
+    Adjusting fit parameters,
+    including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
+
+    >>> metric.fit(dataset, threshold_perc=85, batch_size=128, verbose=False)
+
+    Detect out of distribution samples at the 'feature' level
+
+    >>> result = metric.predict(dataset, ood_type="feature")
+    """
+
     def __init__(self, model: VAE, samples: int = 10) -> None:
-        """
-        VAE based outlier detector.
-
-        Parameters
-        ----------
-        model : VAE
-            A VAE model.
-        samples : int, default 10
-            Number of samples sampled to evaluate each instance.
-        """
         super().__init__(model)
         self.samples = samples
 
@@ -38,32 +57,14 @@ class OOD_VAE(OODBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable =
+        loss_fn: Callable[..., tf.Tensor] | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-
-
-
-        Parameters
-        ----------
-        x_ref : ArrayLike
-            Training batch.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable, default Elbo(0.05)
-            Loss function used for training.
-        optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
+        if loss_fn is None:
+            loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
     def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
dataeval/_internal/detectors/ood/vaegmm.py
CHANGED
@@ -6,10 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
 
+from __future__ import annotations
+
 from typing import Callable
 
 import keras
 import numpy as np
+import tensorflow as tf
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScore
@@ -21,17 +24,18 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 
 
 class OOD_VAEGMM(OODGMMBase):
-
-
-        VAE with Gaussian Mixture Model based outlier detector.
+    """
+    VAE with Gaussian Mixture Model based outlier detector.
 
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    model : VAEGMM
+        A VAEGMM model.
+    samples
+        Number of samples sampled to evaluate each instance.
+    """
+
+    def __init__(self, model: VAEGMM, samples: int = 10) -> None:
         super().__init__(model)
         self.samples = samples
 
@@ -39,35 +43,37 @@ class OOD_VAEGMM(OODGMMBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable =
+        loss_fn: Callable[..., tf.Tensor] | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
+        if loss_fn is None:
+            loss_fn = LossGMM(elbo=Elbo(0.05))
+        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
         """
-
+        Compute the out-of-distribution (OOD) score for a given dataset.
 
         Parameters
         ----------
         X : ArrayLike
-
-
-
-
-            Loss function used for training.
-        optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
-        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+            Input data to score.
+        batch_size : int, default 1e10
+            Number of instances to process in each batch.
+            Use a smaller batch size if your dataset is large or if you encounter memory issues.
 
-
+        Returns
+        -------
+        OODScore
+            An object containing the instance-level OOD score.
+
+        Note
+        ----
+        This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
+        """
         self._validate(X := to_numpy(X))
 
         # draw samples from latent space
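A minimal sketch of the new default-loss behavior shown in the hunk above; the import path is inferred from the file layout, and `vaegmm_model` and `train_images` are illustrative placeholders.

from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM

detector = OOD_VAEGMM(vaegmm_model, samples=10)  # vaegmm_model: placeholder VAEGMM instance
detector.fit(train_images, threshold_perc=95.0)  # loss_fn=None now falls back to LossGMM(elbo=Elbo(0.05))
score = detector.score(train_images)             # instance-level OODScore only; no feature-level score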
dataeval/_internal/detectors/{linter.py → outliers.py}
CHANGED
@@ -1,15 +1,32 @@
-from
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable, Literal
 
 import numpy as np
-from numpy.typing import ArrayLike
+from numpy.typing import ArrayLike, NDArray
+
+from dataeval._internal.flags import ImageStat, to_distinct, verify_supported
+from dataeval._internal.metrics.stats import StatsOutput, imagestats
+from dataeval._internal.output import OutputMetadata, set_metadata
+
+
+@dataclass(frozen=True)
+class OutliersOutput(OutputMetadata):
+    """
+    Attributes
+    ----------
+    issues : Dict[int, Dict[str, float]]
+        Dictionary containing the indices of outliers and a dictionary showing
+        the issues and calculated values for the given index.
+    """
 
-
-from dataeval._internal.metrics.stats import ImageStats
+    issues: dict[int, dict[str, float]]
 
 
 def _get_outlier_mask(
-    values:
-) ->
+    values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
+) -> NDArray:
     if method == "zscore":
         threshold = threshold if threshold else 3.0
         std = np.std(values)
@@ -18,7 +35,7 @@ def _get_outlier_mask(
     elif method == "modzscore":
         threshold = threshold if threshold else 3.5
         abs_diff = np.abs(values - np.median(values))
-        med_abs_diff = np.median(abs_diff)
+        med_abs_diff = np.median(abs_diff) if np.median(abs_diff) != 0 else np.mean(abs_diff)
         mod_z_score = 0.6745 * abs_diff / med_abs_diff
         return mod_z_score > threshold
     elif method == "iqr":
@@ -30,14 +47,15 @@ def _get_outlier_mask(
     raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
 
 
-class
+class Outliers:
     r"""
     Calculates statistical outliers of a dataset using various statistical tests applied to each image
 
     Parameters
     ----------
-    flags :
+    flags : ImageStat, default ImageStat.ALL_PROPERTIES | ImageStat.ALL_VISUALS
         Metric(s) to calculate for each image - calculates all metrics if None
+        Only supports ImageStat.ALL_STATS
     outlier_method : ["modzscore" | "zscore" | "iqr"], optional - default "modzscore"
         Statistical method used to identify outliers
     outlier_threshold : float, optional - default None
@@ -46,8 +64,8 @@ class Linter:
 
     Attributes
    ----------
-    stats :
-
+    stats : Dict[str, Any]
+        Dictionary to hold the value of each metric for each image
 
     See Also
    --------
@@ -75,42 +93,40 @@ class Linter:
 
     Examples
    --------
-    Initialize the
+    Initialize the Outliers class:
 
-    >>>
+    >>> outliers = Outliers()
 
     Specifying specific metrics to analyze:
 
-    >>>
+    >>> outliers = Outliers(flags=ImageStat.SIZE | ImageStat.ALL_VISUALS)
 
     Specifying an outlier method:
 
-    >>>
+    >>> outliers = Outliers(outlier_method="iqr")
 
     Specifying an outlier method and threshold:
 
-    >>>
+    >>> outliers = Outliers(outlier_method="zscore", outlier_threshold=2.5)
     """
 
     def __init__(
         self,
-        flags:
+        flags: ImageStat = ImageStat.ALL_PROPERTIES | ImageStat.ALL_VISUALS,
         outlier_method: Literal["zscore", "modzscore", "iqr"] = "modzscore",
-        outlier_threshold:
+        outlier_threshold: float | None = None,
     ):
-        flags
-        self.
+        verify_supported(flags, ImageStat.ALL_STATS)
+        self.flags = flags
         self.outlier_method: Literal["zscore", "modzscore", "iqr"] = outlier_method
         self.outlier_threshold = outlier_threshold
 
     def _get_outliers(self) -> dict:
         flagged_images = {}
-
-
-
-
-
-            if values.ndim == 1 and np.std(values) != 0:
+        stats_dict = self.stats.dict()
+        supported = to_distinct(ImageStat.ALL_STATS)
+        for stat, values in stats_dict.items():
+            if stat in supported.values() and values.ndim == 1 and np.std(values) != 0:
                 mask = _get_outlier_mask(values, self.outlier_method, self.outlier_threshold)
                 indices = np.flatnonzero(mask)
                 for i, value in zip(indices, values[mask]):
@@ -118,30 +134,36 @@ class Linter:
 
         return dict(sorted(flagged_images.items()))
 
-
+    @set_metadata("dataeval.detectors", ["flags", "outlier_method", "outlier_threshold"])
+    def evaluate(self, data: Iterable[ArrayLike] | StatsOutput) -> OutliersOutput:
         """
         Returns indices of outliers with the issues identified for each
 
         Parameters
        ----------
-
-        A dataset in an ArrayLike format
-        Function expects the data to have 3 dimensions, CxHxW.
+        data : Iterable[ArrayLike], shape - (C, H, W) | StatsOutput
+            A dataset of images in an ArrayLike format or the output from an imagestats metric analysis
 
         Returns
        -------
-
-
+        OutliersOutput
+            Output class containing the indices of outliers and a dictionary showing
            the issues and calculated values for the given index.
 
         Example
        -------
         Evaluate the dataset:
 
-        >>>
-        {18: {'brightness': 0.78}, 25: {'brightness': 0.98}}
+        >>> outliers.evaluate(images)
+        OutliersOutput(issues={18: {'brightness': 0.78}, 25: {'brightness': 0.98}})
         """
-
-
-
-
+        if isinstance(data, StatsOutput):
+            flags = set(to_distinct(self.flags).values())
+            stats = set(data.dict())
+            missing = flags - stats
+            if missing:
+                raise ValueError(f"StatsOutput is missing {missing} from the required stats: {flags}.")
+            self.stats = data
+        else:
+            self.stats = imagestats(data, self.flags)
+        return OutliersOutput(self._get_outliers())
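A minimal sketch of the two input paths accepted by the new `evaluate` signature above; the import paths are inferred from the file layout in this diff, and `images` is an illustrative placeholder for an iterable of (C, H, W) arrays.

from dataeval._internal.detectors.outliers import Outliers
from dataeval._internal.flags import ImageStat
from dataeval._internal.metrics.stats import imagestats

outliers = Outliers()  # defaults to ImageStat.ALL_PROPERTIES | ImageStat.ALL_VISUALS

# Path 1: raw images - evaluate() computes imagestats internally with the configured flags
result = outliers.evaluate(images)

# Path 2: a precomputed StatsOutput - it must contain every stat the configured flags
# require, otherwise evaluate() raises ValueError
stats = imagestats(images, ImageStat.ALL_PROPERTIES | ImageStat.ALL_VISUALS)
result = outliers.evaluate(stats)
print(result.issues)  # {image_index: {stat_name: value}, ...}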
dataeval/_internal/flags.py
CHANGED
@@ -1,37 +1,33 @@
-from
-from typing import Union
+from __future__ import annotations
 
+from enum import IntFlag, auto
+from functools import reduce
+from typing import Iterable, TypeVar, cast
 
-
-    def __get__(self, _, cls):
-        return ~cls(0)
+TFlag = TypeVar("TFlag", bound=IntFlag)
 
 
-class
+class ImageStat(IntFlag):
+    """
+    Flags for calculating image and channel statistics
+    """
+
+    # HASHES
     XXHASH = auto()
     PCHASH = auto()
-
-
-
-class ImageProperty(Flag):
+    # PROPERTIES
     WIDTH = auto()
     HEIGHT = auto()
     SIZE = auto()
     ASPECT_RATIO = auto()
     CHANNELS = auto()
     DEPTH = auto()
-
-
-
-class ImageVisuals(Flag):
+    # VISUALS
     BRIGHTNESS = auto()
     BLURRINESS = auto()
     MISSING = auto()
     ZERO = auto()
-
-
-
-class ImageStatistics(Flag):
+    # PIXEL STATS
     MEAN = auto()
     STD = auto()
     VAR = auto()
@@ -40,8 +36,35 @@ class ImageStatistics(Flag):
     ENTROPY = auto()
     PERCENTILES = auto()
     HISTOGRAM = auto()
-
+    # JOINT FLAGS
+    ALL_HASHES = XXHASH | PCHASH
+    ALL_PROPERTIES = WIDTH | HEIGHT | SIZE | ASPECT_RATIO | CHANNELS | DEPTH
+    ALL_VISUALS = BRIGHTNESS | BLURRINESS | MISSING | ZERO
+    ALL_PIXELSTATS = MEAN | STD | VAR | SKEW | KURTOSIS | ENTROPY | PERCENTILES | HISTOGRAM
+    ALL_STATS = ALL_PROPERTIES | ALL_VISUALS | ALL_PIXELSTATS
+    ALL = ALL_HASHES | ALL_STATS
+
+
+def is_distinct(flag: IntFlag) -> bool:
+    return (flag & (flag - 1) == 0) and flag != 0
+
+
+def to_distinct(flag: TFlag) -> dict[TFlag, str]:
+    """
+    Returns a distinct set of all flags set on the input flag and their names
+
+    NOTE: this is supported natively in Python 3.11, but for earlier versions we need
+    to use a combination of list comprehension and bit fiddling to determine distinct
+    flag values from joint aliases.
+    """
+    if isinstance(flag, Iterable):  # >= py311
+        return {f: f.name.lower() for f in flag if f.name}
+    else:  # < py311
+        return {f: f.name.lower() for f in list(flag.__class__) if f & flag and is_distinct(f) and f.name}
 
 
-
-
+def verify_supported(flag: TFlag, flags: TFlag | Iterable[TFlag]):
+    supported = flags if isinstance(flags, flag.__class__) else cast(TFlag, reduce(lambda a, b: a | b, flags))  # type: ignore
+    unsupported = flag & ~supported
+    if unsupported:
+        raise ValueError(f"Unsupported flags {unsupported} called. Only {supported} flags are supported.")
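A minimal sketch of how the new helpers behave, inferred directly from the code shown in the hunk above rather than from separate documentation:

from dataeval._internal.flags import ImageStat, to_distinct, verify_supported

# to_distinct expands a joint alias into its single-bit members, keyed to their lowercase names
to_distinct(ImageStat.ALL_HASHES)  # roughly {XXHASH: 'xxhash', PCHASH: 'pchash'}

# verify_supported raises when a flag falls outside the allowed set
verify_supported(ImageStat.BRIGHTNESS, ImageStat.ALL_STATS)  # ok - BRIGHTNESS is part of ALL_VISUALS
verify_supported(ImageStat.XXHASH, ImageStat.ALL_STATS)      # raises ValueError - hashes are not stats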
dataeval/_internal/interop.py
CHANGED
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from importlib import import_module
-from typing import Iterable
+from typing import Iterable
 
 import numpy as np
-from numpy.typing import ArrayLike
+from numpy.typing import ArrayLike, NDArray
 
 module_cache = {}
 
@@ -20,7 +22,7 @@ def try_import(module_name):
     return module
 
 
-def to_numpy(array:
+def to_numpy(array: ArrayLike | None) -> NDArray:
     if array is None:
         return np.ndarray([])
 
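A minimal sketch of the updated `to_numpy` signature above; the nested-list input is just an illustrative ArrayLike value.

from dataeval._internal.interop import to_numpy

arr = to_numpy([[0, 1], [2, 3]])  # returns an NDArray
empty = to_numpy(None)            # per the diff, None maps to an empty np.ndarray([])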