PyPI - dataeval - Versions diffs - 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl - Mend

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

dataeval/__init__.py +13 -9
dataeval/_internal/detectors/clusterer.py +24 -22
dataeval/_internal/detectors/drift/base.py +206 -26
dataeval/_internal/detectors/drift/cvm.py +25 -23
dataeval/_internal/detectors/drift/ks.py +28 -25
dataeval/_internal/detectors/drift/mmd.py +30 -29
dataeval/_internal/detectors/drift/torch.py +66 -58
dataeval/_internal/detectors/drift/uncertainty.py +28 -28
dataeval/_internal/detectors/duplicates.py +28 -18
dataeval/_internal/detectors/ood/ae.py +15 -29
dataeval/_internal/detectors/ood/aegmm.py +33 -27
dataeval/_internal/detectors/ood/base.py +61 -43
dataeval/_internal/detectors/ood/llr.py +27 -24
dataeval/_internal/detectors/ood/vae.py +32 -31
dataeval/_internal/detectors/ood/vaegmm.py +34 -28
dataeval/_internal/detectors/{linter.py → outliers.py} +33 -27
dataeval/_internal/flags.py +5 -3
dataeval/_internal/interop.py +4 -2
dataeval/_internal/metrics/balance.py +33 -4
dataeval/_internal/metrics/ber.py +6 -4
dataeval/_internal/metrics/diversity.py +45 -12
dataeval/_internal/metrics/parity.py +114 -26
dataeval/_internal/metrics/stats.py +154 -16
dataeval/_internal/metrics/uap.py +28 -2
dataeval/_internal/metrics/utils.py +20 -18
dataeval/_internal/models/pytorch/autoencoder.py +127 -22
dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
dataeval/_internal/models/tensorflow/gmm.py +4 -2
dataeval/_internal/models/tensorflow/losses.py +15 -11
dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
dataeval/_internal/models/tensorflow/trainer.py +8 -6
dataeval/_internal/models/tensorflow/utils.py +21 -19
dataeval/_internal/output.py +13 -10
dataeval/_internal/utils.py +5 -3
dataeval/_internal/workflows/sufficiency.py +42 -30
dataeval/detectors/__init__.py +6 -25
dataeval/detectors/drift/__init__.py +16 -0
dataeval/detectors/drift/kernels/__init__.py +6 -0
dataeval/detectors/drift/updates/__init__.py +3 -0
dataeval/detectors/linters/__init__.py +5 -0
dataeval/detectors/ood/__init__.py +11 -0
dataeval/metrics/__init__.py +2 -26
dataeval/metrics/bias/__init__.py +14 -0
dataeval/metrics/estimators/__init__.py +9 -0
dataeval/metrics/stats/__init__.py +6 -0
dataeval/tensorflow/__init__.py +3 -0
dataeval/tensorflow/loss/__init__.py +3 -0
dataeval/tensorflow/models/__init__.py +5 -0
dataeval/tensorflow/recon/__init__.py +3 -0
dataeval/torch/__init__.py +3 -0
dataeval/{models/torch → torch/models}/__init__.py +1 -2
dataeval/torch/trainer/__init__.py +3 -0
dataeval/utils/__init__.py +3 -6
dataeval/workflows/__init__.py +2 -4
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
dataeval-0.66.0.dist-info/RECORD +72 -0
dataeval/models/__init__.py +0 -15
dataeval/models/tensorflow/__init__.py +0 -6
dataeval-0.65.0.dist-info/RECORD +0 -60
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0

dataeval/_internal/detectors/duplicates.py CHANGED Viewed

@@ -1,12 +1,13 @@
+from __future__ import annotations
 from dataclasses import dataclass
-from typing import Dict, Iterable, List
+from typing import Iterable
 from numpy.typing import ArrayLike
-from dataeval._internal.metrics.stats import StatsOutput
+from dataeval._internal.flags import ImageStat
+from dataeval._internal.metrics.stats import StatsOutput, imagestats
 from dataeval._internal.output import OutputMetadata, set_metadata
-from dataeval.flags import ImageStat
-from dataeval.metrics import imagestats
 @dataclass(frozen=True)
@@ -20,8 +21,8 @@ class DuplicatesOutput(OutputMetadata):
         Indices of images that are near matches
     """
-    exact: List[List[int]]
-    near: List[List[int]]
+    exact: list[list[int]]
+    near: list[list[int]]
 class Duplicates:
@@ -34,6 +35,11 @@ class Duplicates:
     stats : StatsOutput
         Output class of stats
+    Parameters
+    ----------
+    only_exact : bool, default False
+        Only inspect the dataset for exact image matches
     Example
     -------
     Initialize the Duplicates class:
@@ -41,12 +47,11 @@ class Duplicates:
     >>> dups = Duplicates()
     """
-    def __init__(self, find_exact: bool = True, find_near: bool = True):
+    def __init__(self, only_exact: bool = False):
         self.stats: StatsOutput
-        self.find_exact = find_exact
-        self.find_near = find_near
+        self.only_exact = only_exact
-    def _get_duplicates(self) -> Dict[str, List[List[int]]]:
+    def _get_duplicates(self) -> dict[str, list[list[int]]]:
         stats_dict = self.stats.dict()
         if "xxhash" in stats_dict:
             exact = {}
@@ -56,7 +61,7 @@ class Duplicates:
         else:
             exact = []
-        if "pchash" in stats_dict:
+        if "pchash" in stats_dict and not self.only_exact:
             near = {}
             for i, value in enumerate(stats_dict["pchash"]):
                 near.setdefault(value, []).append(i)
@@ -69,15 +74,15 @@ class Duplicates:
             "near": sorted(near),
         }
-    @set_metadata("dataeval.detectors", ["find_exact", "find_near"])
-    def evaluate(self, images: Iterable[ArrayLike]) -> DuplicatesOutput:
+    @set_metadata("dataeval.detectors", ["only_exact"])
+    def evaluate(self, data: Iterable[ArrayLike] | StatsOutput) -> DuplicatesOutput:
         """
         Returns duplicate image indices for both exact matches and near matches
         Parameters
         ----------
-        images : Iterable[ArrayLike], shape - (N, C, H, W)
-            A set of images in an ArrayLike format
+        data : Iterable[ArrayLike], shape - (N, C, H, W) | StatsOutput
+            A dataset of images in an ArrayLike format or the output from an imagestats metric analysis
         Returns
         -------
@@ -93,7 +98,12 @@ class Duplicates:
         >>> dups.evaluate(images)
         DuplicatesOutput(exact=[[3, 20], [16, 37]], near=[[3, 20, 22], [12, 18], [13, 36], [14, 31], [17, 27], [19, 38, 47]])
         """  # noqa: E501
-        flag_exact = ImageStat.XXHASH if self.find_exact else ImageStat(0)
-        flag_near = ImageStat.PCHASH if self.find_near else ImageStat(0)
-        self.stats = imagestats(images, flag_exact | flag_near)
+        if isinstance(data, StatsOutput):
+            if not data.xxhash:
+                raise ValueError("StatsOutput must include xxhash information of the images.")
+            if not self.only_exact and not data.pchash:
+                raise ValueError("StatsOutput must include pchash information of the images for near matches.")
+            self.stats = data
+        else:
+            self.stats = imagestats(data, ImageStat.XXHASH | (ImageStat(0) if self.only_exact else ImageStat.PCHASH))
         return DuplicatesOutput(**self._get_duplicates())

dataeval/_internal/detectors/ood/ae.py CHANGED Viewed

@@ -6,10 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from typing import Callable
 import keras
 import numpy as np
+import tensorflow as tf
 from numpy.typing import ArrayLike
 from dataeval._internal.detectors.ood.base import OODBase, OODScore
@@ -19,47 +22,30 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 class OOD_AE(OODBase):
-    def __init__(self, model: AE) -> None:
-        """
-        Autoencoder based out-of-distribution detector.
+    """
+    Autoencoder based out-of-distribution detector.
+    Parameters
+    ----------
+    model : AE
+        An Autoencoder model.
+    """
-        Parameters
-        ----------
-        model : AE
-            An Autoencoder model.
-        """
+    def __init__(self, model: AE) -> None:
         super().__init__(model)
     def fit(
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable = keras.losses.MeanSquaredError(),
+        loss_fn: Callable[..., tf.Tensor] | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        """
-        Train the AE model with recommended loss function and optimizer.
-        Parameters
-        ----------
-        x_ref : ArrayLike
-            Training batch.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable, default keras.losses.MeanSquaredError()
-            Loss function used for training.
-        optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
+        if loss_fn is None:
+            loss_fn = keras.losses.MeanSquaredError()
         super().fit(to_numpy(x_ref), threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
     def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:

dataeval/_internal/detectors/ood/aegmm.py CHANGED Viewed

@@ -6,9 +6,12 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from typing import Callable
 import keras
+import tensorflow as tf
 from numpy.typing import ArrayLike
 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScore
@@ -20,50 +23,53 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 class OOD_AEGMM(OODGMMBase):
-    def __init__(self, model: AEGMM) -> None:
-        """
-        AE with Gaussian Mixture Model based outlier detector.
+    """
+    AE with Gaussian Mixture Model based outlier detector.
-        Parameters
-        ----------
-        model : AEGMM
-            An AEGMM model.
-        """
+    Parameters
+    ----------
+    model : AEGMM
+        An AEGMM model.
+    """
+    def __init__(self, model: AEGMM) -> None:
         super().__init__(model)
     def fit(
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable = LossGMM(),
+        loss_fn: Callable[..., tf.Tensor] | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
+        if loss_fn is None:
+            loss_fn = LossGMM()
+        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
         """
-        Train the AEGMM model with recommended loss function and optimizer.
+        Compute the out-of-distribution (OOD) score for a given dataset.
         Parameters
         ----------
-        x_ref : ArrayLike
-            Training batch.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable, default LossGMM()
-            Loss function used for training.
-        optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
-        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+        X : ArrayLike
+            Input data to score.
+        batch_size : int, default 1e10
+            Number of instances to process in each batch.
+            Use a smaller batch size if your dataset is large or if you encounter memory issues.
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
+        Returns
+        -------
+        OODScore
+            An object containing the instance-level OOD score.
+        Note
+        ----
+        This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
+        """
         self._validate(X := to_numpy(X))
         _, z, _ = predict_batch(X, self.model, batch_size=batch_size)
         energy, _ = gmm_energy(z, self.gmm_params, return_mean=False)

dataeval/_internal/detectors/ood/base.py CHANGED Viewed

@@ -6,9 +6,11 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, List, Literal, NamedTuple, Optional, Tuple, cast
+from typing import Callable, Literal, NamedTuple, cast
 import keras
 import numpy as np
@@ -26,17 +28,17 @@ class OODOutput(OutputMetadata):
     """
     Attributes
     ----------
-    is_ood : NDArray[np.bool_]
+    is_ood : NDArray
         Array of images that are detected as out of distribution
-    instance_score : NDArray[np.float32]
+    instance_score : NDArray
         Instance score of the evaluated dataset
-    feature_score : Optional[NDArray[np.float32]]
+    feature_score : NDArray | None
         Feature score, if available, of the evaluated dataset
     """
     is_ood: NDArray[np.bool_]
     instance_score: NDArray[np.float32]
-    feature_score: Optional[NDArray[np.float32]]
+    feature_score: NDArray[np.float32] | None
 class OODScore(NamedTuple):
@@ -45,16 +47,28 @@ class OODScore(NamedTuple):
     Parameters
     ----------
-    instance_score : NDArray[np.float32]
+    instance_score : NDArray
         Instance score of the evaluated dataset.
-    feature_score : Optional[NDArray[np.float32]], default None
+    feature_score : NDArray | None, default None
         Feature score, if available, of the evaluated dataset.
     """
     instance_score: NDArray[np.float32]
-    feature_score: Optional[NDArray[np.float32]] = None
+    feature_score: NDArray[np.float32] | None = None
     def get(self, ood_type: Literal["instance", "feature"]) -> NDArray:
+        """
+        Returns either the instance or feature score
+        Parameters
+        ----------
+        ood_type : "instance" | "feature"
+        Returns
+        -------
+        NDArray
+            Either the instance or feature score based on input selection
+        """
         return self.instance_score if ood_type == "instance" or self.feature_score is None else self.feature_score
@@ -64,12 +78,12 @@ class OODBase(ABC):
         self._ref_score: OODScore
         self._threshold_perc: float
-        self._data_info: Optional[Tuple[tuple, type]] = None
+        self._data_info: tuple[tuple, type] | None = None
         if not isinstance(model, keras.Model):
             raise TypeError("Model should be of type 'keras.Model'.")
-    def _get_data_info(self, X: NDArray) -> Tuple[tuple, type]:
+    def _get_data_info(self, X: NDArray) -> tuple[tuple, type]:
         if not isinstance(X, np.ndarray):
             raise TypeError("Dataset should of type: `NDArray`.")
         return X.shape[1:], X.dtype.type
@@ -80,7 +94,7 @@ class OODBase(ABC):
             raise RuntimeError(f"Expect data of type: {self._data_info[1]} and shape: {self._data_info[0]}. \
                                Provided data is type: {check_data_info[1]} and shape: {check_data_info[0]}.")
-    def _validate_state(self, X: NDArray, additional_attrs: Optional[List[str]] = None) -> None:
+    def _validate_state(self, X: NDArray, additional_attrs: list[str] | None = None) -> None:
         attrs = ["_data_info", "_threshold_perc", "_ref_score"]
         attrs = attrs if additional_attrs is None else attrs + additional_attrs
         if not all(hasattr(self, attr) for attr in attrs) or any(getattr(self, attr) for attr in attrs) is None:
@@ -90,18 +104,20 @@ class OODBase(ABC):
     @abstractmethod
     def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
         """
-        Compute instance and (optionally) feature level outlier scores.
+        Compute the out-of-distribution (OOD) scores for a given dataset.
         Parameters
         ----------
         X : ArrayLike
-            Batch of instances.
-        batch_size : int, default int(1e10)
-            Batch size used when making predictions with the autoencoder.
+            Input data to score.
+        batch_size : int, default 1e10
+            Number of instances to process in each batch.
+            Use a smaller batch size if your dataset is large or if you encounter memory issues.
         Returns
         -------
-        Instance and feature level outlier scores.
+        OODScore
+            An object containing the instance-level and feature-level OOD scores.
         """
     def _threshold_score(self, ood_type: Literal["feature", "instance"] = "instance") -> np.floating:
@@ -110,33 +126,34 @@ class OODBase(ABC):
     def fit(
         self,
         x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable,
-        optimizer: keras.optimizers.Optimizer,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
+        threshold_perc: float = 100.0,
+        loss_fn: Callable[..., tf.Tensor] | None = None,
+        optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
+        epochs: int = 20,
+        batch_size: int = 64,
+        verbose: bool = True,
     ) -> None:
         """
         Train the model and infer the threshold value.
         Parameters
         ----------
-        x_ref: : ArrayLike
-            Training batch.
-        threshold_perc : float
+        x_ref : ArrayLike
+            Training data.
+        threshold_perc : float, default 100.0
             Percentage of reference data that is normal.
-        loss_fn : Callable
+        loss_fn : Callable | None, default None
             Loss function used for training.
-        optimizer : keras.optimizers.Optimizer
+        optimizer : Optimizer, default keras.optimizers.Adam
             Optimizer used for training.
-        epochs : int
+        epochs : int, default 20
             Number of training epochs.
-        batch_size : int
+        batch_size : int, default 64
             Batch size used for training.
-        verbose : bool
+        verbose : bool, default True
             Whether to print training progress.
         """
         # Train the model
         trainer(
             model=self.model,
@@ -165,15 +182,16 @@ class OODBase(ABC):
         Parameters
         ----------
         X : ArrayLike
-            Batch of instances.
-        batch_size : int, default int(1e10)
-            Batch size used when making predictions with the autoencoder.
-        ood_type : Literal["feature", "instance"], default "instance"
+            Input data for out-of-distribution prediction.
+        batch_size : int, default 1e10
+            Number of instances to process in each batch.
+        ood_type : "feature" | "instance", default "instance"
             Predict out-of-distribution at the 'feature' or 'instance' level.
         Returns
         -------
-        Dictionary containing the outlier predictions and both feature and instance level outlier scores.
+        Dictionary containing the outlier predictions for the selected level,
+        and the OOD scores for the data including both 'instance' and 'feature' (if present) level scores.
         """
         self._validate_state(X := to_numpy(X))
         # compute outlier scores
@@ -187,7 +205,7 @@ class OODGMMBase(OODBase):
         super().__init__(model)
         self.gmm_params: GaussianMixtureModelParams
-    def _validate_state(self, X: NDArray, additional_attrs: Optional[List[str]] = None) -> None:
+    def _validate_state(self, X: NDArray, additional_attrs: list[str] | None = None) -> None:
         if additional_attrs is None:
             additional_attrs = ["gmm_params"]
         super()._validate_state(X, additional_attrs)
@@ -195,12 +213,12 @@ class OODGMMBase(OODBase):
     def fit(
         self,
         x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable[[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor], tf.Tensor],
-        optimizer: keras.optimizers.Optimizer,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
+        threshold_perc: float = 100.0,
+        loss_fn: Callable[..., tf.Tensor] | None = None,
+        optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
+        epochs: int = 20,
+        batch_size: int = 64,
+        verbose: bool = True,
     ) -> None:
         # Train the model
         trainer(
@@ -214,7 +232,7 @@ class OODGMMBase(OODBase):
         )
         # Calculate the GMM parameters
-        _, z, gamma = cast(Tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.model(x_ref))
+        _, z, gamma = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.model(x_ref))
         self.gmm_params = gmm_params(z, gamma)
         # Infer the threshold values

dataeval/_internal/detectors/ood/llr.py CHANGED Viewed

@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from functools import partial
-from typing import Callable, Optional, Tuple
+from typing import Callable
 import keras
 import numpy as np
@@ -24,8 +26,8 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 def build_model(
-    dist: PixelCNN, input_shape: Optional[tuple] = None, filepath: Optional[str] = None
-) -> Tuple[keras.Model, PixelCNN]:
+    dist: PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
+) -> tuple[keras.Model, PixelCNN]:
     """
     Create keras.Model from TF distribution.
@@ -87,28 +89,29 @@ def mutate_categorical(
 class OOD_LLR(OODBase):
+    """
+    Likelihood Ratios based outlier detector.
+    Parameters
+    ----------
+    model : PixelCNN
+        Generative distribution model.
+    model_background : Optional[PixelCNN], default None
+        Optional model for the background. Only needed if it is different from `model`.
+    log_prob : Optional[Callable], default None
+        Function used to evaluate log probabilities under the model
+        if the model does not have a `log_prob` function.
+    sequential : bool, default False
+        Whether the data is sequential. Used to create targets during training.
+    """
     def __init__(
         self,
         model: PixelCNN,
-        model_background: Optional[PixelCNN] = None,
-        log_prob: Optional[Callable] = None,
+        model_background: PixelCNN | None = None,
+        log_prob: Callable | None = None,
         sequential: bool = False,
     ) -> None:
-        """
-        Likelihood Ratios based outlier detector.
-        Parameters
-        ----------
-        model : PixelCNN
-            Generative distribution model.
-        model_background : Optional[PixelCNN], default None
-            Optional model for the background. Only needed if it is different from `model`.
-        log_prob : Optional[Callable], default None
-            Function used to evaluate log probabilities under the model
-            if the model does not have a `log_prob` function.
-        sequential : bool, default False
-            Whether the data is sequential. Used to create targets during training.
-        """
         self.dist_s = model
         self.dist_b = (
             model.copy()
@@ -123,13 +126,13 @@ class OOD_LLR(OODBase):
         self._ref_score: OODScore
         self._threshold_perc: float
-        self._data_info: Optional[Tuple[tuple, type]] = None
+        self._data_info: tuple[tuple, type] | None = None
     def fit(
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Optional[Callable] = None,
+        loss_fn: Callable | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
@@ -144,10 +147,10 @@ class OOD_LLR(OODBase):
         Parameters
         ----------
         x_ref : ArrayLike
-            Training batch.
+            Training data.
         threshold_perc : float, default 100.0
             Percentage of reference data that is normal.
-        loss_fn : Optional[Callable], default None
+        loss_fn : Callable | None, default None
             Loss function used for training.
         optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
             Optimizer used for training.

dataeval/_internal/detectors/ood/vae.py CHANGED Viewed

@@ -6,10 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 from typing import Callable
 import keras
 import numpy as np
+import tensorflow as tf
 from numpy.typing import ArrayLike
 from dataeval._internal.detectors.ood.base import OODBase, OODScore
@@ -20,17 +23,33 @@ from dataeval._internal.models.tensorflow.utils import predict_batch
 class OOD_VAE(OODBase):
+    """
+    VAE based outlier detector.
+    Parameters
+    ----------
+    model : VAE
+        A VAE model.
+    samples : int, default 10
+        Number of samples sampled to evaluate each instance.
+    Examples
+    --------
+    Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
+    >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
+    Adjusting fit parameters,
+    including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
+    >>> metric.fit(dataset, threshold_perc=85, batch_size=128, verbose=False)
+    Detect out of distribution samples at the 'feature' level
+    >>> result = metric.predict(dataset, ood_type="feature")
+    """
     def __init__(self, model: VAE, samples: int = 10) -> None:
-        """
-        VAE based outlier detector.
-        Parameters
-        ----------
-        model : VAE
-            A VAE model.
-        samples : int, default 10
-            Number of samples sampled to evaluate each instance.
-        """
         super().__init__(model)
         self.samples = samples
@@ -38,32 +57,14 @@ class OOD_VAE(OODBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable = Elbo(0.05),
+        loss_fn: Callable[..., tf.Tensor] | None = None,
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        """
-        Train the VAE model.
-        Parameters
-        ----------
-        x_ref : ArrayLike
-            Training batch.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable, default Elbo(0.05)
-            Loss function used for training.
-        optimizer : keras.optimizers.Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
+        if loss_fn is None:
+            loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
     def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl