PyPI - dataeval - Versions diffs - 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl - Mend

dataeval 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

dataeval/__init__.py +3 -9
dataeval/detectors/__init__.py +2 -10
dataeval/detectors/drift/base.py +3 -3
dataeval/detectors/drift/mmd.py +1 -1
dataeval/detectors/drift/torch.py +1 -101
dataeval/detectors/linters/clusterer.py +3 -3
dataeval/detectors/linters/duplicates.py +4 -4
dataeval/detectors/linters/outliers.py +4 -4
dataeval/detectors/ood/__init__.py +9 -9
dataeval/detectors/ood/{ae.py → ae_torch.py} +22 -27
dataeval/detectors/ood/base.py +63 -113
dataeval/detectors/ood/base_torch.py +109 -0
dataeval/detectors/ood/metadata_ks_compare.py +52 -14
dataeval/interop.py +1 -1
dataeval/metrics/bias/__init__.py +3 -0
dataeval/metrics/bias/balance.py +73 -70
dataeval/metrics/bias/coverage.py +4 -4
dataeval/metrics/bias/diversity.py +67 -136
dataeval/metrics/bias/metadata_preprocessing.py +285 -0
dataeval/metrics/bias/metadata_utils.py +229 -0
dataeval/metrics/bias/parity.py +51 -161
dataeval/metrics/estimators/ber.py +3 -3
dataeval/metrics/estimators/divergence.py +3 -3
dataeval/metrics/estimators/uap.py +3 -3
dataeval/metrics/stats/base.py +2 -2
dataeval/metrics/stats/boxratiostats.py +1 -1
dataeval/metrics/stats/datasetstats.py +6 -6
dataeval/metrics/stats/dimensionstats.py +1 -1
dataeval/metrics/stats/hashstats.py +1 -1
dataeval/metrics/stats/labelstats.py +3 -3
dataeval/metrics/stats/pixelstats.py +1 -1
dataeval/metrics/stats/visualstats.py +1 -1
dataeval/output.py +77 -53
dataeval/utils/__init__.py +1 -7
dataeval/utils/gmm.py +26 -0
dataeval/utils/metadata.py +29 -9
dataeval/utils/torch/gmm.py +98 -0
dataeval/utils/torch/models.py +192 -0
dataeval/utils/torch/trainer.py +84 -5
dataeval/utils/torch/utils.py +107 -1
dataeval/workflows/sufficiency.py +4 -4
{dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/METADATA +3 -9
dataeval-0.74.1.dist-info/RECORD +65 -0
dataeval/detectors/ood/aegmm.py +0 -66
dataeval/detectors/ood/llr.py +0 -302
dataeval/detectors/ood/vae.py +0 -97
dataeval/detectors/ood/vaegmm.py +0 -75
dataeval/metrics/bias/metadata.py +0 -440
dataeval/utils/lazy.py +0 -26
dataeval/utils/tensorflow/__init__.py +0 -19
dataeval/utils/tensorflow/_internal/gmm.py +0 -123
dataeval/utils/tensorflow/_internal/loss.py +0 -121
dataeval/utils/tensorflow/_internal/models.py +0 -1394
dataeval/utils/tensorflow/_internal/trainer.py +0 -114
dataeval/utils/tensorflow/_internal/utils.py +0 -256
dataeval/utils/tensorflow/loss/__init__.py +0 -11
dataeval-0.73.1.dist-info/RECORD +0 -73
{dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/LICENSE.txt +0 -0
{dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/WHEEL +0 -0

dataeval/__init__.py CHANGED Viewed

@@ -1,10 +1,9 @@
-__version__ = "0.73.1"
+__version__ = "0.74.1"
 from importlib.util import find_spec
 _IS_TORCH_AVAILABLE = find_spec("torch") is not None
 _IS_TORCHVISION_AVAILABLE = find_spec("torchvision") is not None
-_IS_TENSORFLOW_AVAILABLE = find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None
 del find_spec
@@ -13,11 +12,6 @@ from dataeval import detectors, metrics  # noqa: E402
 __all__ = ["detectors", "metrics"]
 if _IS_TORCH_AVAILABLE:
-    from dataeval import workflows
+    from dataeval import utils, workflows
-    __all__ += ["workflows"]
-if _IS_TENSORFLOW_AVAILABLE or _IS_TORCH_AVAILABLE:
-    from dataeval import utils
-    __all__ += ["utils"]
+    __all__ += ["utils", "workflows"]

dataeval/detectors/__init__.py CHANGED Viewed

@@ -2,14 +2,6 @@
 Detectors can determine if a dataset or individual images in a dataset are indicative of a specific issue.
 """
-from dataeval import _IS_TENSORFLOW_AVAILABLE
-from dataeval.detectors import drift, linters
+from dataeval.detectors import drift, linters, ood
-__all__ = ["drift", "linters"]
-if _IS_TENSORFLOW_AVAILABLE:
-    from dataeval.detectors import ood
-    __all__ += ["ood"]
-del _IS_TENSORFLOW_AVAILABLE
+__all__ = ["drift", "linters", "ood"]

dataeval/detectors/drift/base.py CHANGED Viewed

@@ -19,7 +19,7 @@ import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from dataeval.interop import as_numpy
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 R = TypeVar("R")
@@ -43,7 +43,7 @@ class UpdateStrategy(ABC):
 @dataclass(frozen=True)
-class DriftBaseOutput(OutputMetadata):
+class DriftBaseOutput(Output):
     """
     Base output class for Drift detector classes
@@ -387,7 +387,7 @@ class BaseDriftUnivariate(BaseDrift):
         else:
             raise ValueError("`correction` needs to be either `bonferroni` or `fdr`.")
-    @set_metadata()
+    @set_metadata
     @preprocess_x
     @update_x_ref
     def predict(

dataeval/detectors/drift/mmd.py CHANGED Viewed

@@ -161,7 +161,7 @@ class DriftMMD(BaseDrift):
         distance_threshold = torch.sort(mmd2_permuted, descending=True).values[idx_threshold]
         return p_val.numpy().item(), mmd2.numpy().item(), distance_threshold.numpy().item()
-    @set_metadata()
+    @set_metadata
     @preprocess_x
     @update_x_ref
     def predict(self, x: ArrayLike) -> DriftMMDOutput:

dataeval/detectors/drift/torch.py CHANGED Viewed

@@ -10,7 +10,6 @@ from __future__ import annotations
 __all__ = []
-from functools import partial
 from typing import Any, Callable
 import numpy as np
@@ -18,30 +17,7 @@ import torch
 import torch.nn as nn
 from numpy.typing import NDArray
-def get_device(device: str | torch.device | None = None) -> torch.device:
-    """
-    Instantiates a PyTorch device object.
-    Parameters
-    ----------
-    device : str | torch.device | None, default None
-        Either ``None``, a str ('gpu' or 'cpu') indicating the device to choose, or an
-        already instantiated device object. If ``None``, the GPU is selected if it is
-        detected, otherwise the CPU is used as a fallback.
-    Returns
-    -------
-    The instantiated device object.
-    """
-    if isinstance(device, torch.device):  # Already a torch device
-        return device
-    else:  # Instantiate device
-        if device is None or device.lower() in ["gpu", "cuda"]:
-            torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        else:
-            torch_device = torch.device("cpu")
-    return torch_device
+from dataeval.utils.torch.utils import get_device, predict_batch
 def _mmd2_from_kernel_matrix(
@@ -79,82 +55,6 @@ def _mmd2_from_kernel_matrix(
     return mmd2
-def predict_batch(
-    x: NDArray[Any] | torch.Tensor,
-    model: Callable | nn.Module | nn.Sequential,
-    device: torch.device | None = None,
-    batch_size: int = int(1e10),
-    preprocess_fn: Callable | None = None,
-    dtype: type[np.generic] | torch.dtype = np.float32,
-) -> NDArray[Any] | torch.Tensor | tuple[Any, ...]:
-    """
-    Make batch predictions on a model.
-    Parameters
-    ----------
-    x : np.ndarray | torch.Tensor
-        Batch of instances.
-    model : Callable | nn.Module | nn.Sequential
-        PyTorch model.
-    device : torch.device | None, default None
-        Device type used. The default None tries to use the GPU and falls back on CPU.
-        Can be specified by passing either torch.device('cuda') or torch.device('cpu').
-    batch_size : int, default 1e10
-        Batch size used during prediction.
-    preprocess_fn : Callable | None, default None
-        Optional preprocessing function for each batch.
-    dtype : np.dtype | torch.dtype, default np.float32
-        Model output type, either a :term:`NumPy` or torch dtype, e.g. np.float32 or torch.float32.
-    Returns
-    -------
-    NDArray | torch.Tensor | tuple
-        Numpy array, torch tensor or tuples of those with model outputs.
-    """
-    device = get_device(device)
-    if isinstance(x, np.ndarray):
-        x = torch.from_numpy(x)
-    n = len(x)
-    n_minibatch = int(np.ceil(n / batch_size))
-    return_np = not isinstance(dtype, torch.dtype)
-    preds = []
-    with torch.no_grad():
-        for i in range(n_minibatch):
-            istart, istop = i * batch_size, min((i + 1) * batch_size, n)
-            x_batch = x[istart:istop]
-            if isinstance(preprocess_fn, Callable):
-                x_batch = preprocess_fn(x_batch)
-            preds_tmp = model(x_batch.to(device))
-            if isinstance(preds_tmp, (list, tuple)):
-                if len(preds) == 0:  # init tuple with lists to store predictions
-                    preds = tuple([] for _ in range(len(preds_tmp)))
-                for j, p in enumerate(preds_tmp):
-                    if isinstance(p, torch.Tensor):
-                        p = p.cpu()
-                    preds[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
-            elif isinstance(preds_tmp, (np.ndarray, torch.Tensor)):
-                if isinstance(preds_tmp, torch.Tensor):
-                    preds_tmp = preds_tmp.cpu()
-                if isinstance(preds, tuple):
-                    preds = list(preds)
-                preds.append(
-                    preds_tmp
-                    if not return_np or isinstance(preds_tmp, np.ndarray)  # type: ignore
-                    else preds_tmp.numpy()
-                )
-            else:
-                raise TypeError(
-                    f"Model output type {type(preds_tmp)} not supported. The model \
-                    output type needs to be one of list, tuple, NDArray or \
-                    torch.Tensor."
-                )
-    concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
-    out: tuple | np.ndarray | torch.Tensor = (
-        tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds)  # type: ignore
-    )
-    return out
 def preprocess_drift(
     x: NDArray[Any],
     model: nn.Module,

dataeval/detectors/linters/clusterer.py CHANGED Viewed

@@ -11,12 +11,12 @@ from scipy.cluster.hierarchy import linkage
 from scipy.spatial.distance import pdist, squareform
 from dataeval.interop import to_numpy
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 from dataeval.utils.shared import flatten
 @dataclass(frozen=True)
-class ClustererOutput(OutputMetadata):
+class ClustererOutput(Output):
     """
     Output class for :class:`Clusterer` lint detector
@@ -495,7 +495,7 @@ class Clusterer:
         return exact_dupes, near_dupes
     # TODO: Move data input to evaluate from class
-    @set_metadata(["data"])
+    @set_metadata(state=["data"])
     def evaluate(self) -> ClustererOutput:
         """Finds and flags indices of the data for Outliers and :term:`duplicates<Duplicates>`

dataeval/detectors/linters/duplicates.py CHANGED Viewed

@@ -9,7 +9,7 @@ from numpy.typing import ArrayLike
 from dataeval.detectors.linters.merged_stats import combine_stats, get_dataset_step_from_idx
 from dataeval.metrics.stats.hashstats import HashStatsOutput, hashstats
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 DuplicateGroup = list[int]
 DatasetDuplicateGroupMap = dict[int, DuplicateGroup]
@@ -17,7 +17,7 @@ TIndexCollection = TypeVar("TIndexCollection", DuplicateGroup, DatasetDuplicateG
 @dataclass(frozen=True)
-class DuplicatesOutput(Generic[TIndexCollection], OutputMetadata):
+class DuplicatesOutput(Generic[TIndexCollection], Output):
     """
     Output class for :class:`Duplicates` lint detector
@@ -89,7 +89,7 @@ class Duplicates:
     @overload
     def from_stats(self, hashes: Sequence[HashStatsOutput]) -> DuplicatesOutput[DatasetDuplicateGroupMap]: ...
-    @set_metadata(["only_exact"])
+    @set_metadata(state=["only_exact"])
     def from_stats(
         self, hashes: HashStatsOutput | Sequence[HashStatsOutput]
     ) -> DuplicatesOutput[DuplicateGroup] | DuplicatesOutput[DatasetDuplicateGroupMap]:
@@ -138,7 +138,7 @@ class Duplicates:
         return DuplicatesOutput(**duplicates)
-    @set_metadata(["only_exact"])
+    @set_metadata(state=["only_exact"])
     def evaluate(self, data: Iterable[ArrayLike]) -> DuplicatesOutput[DuplicateGroup]:
         """
         Returns duplicate image indices for both exact matches and near matches

dataeval/detectors/linters/outliers.py CHANGED Viewed

@@ -14,7 +14,7 @@ from dataeval.metrics.stats.datasetstats import DatasetStatsOutput, datasetstats
 from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput
 from dataeval.metrics.stats.pixelstats import PixelStatsOutput
 from dataeval.metrics.stats.visualstats import VisualStatsOutput
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 IndexIssueMap = dict[int, dict[str, float]]
 OutlierStatsOutput = Union[DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput]
@@ -22,7 +22,7 @@ TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
 @dataclass(frozen=True)
-class OutliersOutput(Generic[TIndexIssueMap], OutputMetadata):
+class OutliersOutput(Generic[TIndexIssueMap], Output):
     """
     Output class for :class:`Outliers` lint detector
@@ -159,7 +159,7 @@ class Outliers:
     @overload
     def from_stats(self, stats: Sequence[OutlierStatsOutput]) -> OutliersOutput[list[IndexIssueMap]]: ...
-    @set_metadata(["outlier_method", "outlier_threshold"])
+    @set_metadata(state=["outlier_method", "outlier_threshold"])
     def from_stats(
         self, stats: OutlierStatsOutput | DatasetStatsOutput | Sequence[OutlierStatsOutput]
     ) -> OutliersOutput[IndexIssueMap] | OutliersOutput[list[IndexIssueMap]]:
@@ -228,7 +228,7 @@ class Outliers:
         return OutliersOutput(output_list)
-    @set_metadata(["use_dimension", "use_pixel", "use_visual", "outlier_method", "outlier_threshold"])
+    @set_metadata(state=["use_dimension", "use_pixel", "use_visual", "outlier_method", "outlier_threshold"])
     def evaluate(self, data: Iterable[ArrayLike]) -> OutliersOutput[IndexIssueMap]:
         """
         Returns indices of Outliers with the issues identified for each

dataeval/detectors/ood/__init__.py CHANGED Viewed

@@ -2,14 +2,14 @@
 Out-of-distribution (OOD)` detectors identify data that is different from the data used to train a particular model.
 """
-from dataeval import _IS_TENSORFLOW_AVAILABLE
+from dataeval import _IS_TORCH_AVAILABLE
+from dataeval.detectors.ood.base import OODOutput, OODScoreOutput
-if _IS_TENSORFLOW_AVAILABLE:
-    from dataeval.detectors.ood.ae import OOD_AE
-    from dataeval.detectors.ood.aegmm import OOD_AEGMM
-    from dataeval.detectors.ood.base import OODOutput, OODScoreOutput
-    from dataeval.detectors.ood.llr import OOD_LLR
-    from dataeval.detectors.ood.vae import OOD_VAE
-    from dataeval.detectors.ood.vaegmm import OOD_VAEGMM
+__all__ = ["OODOutput", "OODScoreOutput"]
-    __all__ = ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM", "OODOutput", "OODScoreOutput"]
+if _IS_TORCH_AVAILABLE:
+    from dataeval.detectors.ood.ae_torch import OOD_AE
+    __all__ += ["OOD_AE"]
+del _IS_TORCH_AVAILABLE

dataeval/detectors/ood/{ae.py → ae_torch.py} RENAMED Viewed

@@ -1,4 +1,6 @@
 """
+Adapted for Pytorch from
 Source code derived from Alibi-Detect 0.11.4
 https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
@@ -8,55 +10,48 @@ Licensed under Apache Software License (Apache 2.0)
 from __future__ import annotations
-__all__ = ["OOD_AE"]
-from typing import TYPE_CHECKING, Callable
+from typing import Callable
 import numpy as np
+import torch
 from numpy.typing import ArrayLike
-from dataeval.detectors.ood.base import OODBase, OODScoreOutput
+from dataeval.detectors.ood.base import OODScoreOutput
+from dataeval.detectors.ood.base_torch import OODBase
 from dataeval.interop import as_numpy
-from dataeval.utils.lazy import lazyload
-from dataeval.utils.tensorflow._internal.utils import predict_batch
-if TYPE_CHECKING:
-    import tensorflow as tf
-    import tf_keras as keras
-    import dataeval.utils.tensorflow._internal.models as tf_models
-else:
-    tf = lazyload("tensorflow")
-    keras = lazyload("tf_keras")
-    tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
+from dataeval.utils.torch.utils import predict_batch
 class OOD_AE(OODBase):
     """
-    Autoencoder-based :term:`out of distribution<Out-of-distribution (OOD)>` detector.
+    Autoencoder based out-of-distribution detector.
     Parameters
     ----------
-    model : AE
-       An :term:`autoencoder<Autoencoder>` model.
+    model : AriaAutoencoder
+        An Autoencoder model.
     """
-    def __init__(self, model: tf_models.AE) -> None:
-        super().__init__(model)
+    def __init__(self, model: torch.nn.Module, device: str | torch.device | None = None) -> None:
+        super().__init__(model, device)
     def fit(
         self,
         x_ref: ArrayLike,
-        threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor] | None = None,
-        optimizer: keras.optimizers.Optimizer | None = None,
+        threshold_perc: float,
+        loss_fn: Callable[..., torch.nn.Module] | None = None,
+        optimizer: torch.optim.Optimizer | None = None,
         epochs: int = 20,
         batch_size: int = 64,
-        verbose: bool = True,
+        verbose: bool = False,
     ) -> None:
         if loss_fn is None:
-            loss_fn = keras.losses.MeanSquaredError()
-        super().fit(as_numpy(x_ref), threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+            loss_fn = torch.nn.MSELoss()
+        if optimizer is None:
+            optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
+        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
     def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := as_numpy(X))

dataeval/detectors/ood/base.py CHANGED Viewed

@@ -12,27 +12,18 @@ __all__ = ["OODOutput", "OODScoreOutput"]
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Callable, Literal, cast
+from typing import Callable, Generic, Literal, TypeVar
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from dataeval.interop import to_numpy
-from dataeval.output import OutputMetadata, set_metadata
-from dataeval.utils.lazy import lazyload
-from dataeval.utils.tensorflow._internal.gmm import GaussianMixtureModelParams, gmm_params
-from dataeval.utils.tensorflow._internal.trainer import trainer
-if TYPE_CHECKING:
-    import tensorflow as tf
-    import tf_keras as keras
-else:
-    tf = lazyload("tensorflow")
-    keras = lazyload("tf_keras")
+from dataeval.output import Output, set_metadata
+from dataeval.utils.gmm import GaussianMixtureModelParams
 @dataclass(frozen=True)
-class OODOutput(OutputMetadata):
+class OODOutput(Output):
     """
     Output class for predictions from :class:`OOD_AE`, :class:`OOD_AEGMM`, :class:`OOD_LLR`,
     :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
@@ -53,7 +44,7 @@ class OODOutput(OutputMetadata):
 @dataclass(frozen=True)
-class OODScoreOutput(OutputMetadata):
+class OODScoreOutput(Output):
     """
     Output class for instance and feature scores from :class:`OOD_AE`, :class:`OOD_AEGMM`,
     :class:`OOD_LLR`, :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
@@ -85,16 +76,62 @@ class OODScoreOutput(OutputMetadata):
         return self.instance_score if ood_type == "instance" or self.feature_score is None else self.feature_score
-class OODBase(ABC):
-    def __init__(self, model: keras.Model) -> None:
-        self.model = model
+TGMMData = TypeVar("TGMMData")
+class OODGMMMixin(Generic[TGMMData]):
+    _gmm_params: GaussianMixtureModelParams[TGMMData]
-        self._ref_score: OODScoreOutput
-        self._threshold_perc: float
-        self._data_info: tuple[tuple, type] | None = None
+TModel = TypeVar("TModel", bound=Callable)
+TLossFn = TypeVar("TLossFn", bound=Callable)
+TOptimizer = TypeVar("TOptimizer")
+class OODFitMixin(Generic[TLossFn, TOptimizer], ABC):
+    @abstractmethod
+    def fit(
+        self,
+        x_ref: ArrayLike,
+        threshold_perc: float,
+        loss_fn: TLossFn | None,
+        optimizer: TOptimizer | None,
+        epochs: int,
+        batch_size: int,
+        verbose: bool,
+    ) -> None:
+        """
+        Train the model and infer the threshold value.
-        if not isinstance(model, keras.Model):
-            raise TypeError("Model should be of type 'keras.Model'.")
+        Parameters
+        ----------
+        x_ref : ArrayLike
+            Training data.
+        threshold_perc : float, default 100.0
+            Percentage of reference data that is normal.
+        loss_fn : TLossFn
+            Loss function used for training.
+        optimizer : TOptimizer
+            Optimizer used for training.
+        epochs : int, default 20
+            Number of training epochs.
+        batch_size : int, default 64
+            Batch size used for training.
+        verbose : bool, default True
+            Whether to print training progress.
+        """
+class OODBaseMixin(Generic[TModel], ABC):
+    _ref_score: OODScoreOutput
+    _threshold_perc: float
+    _data_info: tuple[tuple, type] | None = None
+    def __init__(
+        self,
+        model: TModel,
+    ) -> None:
+        self.model = model
     def _get_data_info(self, X: NDArray) -> tuple[tuple, type]:
         if not isinstance(X, np.ndarray):
@@ -107,9 +144,8 @@ class OODBase(ABC):
             raise RuntimeError(f"Expect data of type: {self._data_info[1]} and shape: {self._data_info[0]}. \
                                Provided data is type: {check_data_info[1]} and shape: {check_data_info[0]}.")
-    def _validate_state(self, X: NDArray, additional_attrs: list[str] | None = None) -> None:
-        attrs = ["_data_info", "_threshold_perc", "_ref_score"]
-        attrs = attrs if additional_attrs is None else attrs + additional_attrs
+    def _validate_state(self, X: NDArray) -> None:
+        attrs = [k for c in self.__class__.mro()[:-1][::-1] if hasattr(c, "__annotations__") for k in c.__annotations__]
         if not all(hasattr(self, attr) for attr in attrs) or any(getattr(self, attr) for attr in attrs) is None:
             raise RuntimeError("Metric needs to be `fit` before method call.")
         self._validate(X)
@@ -117,7 +153,7 @@ class OODBase(ABC):
     @abstractmethod
     def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput: ...
-    @set_metadata()
+    @set_metadata
     def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the :term:`out of distribution<Out-of-distribution (OOD)>` scores for a given dataset.
@@ -140,53 +176,7 @@ class OODBase(ABC):
     def _threshold_score(self, ood_type: Literal["feature", "instance"] = "instance") -> np.floating:
         return np.percentile(self._ref_score.get(ood_type), self._threshold_perc)
-    def fit(
-        self,
-        x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable[..., tf.Tensor],
-        optimizer: keras.optimizers.Optimizer,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
-    ) -> None:
-        """
-        Train the model and infer the threshold value.
-        Parameters
-        ----------
-        x_ref : ArrayLike
-            Training data.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable | None, default None
-            Loss function used for training.
-        optimizer : Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
-        # Train the model
-        trainer(
-            model=self.model,
-            loss_fn=loss_fn,
-            x_train=to_numpy(x_ref),
-            optimizer=optimizer,
-            epochs=epochs,
-            batch_size=batch_size,
-            verbose=verbose,
-        )
-        # Infer the threshold values
-        self._ref_score = self.score(x_ref, batch_size)
-        self._threshold_perc = threshold_perc
-    @set_metadata()
+    @set_metadata
     def predict(
         self,
         X: ArrayLike,
@@ -215,43 +205,3 @@ class OODBase(ABC):
         score = self.score(X, batch_size=batch_size)
         ood_pred = score.get(ood_type) > self._threshold_score(ood_type)
         return OODOutput(is_ood=ood_pred, **score.dict())
-class OODGMMBase(OODBase):
-    def __init__(self, model: keras.Model) -> None:
-        super().__init__(model)
-        self.gmm_params: GaussianMixtureModelParams
-    def _validate_state(self, X: NDArray, additional_attrs: list[str] | None = None) -> None:
-        if additional_attrs is None:
-            additional_attrs = ["gmm_params"]
-        super()._validate_state(X, additional_attrs)
-    def fit(
-        self,
-        x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable[..., tf.Tensor],
-        optimizer: keras.optimizers.Optimizer,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
-    ) -> None:
-        # Train the model
-        trainer(
-            model=self.model,
-            loss_fn=loss_fn,
-            x_train=to_numpy(x_ref),
-            optimizer=optimizer,
-            epochs=epochs,
-            batch_size=batch_size,
-            verbose=verbose,
-        )
-        # Calculate the GMM parameters
-        _, z, gamma = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.model(x_ref))
-        self.gmm_params = gmm_params(z, gamma)
-        # Infer the threshold values
-        self._ref_score = self.score(x_ref, batch_size)
-        self._threshold_perc = threshold_perc

dataeval 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl

dataeval 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl