dataeval 0.69.4__py3-none-any.whl → 0.70.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +8 -8
- dataeval/_internal/datasets.py +235 -131
- dataeval/_internal/detectors/clusterer.py +2 -0
- dataeval/_internal/detectors/drift/base.py +7 -8
- dataeval/_internal/detectors/drift/mmd.py +4 -4
- dataeval/_internal/detectors/duplicates.py +64 -45
- dataeval/_internal/detectors/merged_stats.py +23 -54
- dataeval/_internal/detectors/ood/ae.py +8 -6
- dataeval/_internal/detectors/ood/aegmm.py +6 -4
- dataeval/_internal/detectors/ood/base.py +12 -7
- dataeval/_internal/detectors/ood/llr.py +6 -4
- dataeval/_internal/detectors/ood/vae.py +5 -3
- dataeval/_internal/detectors/ood/vaegmm.py +6 -4
- dataeval/_internal/detectors/outliers.py +137 -63
- dataeval/_internal/interop.py +11 -7
- dataeval/_internal/metrics/balance.py +13 -11
- dataeval/_internal/metrics/ber.py +5 -3
- dataeval/_internal/metrics/coverage.py +4 -0
- dataeval/_internal/metrics/divergence.py +9 -5
- dataeval/_internal/metrics/diversity.py +14 -12
- dataeval/_internal/metrics/parity.py +32 -22
- dataeval/_internal/metrics/stats/base.py +231 -0
- dataeval/_internal/metrics/stats/boxratiostats.py +159 -0
- dataeval/_internal/metrics/stats/datasetstats.py +99 -0
- dataeval/_internal/metrics/stats/dimensionstats.py +113 -0
- dataeval/_internal/metrics/stats/hashstats.py +75 -0
- dataeval/_internal/metrics/stats/labelstats.py +125 -0
- dataeval/_internal/metrics/stats/pixelstats.py +119 -0
- dataeval/_internal/metrics/stats/visualstats.py +124 -0
- dataeval/_internal/metrics/uap.py +8 -4
- dataeval/_internal/metrics/utils.py +30 -15
- dataeval/_internal/models/pytorch/autoencoder.py +5 -5
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
- dataeval/_internal/output.py +3 -18
- dataeval/_internal/utils.py +11 -16
- dataeval/_internal/workflows/sufficiency.py +152 -151
- dataeval/detectors/__init__.py +4 -0
- dataeval/detectors/drift/__init__.py +8 -3
- dataeval/detectors/drift/kernels/__init__.py +4 -0
- dataeval/detectors/drift/updates/__init__.py +4 -0
- dataeval/detectors/linters/__init__.py +15 -4
- dataeval/detectors/ood/__init__.py +14 -2
- dataeval/metrics/__init__.py +5 -0
- dataeval/metrics/bias/__init__.py +13 -4
- dataeval/metrics/estimators/__init__.py +8 -8
- dataeval/metrics/stats/__init__.py +25 -3
- dataeval/utils/__init__.py +16 -3
- dataeval/utils/tensorflow/__init__.py +11 -0
- dataeval/utils/torch/__init__.py +12 -0
- dataeval/utils/torch/datasets/__init__.py +7 -0
- dataeval/workflows/__init__.py +6 -2
- {dataeval-0.69.4.dist-info → dataeval-0.70.1.dist-info}/METADATA +12 -4
- dataeval-0.70.1.dist-info/RECORD +80 -0
- {dataeval-0.69.4.dist-info → dataeval-0.70.1.dist-info}/WHEEL +1 -1
- dataeval/_internal/flags.py +0 -77
- dataeval/_internal/metrics/stats.py +0 -397
- dataeval/flags/__init__.py +0 -3
- dataeval/tensorflow/__init__.py +0 -3
- dataeval/torch/__init__.py +0 -3
- dataeval-0.69.4.dist-info/RECORD +0 -74
- /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
- {dataeval-0.69.4.dist-info → dataeval-0.70.1.dist-info}/LICENSE.txt +0 -0
dataeval/_internal/models/pytorch/autoencoder.py
CHANGED
@@ -61,9 +61,9 @@ class AETrainer:
         List[float]
             A list of average loss values for each epoch.
 
-
+        Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.train = custom_function
         """
         # Setup training
@@ -120,7 +120,7 @@ class AETrainer:
 
         Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.eval = custom_function
         """
        self.model.eval()
@@ -155,8 +155,8 @@ class AETrainer:
         torch.Tensor
             Data encoded by the model
 
-
-
+        Note
+        ----
         This function should be run after the model has been trained and evaluated.
         """
        self.model.eval()
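The Note sections above suggest swapping in custom train/eval logic by reassigning the method. A minimal sketch of that pattern; the import path and signature here are assumptions, not confirmed by this diff:

from dataeval.utils.torch.trainer import AETrainer  # assumed 0.70.1 export path

def custom_train(self, dataset):
    losses = []  # custom training loop; collect the average loss per epoch
    return losses

AETrainer.train = custom_train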
dataeval/_internal/models/tensorflow/pixelcnn.py
CHANGED
@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
         The minimum value of the input data.
     dtype : tensorflow dtype, default tf.float32
         Data type of the `Distribution`.
-    name : str, default "PixelCNN"
-        The name of the `Distribution`.
     """
 
     def __init__(
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
         high: int = 255,
         low: int = 0,
         dtype=tf.float32,
-        name: str = "PixelCNN",
     ) -> None:
         parameters = dict(locals())
-        with tf.name_scope(
+        with tf.name_scope("PixelCNN") as name:
             super().__init__(
                 dtype=dtype,
                 reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
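The constructor change above hard-codes the scope name rather than accepting a `name` parameter. A minimal standalone sketch of the tf.name_scope pattern now used:

import tensorflow as tf

# Entering the scope yields the resolved scope string, which the constructor
# above now binds to `name` and passes to the parent Distribution.
with tf.name_scope("PixelCNN") as name:
    print(name)  # "PixelCNN/"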
dataeval/_internal/output.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import inspect
 from datetime import datetime, timezone
 from functools import wraps
+from typing import Any
 
 import numpy as np
 
@@ -17,10 +18,10 @@ class OutputMetadata:
     _state: dict[str, str]
     _version: str
 
-    def dict(self) -> dict:
+    def dict(self) -> dict[str, Any]:
         return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
 
-    def meta(self) -> dict:
+    def meta(self) -> dict[str, Any]:
         return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
 
 
@@ -67,19 +68,3 @@ def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
         return wrapper
 
     return decorator
-
-
-def populate_defaults(d: dict, c: type) -> dict:
-    def default(t):
-        t = (
-            t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
-        ).lower()  # py3.9 : _name, py3.10 : __name__
-        if t.startswith("dict"):
-            return {}
-        if t.startswith("list"):
-            return []
-        if t.startswith("ndarray"):
-            return np.array([])
-        raise TypeError("Unrecognized annotation type")
-
-    return {k: d[k] if k in d else default(t) for k, t in c.__annotations__.items()}
dataeval/_internal/utils.py
CHANGED
@@ -8,7 +8,7 @@ from torch.utils.data import Dataset
 
 def read_dataset(dataset: Dataset) -> list[list[Any]]:
     """
-    Extract information from a dataset at each index into
+    Extract information from a dataset at each index into individual lists of each information position
 
     Parameters
     ----------
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
     Examples
     --------
     >>> import numpy as np
-
-    >>> data = np.ones((10, 3, 3))
+    >>> data = np.ones((10, 1, 3, 3))
     >>> labels = np.ones((10,))
     >>> class ICDataset:
     ...     def __init__(self, data, labels):
     ...         self.data = data
     ...         self.labels = labels
-
+    ...
     ...     def __getitem__(self, idx):
     ...         return self.data[idx], self.labels[idx]
 
     >>> ds = ICDataset(data, labels)
 
     >>> result = read_dataset(ds)
-    >>>
-
-    >>>
-
-    >>>
-
+    >>> len(result) # images and labels
+    2
+    >>> np.asarray(result[0]).shape # images
+    (10, 1, 3, 3)
+    >>> np.asarray(result[1]).shape # labels
+    (10,)
     """
 
-    ddict: dict[int, list] = defaultdict(list)
+    ddict: dict[int, list[Any]] = defaultdict(list[Any])
 
     for data in dataset:
-
-        if not isinstance(data, tuple):
-            data = (data,)
-
-        for i, d in enumerate(data):
+        for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
             ddict[i].append(d)
 
     return list(ddict.values())
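The condensed loop above wraps non-tuple items in a 1-tuple so single- and multi-element datasets share one code path. A standalone sketch with hypothetical data:

from collections import defaultdict
from typing import Any

items = [(1, "a"), (2, "b"), 3]  # hypothetical dataset with a bare trailing item
ddict: dict[int, list[Any]] = defaultdict(list)
for data in items:
    # non-tuples are wrapped so positional grouping always applies
    for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
        ddict[i].append(d)
print(list(ddict.values()))  # [[1, 2, 3], ['a', 'b']]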
dataeval/_internal/workflows/sufficiency.py
CHANGED
@@ -2,23 +2,26 @@ from __future__ import annotations
 
 import warnings
 from dataclasses import dataclass
-from typing import Any, Callable, Sequence, cast
+from typing import Any, Callable, Iterable, Mapping, Sequence, cast
 
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn as nn
 from matplotlib.figure import Figure
-from numpy.typing import NDArray
+from numpy.typing import ArrayLike, NDArray
 from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
 
+from dataeval._internal.interop import as_numpy
 from dataeval._internal.output import OutputMetadata, set_metadata
 
 
 @dataclass(frozen=True)
 class SufficiencyOutput(OutputMetadata):
     """
+    Output class for :class:`Sufficiency` workflow
+
     Attributes
     ----------
     steps : NDArray
@@ -42,6 +45,130 @@ class SufficiencyOutput(OutputMetadata):
         if c != c_v:
             raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
 
+    @set_metadata("dataeval.workflows.SufficiencyOutput")
+    def project(
+        self,
+        projection: int | Iterable[int],
+    ) -> SufficiencyOutput:
+        """Projects the measures for each value of X
+
+        Parameters
+        ----------
+        projection : int | Iterable[int]
+            Step or steps to project
+
+        Returns
+        -------
+        SufficiencyOutput
+            Dataclass containing the projected measures per projection
+
+        Raises
+        ------
+        ValueError
+            If the length of data points in the measures do not match
+            If `projection` is not numerical
+        """
+        projection = np.asarray(list(projection) if isinstance(projection, Iterable) else [projection])
+
+        if not np.issubdtype(projection.dtype, np.number):
+            raise ValueError("'projection' must consist of numerical values")
+
+        output = {}
+        for name, measures in self.measures.items():
+            if measures.ndim > 1:
+                result = []
+                for i in range(len(measures)):
+                    projected = project_steps(self.params[name][i], projection)
+                    result.append(projected)
+                output[name] = np.array(result)
+            else:
+                output[name] = project_steps(self.params[name], projection)
+        return SufficiencyOutput(projection, self.params, output)
+
+    def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
+        """Plotting function for data sufficiency tasks
+
+        Parameters
+        ----------
+        class_names : Sequence[str] | None, default None
+            List of class names
+
+        Returns
+        -------
+        list[plt.Figure]
+            List of Figures for each measure
+
+        Raises
+        ------
+        ValueError
+            If the length of data points in the measures do not match
+        """
+        # Extrapolation parameters
+        last_X = self.steps[-1]
+        geomshape = (0.01 * last_X, last_X * 4, len(self.steps))
+        extrapolated = np.geomspace(*geomshape).astype(np.int64)
+
+        # Stores all plots
+        plots = []
+
+        # Create a plot for each measure on one figure
+        for name, measures in self.measures.items():
+            if measures.ndim > 1:
+                if class_names is not None and len(measures) != len(class_names):
+                    raise IndexError("Class name count does not align with measures")
+                for i, measure in enumerate(measures):
+                    class_name = str(i) if class_names is None else class_names[i]
+                    fig = plot_measure(
+                        f"{name}_{class_name}",
+                        self.steps,
+                        measure,
+                        self.params[name][i],
+                        extrapolated,
+                    )
+                    plots.append(fig)
+
+            else:
+                fig = plot_measure(name, self.steps, measures, self.params[name], extrapolated)
+                plots.append(fig)
+
+        return plots
+
+    def inv_project(self, targets: Mapping[str, ArrayLike]) -> dict[str, NDArray[np.float64]]:
+        """
+        Calculate training samples needed to achieve target model metric values.
+
+        Parameters
+        ----------
+        targets : Mapping[str, ArrayLike]
+            Mapping of target metric scores (from 0.0 to 1.0) that we want
+            to achieve, where the key is the name of the metric.
+
+        Returns
+        -------
+        dict[str, NDArray]
+            List of the number of training samples needed to achieve each
+            corresponding entry in targets
+        """
+
+        projection = {}
+
+        for name, target in targets.items():
+            tarray = as_numpy(target)
+            if name not in self.measures:
+                continue
+
+            measure = self.measures[name]
+            if measure.ndim > 1:
+                projection[name] = np.zeros((len(measure), len(tarray)))
+                for i in range(len(measure)):
+                    projection[name][i] = inv_project_steps(
+                        self.params[name][i], tarray[i] if tarray.ndim == measure.ndim else tarray
+                    )
+            else:
+                projection[name] = inv_project_steps(self.params[name], tarray)
+
+        return projection
+
 
 def f_out(n_i: NDArray, x: NDArray) -> NDArray:
     """
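A brief, hedged usage sketch of the new instance methods, assuming `output` is the SufficiencyOutput returned by Sufficiency.evaluate() (the metric name and values are illustrative):

# projected = output.project([1000, 2000, 4000])          # extrapolate measures to new steps
# figures = output.plot()                                 # one Figure per measure
# samples = output.inv_project({"Accuracy": [0.8, 0.9]})  # targets may now be any ArrayLike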
@@ -256,18 +383,18 @@ class Sufficiency:
         Function which takes a model (torch.nn.Module), a dataset
         (torch.utils.data.Dataset), indices to train on and executes model
         training against the data.
-    eval_fn : Callable[[nn.Module, Dataset],
+    eval_fn : Callable[[nn.Module, Dataset], Mapping[str, float | ArrayLike]]
         Function which takes a model (torch.nn.Module), a dataset
         (torch.utils.data.Dataset) and returns a dictionary of metric
-        values (
+        values (Mapping[str, float]) which is used to assess model performance
         given the model and data.
     runs : int, default 1
         Number of models to run over all subsets
     substeps : int, default 5
         Total number of dataset partitions that each model will train on
-    train_kwargs :
+    train_kwargs : Mapping | None, default None
         Additional arguments required for custom training function
-    eval_kwargs :
+    eval_kwargs : Mapping | None, default None
         Additional arguments required for custom evaluation function
     """
 
@@ -277,11 +404,11 @@ class Sufficiency:
         train_ds: Dataset,
         test_ds: Dataset,
         train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
-        eval_fn: Callable[[nn.Module, Dataset],
+        eval_fn: Callable[[nn.Module, Dataset], Mapping[str, float] | Mapping[str, ArrayLike]],
         runs: int = 1,
         substeps: int = 5,
-        train_kwargs:
-        eval_kwargs:
+        train_kwargs: Mapping[str, Any] | None = None,
+        eval_kwargs: Mapping[str, Any] | None = None,
     ):
         self.model = model
         self.train_ds = train_ds
@@ -324,42 +451,42 @@ class Sufficiency:
     @property
     def eval_fn(
         self,
-    ) -> Callable[[nn.Module, Dataset], dict[str, float] |
+    ) -> Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]]:
         return self._eval_fn
 
     @eval_fn.setter
     def eval_fn(
         self,
-        value: Callable[[nn.Module, Dataset], dict[str, float] |
+        value: Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]],
     ):
         if not callable(value):
             raise TypeError("Must provide a callable for eval_fn.")
         self._eval_fn = value
 
     @property
-    def train_kwargs(self) ->
+    def train_kwargs(self) -> Mapping[str, Any]:
         return self._train_kwargs
 
     @train_kwargs.setter
-    def train_kwargs(self, value:
+    def train_kwargs(self, value: Mapping[str, Any] | None):
         self._train_kwargs = {} if value is None else value
 
     @property
-    def eval_kwargs(self) ->
+    def eval_kwargs(self) -> Mapping[str, Any]:
         return self._eval_kwargs
 
     @eval_kwargs.setter
-    def eval_kwargs(self, value:
+    def eval_kwargs(self, value: Mapping[str, Any] | None):
         self._eval_kwargs = {} if value is None else value
 
     @set_metadata("dataeval.workflows", ["runs", "substeps"])
-    def evaluate(self, eval_at:
+    def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
         """
         Creates data indices, trains models, and returns plotting data
 
         Parameters
         ----------
-        eval_at :
+        eval_at : int | Iterable[int] | None, default None
             Specify this to collect accuracies over a specific set of dataset lengths, rather
             than letting Sufficiency internally create the lengths to evaluate at.
         niter : int, default 1000
@@ -370,6 +497,11 @@ class Sufficiency:
         SufficiencyOutput
             Dataclass containing the average of each measure per substep
 
+        Raises
+        ------
+        ValueError
+            If `eval_at` is not numerical
+
         Examples
         --------
         >>> suff = Sufficiency(
@@ -379,7 +511,9 @@ class Sufficiency:
         SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
         """  # noqa: E501
         if eval_at is not None:
-            ranges = eval_at
+            ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
+            if not np.issubdtype(ranges.dtype, np.number):
+                raise ValueError("'eval_at' must consist of numerical values")
         else:
             geomshape = (
                 0.01 * self._length,
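A short sketch of the new eval_at validation (the Sufficiency construction is elided; values are illustrative):

# output = suff.evaluate(eval_at=[100, 1000, 5000])  # any Iterable[int] of dataset lengths
# suff.evaluate(eval_at="all")  # raises ValueError: 'eval_at' must consist of numerical values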
@@ -421,136 +555,3 @@ class Sufficiency:
         measures = {k: (v / self.runs).T for k, v in measures.items()}
         params_output = get_curve_params(measures, ranges, niter)
         return SufficiencyOutput(ranges, params_output, measures)
-
-    @classmethod
-    def project(
-        cls,
-        data: SufficiencyOutput,
-        projection: int | Sequence[int] | NDArray[np.uint],
-    ) -> SufficiencyOutput:
-        """Projects the measures for each value of X
-
-        Parameters
-        ----------
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-        projection : int | Sequence[int] | NDArray[np.uint]
-            Step or steps to project
-
-        Returns
-        -------
-        SufficiencyOutput
-            Dataclass containing the projected measures per projection
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-            If the steps are not int, Sequence[int] or an ndarray
-        """
-        projection = [projection] if isinstance(projection, int) else projection
-        projection = np.array(projection) if isinstance(projection, Sequence) else projection
-        if not isinstance(projection, np.ndarray):
-            raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
-
-        output = {}
-        for name, measures in data.measures.items():
-            if measures.ndim > 1:
-                result = []
-                for i in range(len(measures)):
-                    projected = project_steps(data.params[name][i], projection)
-                    result.append(projected)
-                output[name] = np.array(result)
-            else:
-                output[name] = project_steps(data.params[name], projection)
-        return SufficiencyOutput(projection, data.params, output)
-
-    @classmethod
-    def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
-        """Plotting function for data sufficiency tasks
-
-        Parameters
-        ----------
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-        class_names : Sequence[str] | None, default None
-            List of class names
-
-        Returns
-        -------
-        List[plt.Figure]
-            List of Figures for each measure
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-        """
-        # Extrapolation parameters
-        last_X = data.steps[-1]
-        geomshape = (0.01 * last_X, last_X * 4, len(data.steps))
-        extrapolated = np.geomspace(*geomshape).astype(np.int64)
-
-        # Stores all plots
-        plots = []
-
-        # Create a plot for each measure on one figure
-        for name, measures in data.measures.items():
-            if measures.ndim > 1:
-                if class_names is not None and len(measures) != len(class_names):
-                    raise IndexError("Class name count does not align with measures")
-                for i, measure in enumerate(measures):
-                    class_name = str(i) if class_names is None else class_names[i]
-                    fig = plot_measure(
-                        f"{name}_{class_name}",
-                        data.steps,
-                        measure,
-                        data.params[name][i],
-                        extrapolated,
-                    )
-                    plots.append(fig)
-
-            else:
-                fig = plot_measure(name, data.steps, measures, data.params[name], extrapolated)
-                plots.append(fig)
-
-        return plots
-
-    @classmethod
-    def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
-        """
-        Calculate training samples needed to achieve target model metric values.
-
-        Parameters
-        ----------
-        targets : Dict[str, NDArray]
-            Dictionary of target metric scores (from 0.0 to 1.0) that we want
-            to achieve, where the key is the name of the metric.
-
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-
-        Returns
-        -------
-        Dict[str, NDArray]
-            List of the number of training samples needed to achieve each
-            corresponding entry in targets
-        """
-
-        projection = {}
-
-        for name, target in targets.items():
-            if name not in data.measures:
-                continue
-
-            measure = data.measures[name]
-            if measure.ndim > 1:
-                projection[name] = np.zeros((len(measure), len(target)))
-                for i in range(len(measure)):
-                    projection[name][i] = inv_project_steps(
-                        data.params[name][i], target[i] if target.ndim == measure.ndim else target
-                    )
-            else:
-                projection[name] = inv_project_steps(data.params[name], target)
-
-        return projection
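In summary, project, plot, and inv_project moved from Sufficiency classmethods onto SufficiencyOutput, so callers migrate roughly as follows (a sketch, not verbatim dataeval documentation):

# 0.69.4 (removed classmethods):
#   projected = Sufficiency.project(output, [1000, 2000])
#   figures = Sufficiency.plot(output, class_names)
#   samples = Sufficiency.inv_project(targets, output)
# 0.70.1 (instance methods):
#   projected = output.project([1000, 2000])
#   figures = output.plot(class_names)
#   samples = output.inv_project(targets)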
dataeval/detectors/drift/__init__.py
CHANGED
@@ -1,16 +1,21 @@
+"""
+Drift detectors identify if the statistical properties of the data has changed.
+"""
+
 from dataeval import _IS_TORCH_AVAILABLE
+from dataeval._internal.detectors.drift.base import DriftOutput
 from dataeval._internal.detectors.drift.cvm import DriftCVM
 from dataeval._internal.detectors.drift.ks import DriftKS
 
 from . import updates
 
-__all__ = ["DriftCVM", "DriftKS", "updates"]
+__all__ = ["DriftCVM", "DriftKS", "DriftOutput", "updates"]
 
 if _IS_TORCH_AVAILABLE:  # pragma: no cover
-    from dataeval._internal.detectors.drift.mmd import DriftMMD
+    from dataeval._internal.detectors.drift.mmd import DriftMMD, DriftMMDOutput
     from dataeval._internal.detectors.drift.torch import preprocess_drift
     from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
 
     from . import kernels
 
-    __all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
+    __all__ += ["DriftMMD", "DriftMMDOutput", "DriftUncertainty", "kernels", "preprocess_drift"]
dataeval/detectors/drift/updates/__init__.py
CHANGED
@@ -1,3 +1,7 @@
+"""
+Update strategies inform how the drift detector classes update the reference data when monitoring for drift.
+"""
+
 from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
 
 __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]
dataeval/detectors/linters/__init__.py
CHANGED
@@ -1,5 +1,16 @@
-
-
-
+"""
+Linters help identify potential issues in training and test data and are an important aspect of data cleaning.
+"""
 
-
+from dataeval._internal.detectors.clusterer import Clusterer, ClustererOutput
+from dataeval._internal.detectors.duplicates import Duplicates, DuplicatesOutput
+from dataeval._internal.detectors.outliers import Outliers, OutliersOutput
+
+__all__ = [
+    "Clusterer",
+    "ClustererOutput",
+    "Duplicates",
+    "DuplicatesOutput",
+    "Outliers",
+    "OutliersOutput",
+]
dataeval/detectors/ood/__init__.py
CHANGED
@@ -1,11 +1,23 @@
+"""
+Out-of-distribution detectors identify data that is different from the data used to train a particular model.
+"""
+
 from dataeval import _IS_TENSORFLOW_AVAILABLE
 
 if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
     from dataeval._internal.detectors.ood.ae import OOD_AE
     from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
-    from dataeval._internal.detectors.ood.base import OODOutput,
+    from dataeval._internal.detectors.ood.base import OODOutput, OODScoreOutput
     from dataeval._internal.detectors.ood.llr import OOD_LLR
     from dataeval._internal.detectors.ood.vae import OOD_VAE
     from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
 
-    __all__ = [
+    __all__ = [
+        "OOD_AE",
+        "OOD_AEGMM",
+        "OOD_LLR",
+        "OOD_VAE",
+        "OOD_VAEGMM",
+        "OODOutput",
+        "OODScoreOutput",
+    ]
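Taken together, the new module docstrings and expanded __all__ lists widen the public import surface in 0.70.1. For example, with dataeval 0.70.1 installed:

from dataeval.detectors.drift import DriftCVM, DriftKS, DriftOutput, updates
from dataeval.detectors.linters import Clusterer, Duplicates, Outliers, OutliersOutput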
dataeval/metrics/__init__.py
CHANGED