dataeval 0.76.1__py3-none-any.whl → 0.82.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +3 -3
- dataeval/config.py +77 -0
- dataeval/detectors/__init__.py +1 -1
- dataeval/detectors/drift/__init__.py +6 -6
- dataeval/detectors/drift/{base.py → _base.py} +40 -85
- dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
- dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
- dataeval/detectors/drift/{mmd.py → _mmd.py} +31 -43
- dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
- dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +24 -7
- dataeval/detectors/drift/updates.py +20 -3
- dataeval/detectors/linters/__init__.py +3 -5
- dataeval/detectors/linters/duplicates.py +13 -36
- dataeval/detectors/linters/outliers.py +23 -148
- dataeval/detectors/ood/__init__.py +1 -1
- dataeval/detectors/ood/ae.py +30 -9
- dataeval/detectors/ood/base.py +5 -4
- dataeval/detectors/ood/mixin.py +21 -7
- dataeval/detectors/ood/vae.py +73 -0
- dataeval/metadata/__init__.py +6 -0
- dataeval/metadata/_distance.py +167 -0
- dataeval/metadata/_ood.py +217 -0
- dataeval/metadata/_utils.py +44 -0
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +6 -4
- dataeval/metrics/bias/{balance.py → _balance.py} +15 -101
- dataeval/metrics/bias/_coverage.py +98 -0
- dataeval/metrics/bias/{diversity.py → _diversity.py} +18 -111
- dataeval/metrics/bias/{parity.py → _parity.py} +39 -77
- dataeval/metrics/estimators/__init__.py +15 -4
- dataeval/metrics/estimators/{ber.py → _ber.py} +42 -29
- dataeval/metrics/estimators/_clusterer.py +44 -0
- dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -30
- dataeval/metrics/estimators/{uap.py → _uap.py} +4 -18
- dataeval/metrics/stats/__init__.py +16 -13
- dataeval/metrics/stats/{base.py → _base.py} +82 -133
- dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +15 -18
- dataeval/metrics/stats/_dimensionstats.py +75 -0
- dataeval/metrics/stats/{hashstats.py → _hashstats.py} +21 -37
- dataeval/metrics/stats/_imagestats.py +94 -0
- dataeval/metrics/stats/_labelstats.py +131 -0
- dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +19 -50
- dataeval/metrics/stats/{visualstats.py → _visualstats.py} +23 -54
- dataeval/outputs/__init__.py +53 -0
- dataeval/{output.py → outputs/_base.py} +55 -25
- dataeval/outputs/_bias.py +381 -0
- dataeval/outputs/_drift.py +83 -0
- dataeval/outputs/_estimators.py +114 -0
- dataeval/outputs/_linters.py +184 -0
- dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
- dataeval/outputs/_stats.py +387 -0
- dataeval/outputs/_utils.py +44 -0
- dataeval/outputs/_workflows.py +364 -0
- dataeval/typing.py +234 -0
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +169 -0
- dataeval/utils/_bin.py +199 -0
- dataeval/utils/_clusterer.py +144 -0
- dataeval/utils/_fast_mst.py +189 -0
- dataeval/utils/{image.py → _image.py} +6 -4
- dataeval/utils/_method.py +14 -0
- dataeval/utils/{shared.py → _mst.py} +3 -65
- dataeval/utils/{plot.py → _plot.py} +6 -6
- dataeval/utils/data/__init__.py +26 -0
- dataeval/utils/data/_dataset.py +217 -0
- dataeval/utils/data/_embeddings.py +104 -0
- dataeval/utils/data/_images.py +68 -0
- dataeval/utils/data/_metadata.py +360 -0
- dataeval/utils/data/_selection.py +126 -0
- dataeval/utils/{dataset/split.py → data/_split.py} +12 -38
- dataeval/utils/data/_targets.py +85 -0
- dataeval/utils/data/collate.py +103 -0
- dataeval/utils/data/datasets/__init__.py +17 -0
- dataeval/utils/data/datasets/_base.py +254 -0
- dataeval/utils/data/datasets/_cifar10.py +134 -0
- dataeval/utils/data/datasets/_fileio.py +168 -0
- dataeval/utils/data/datasets/_milco.py +153 -0
- dataeval/utils/data/datasets/_mixin.py +56 -0
- dataeval/utils/data/datasets/_mnist.py +183 -0
- dataeval/utils/data/datasets/_ships.py +123 -0
- dataeval/utils/data/datasets/_types.py +52 -0
- dataeval/utils/data/datasets/_voc.py +352 -0
- dataeval/utils/data/selections/__init__.py +15 -0
- dataeval/utils/data/selections/_classfilter.py +57 -0
- dataeval/utils/data/selections/_indices.py +26 -0
- dataeval/utils/data/selections/_limit.py +26 -0
- dataeval/utils/data/selections/_reverse.py +18 -0
- dataeval/utils/data/selections/_shuffle.py +29 -0
- dataeval/utils/metadata.py +51 -376
- dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
- dataeval/utils/torch/{internal.py → _internal.py} +21 -51
- dataeval/utils/torch/models.py +43 -2
- dataeval/workflows/__init__.py +2 -1
- dataeval/workflows/sufficiency.py +11 -346
- {dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/METADATA +5 -2
- dataeval-0.82.0.dist-info/RECORD +104 -0
- dataeval/detectors/linters/clusterer.py +0 -512
- dataeval/detectors/linters/merged_stats.py +0 -49
- dataeval/detectors/ood/metadata_ks_compare.py +0 -129
- dataeval/detectors/ood/metadata_least_likely.py +0 -119
- dataeval/interop.py +0 -69
- dataeval/metrics/bias/coverage.py +0 -194
- dataeval/metrics/stats/datasetstats.py +0 -202
- dataeval/metrics/stats/dimensionstats.py +0 -115
- dataeval/metrics/stats/labelstats.py +0 -210
- dataeval/utils/dataset/__init__.py +0 -7
- dataeval/utils/dataset/datasets.py +0 -412
- dataeval/utils/dataset/read.py +0 -63
- dataeval-0.76.1.dist-info/RECORD +0 -67
- /dataeval/{log.py → _log.py} +0 -0
- /dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
- {dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/WHEEL +0 -0
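The listing above amounts to a package-wide reorganization: most implementation modules move behind underscore-prefixed (private) names, result classes consolidate under a new dataeval.outputs package, and dataset/selection utilities move into dataeval.utils.data. A minimal sketch of how import paths shift, using only paths confirmed by the diffs below (anything else should be treated as an assumption):

    # dataeval 0.76.1 and 0.82.0: unchanged public names
    from dataeval.workflows import Sufficiency, SufficiencyOutput

    # dataeval 0.82.0 additions, per the diffs that follow
    from dataeval.config import get_device            # replaces the helper removed from utils/torch/internal.py
    from dataeval.outputs import SufficiencyOutput     # output classes now live under dataeval.outputs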
dataeval/utils/torch/{internal.py → _internal.py}
RENAMED
@@ -11,30 +11,7 @@ from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
 
-
-def get_device(device: str | torch.device | None = None) -> torch.device:
-    """
-    Instantiates a PyTorch device object.
-
-    Parameters
-    ----------
-    device : str | torch.device | None, default None
-        Either ``None``, a str ('gpu' or 'cpu') indicating the device to choose, or an
-        already instantiated device object. If ``None``, the GPU is selected if it is
-        detected, otherwise the CPU is used as a fallback.
-
-    Returns
-    -------
-    The instantiated device object.
-    """
-    if isinstance(device, torch.device):  # Already a torch device
-        return device
-    else:  # Instantiate device
-        if device is None or device.lower() in ["gpu", "cuda"]:
-            torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        else:
-            torch_device = torch.device("cpu")
-        return torch_device
+from dataeval.config import get_device
 
 
 def predict_batch(
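The helper removed here is not deleted from the package, only relocated: get_device now lives in the new dataeval.config module (see dataeval/config.py +77 in the file list). A minimal sketch of calling it from its new home, assuming it keeps the behavior described by the old docstring:

    import torch
    from dataeval.config import get_device

    device = get_device(None)                   # CUDA if available, otherwise CPU
    device = get_device("cpu")                  # explicit CPU
    device = get_device(torch.device("cuda"))   # an existing device object is passed through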
@@ -42,7 +19,7 @@ def predict_batch(
     model: Callable | torch.nn.Module | torch.nn.Sequential,
     device: torch.device | None = None,
     batch_size: int = int(1e10),
-    preprocess_fn: Callable | None = None,
+    preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
     dtype: type[np.generic] | torch.dtype = np.float32,
 ) -> NDArray[Any] | torch.Tensor | tuple[Any, ...]:
     """
@@ -71,11 +48,12 @@ def predict_batch(
     """
     device = get_device(device)
     if isinstance(x, np.ndarray):
-        x = torch.
+        x = torch.tensor(x, device=device)
     n = len(x)
     n_minibatch = int(np.ceil(n / batch_size))
     return_np = not isinstance(dtype, torch.dtype)
-
+    preds_tuple = None
+    preds_array = []
     with torch.no_grad():
         for i in range(n_minibatch):
             istart, istop = i * batch_size, min((i + 1) * batch_size, n)
@@ -83,23 +61,17 @@ def predict_batch(
             if isinstance(preprocess_fn, Callable):
                 x_batch = preprocess_fn(x_batch)
 
-            preds_tmp = model(x_batch.to(torch.float32)
+            preds_tmp = model(x_batch.to(dtype=torch.float32))
             if isinstance(preds_tmp, (list, tuple)):
-                if
-
+                if preds_tuple is None:  # init tuple with lists to store predictions
+                    preds_tuple = tuple([] for _ in range(len(preds_tmp)))
                 for j, p in enumerate(preds_tmp):
-                    if isinstance(p, torch.Tensor)
-
-                    preds[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
+                    p = p.cpu() if isinstance(p, torch.Tensor) else p
+                    preds_tuple[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
             elif isinstance(preds_tmp, (np.ndarray, torch.Tensor)):
-                if isinstance(preds_tmp, torch.Tensor)
-
-
-                preds = list(preds)
-                preds.append(
-                    preds_tmp
-                    if not return_np or isinstance(preds_tmp, np.ndarray)  # type: ignore
-                    else preds_tmp.numpy()
+                preds_tmp = preds_tmp.cpu() if isinstance(preds_tmp, torch.Tensor) else preds_tmp
+                preds_array.append(
+                    preds_tmp if not return_np or isinstance(preds_tmp, np.ndarray) else preds_tmp.numpy()
                 )
             else:
                 raise TypeError(
@@ -108,9 +80,7 @@ def predict_batch(
                     torch.Tensor."
                 )
     concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
-    out
-        tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds)  # type: ignore
-    )
+    out = tuple(concat(p) for p in preds_tuple) if preds_tuple is not None else concat(preds_array)
     return out
 
 
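Functionally, predict_batch now builds its input with torch.tensor(x, device=device), keeps tuple outputs and single-array outputs in separate accumulators (preds_tuple / preds_array), and moves tensors to the CPU before any NumPy conversion. A rough usage sketch against a CPU model; the name of the first positional parameter falls outside the shown hunks and _internal is a private module, so treat both as assumptions:

    import numpy as np
    import torch

    from dataeval.utils.torch._internal import predict_batch

    model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10))
    x = np.random.rand(256, 3, 32, 32).astype(np.float32)

    # default dtype=np.float32 returns a NumPy array; pass a torch.dtype to get tensors back
    preds = predict_batch(x, model, device=torch.device("cpu"), batch_size=64)
    print(preds.shape)  # (256, 10)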
@@ -154,18 +124,18 @@ def trainer(
     verbose
         Whether to print training progress.
     """
+    if loss_fn is None:
+        loss_fn = torch.nn.MSELoss()
+
     if optimizer is None:
         optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     if y_train is None:
-        dataset = TensorDataset(torch.
-
+        dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32))
     else:
-        dataset = TensorDataset(
-            torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
-        )
+        dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
 
-    loader = DataLoader(dataset=dataset)
+    loader = DataLoader(dataset=dataset, batch_size=batch_size)
 
     model = model.to(device)
 
@@ -186,7 +156,7 @@ def trainer(
             y_hat = model(x)
             y = x if y is None else y
 
-            loss = loss_fn(y, y_hat)  # type: ignore
+            loss = loss_fn(y, *y_hat) if isinstance(y_hat, tuple) else loss_fn(y, y_hat)  # type: ignore
 
             optimizer.zero_grad()
             loss.backward()
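The new loss line means trainer can now drive models whose forward returns a tuple of tensors, provided the supplied loss_fn accepts the target followed by each output. A toy sketch of such a pair (these names are illustrative, not part of DataEval):

    import torch
    import torch.nn as nn

    class TinyAE(nn.Module):
        """Toy autoencoder whose forward returns (reconstruction, embedding)."""

        def __init__(self) -> None:
            super().__init__()
            self.enc = nn.Linear(8, 2)
            self.dec = nn.Linear(2, 8)

        def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
            z = self.enc(x)
            return self.dec(z), z

    def recon_loss(target: torch.Tensor, recon: torch.Tensor, z: torch.Tensor) -> torch.Tensor:
        # called as loss_fn(y, *y_hat): target first, then each model output
        return torch.mean((target - recon) ** 2) + 1e-3 * torch.mean(z ** 2)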
dataeval/utils/torch/models.py
CHANGED
@@ -2,13 +2,19 @@
 
 from __future__ import annotations
 
-__all__ = ["Autoencoder", "Encoder", "Decoder"]
+__all__ = ["Autoencoder", "Encoder", "Decoder", "ResNet18"]
 
 import math
-from typing import Any
+from typing import Any, Protocol, runtime_checkable
 
 import torch
 import torch.nn as nn
+from torchvision.models import ResNet18_Weights, resnet18
+
+
+@runtime_checkable
+class SupportsEncode(Protocol):
+    def encode(self, x: Any) -> Any: ...
 
 
 class Autoencoder(nn.Module):
@@ -330,3 +336,38 @@ class Decoder_AE(nn.Module):
         x = self.decoder(x)
         x = x.reshape((-1, *self.input_shape))
         return x
+
+
+class ResNet18(nn.Module):
+    """
+    A wrapper class for the torchvision.models.resnet18 model
+
+
+    Note
+    ----
+    This class is provided for the use of DataEval documentation and excludes many features
+    of the torchvision implementation.
+
+    Warning
+    -------
+    This class has been thoroughly tested for the purposes
+    of DataEval's documentation but not for operational use.
+    Please use with caution if deploying this class or subclasses.
+    """
+
+    def __init__(self, embedding_size: int = 128):
+        super().__init__()
+        self.model: nn.Module = resnet18(weights=ResNet18_Weights.DEFAULT, progress=False)
+        self.model.fc = nn.Linear(self.model.fc.in_features, embedding_size)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.model(x)
+
+    @staticmethod
+    def transforms() -> Any:
+        """(Returns) the default ResNet18 IMAGENET1K_V1 transforms"""
+
+        return ResNet18_Weights.DEFAULT.transforms()
+
+    def __str__(self) -> str:
+        return str(self.model)
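The added ResNet18 wrapper replaces the torchvision classifier head with an embedding_size-dimensional linear layer and exposes the pretrained weights' default transforms. A short sketch of using it as an embedding model (pretrained weights download on first use):

    import torch
    from dataeval.utils.torch.models import ResNet18

    model = ResNet18(embedding_size=128)
    preprocess = ResNet18.transforms()   # default IMAGENET1K transforms, for preprocessing real images

    with torch.no_grad():
        images = torch.rand(4, 3, 224, 224)   # stand-in for a preprocessed batch
        embeddings = model(images)            # shape: (4, 128)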
dataeval/workflows/__init__.py
CHANGED
@@ -4,4 +4,5 @@ Workflows perform a sequence of actions to analyze the dataset and make predicti
 
 __all__ = ["Sufficiency", "SufficiencyOutput"]
 
-from dataeval.
+from dataeval.outputs._workflows import SufficiencyOutput
+from dataeval.workflows.sufficiency import Sufficiency
dataeval/workflows/sufficiency.py
CHANGED
@@ -2,260 +2,16 @@ from __future__ import annotations
 
 __all__ = []
 
-import 
-import warnings
-from dataclasses import dataclass
-from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
+from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar
 
 import numpy as np
 import torch
 import torch.nn as nn
-from numpy.typing import ArrayLike, NDArray
-from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
 
-from dataeval.
-from dataeval.
-
-with contextlib.suppress(ImportError):
-    from matplotlib.figure import Figure
-
-
-@dataclass(frozen=True)
-class SufficiencyOutput(Output):
-    """
-    Output class for :class:`Sufficiency` workflow.
-
-    Attributes
-    ----------
-    steps : NDArray
-        Array of sample sizes
-    params : Dict[str, NDArray]
-        Inverse power curve coefficients for the line of best fit for each measure
-    measures : Dict[str, NDArray]
-        Average of values observed for each sample size step for each measure
-    """
-
-    steps: NDArray[np.uint32]
-    params: dict[str, NDArray[np.float64]]
-    measures: dict[str, NDArray[np.float64]]
-
-    def __post_init__(self) -> None:
-        c = len(self.steps)
-        if set(self.params) != set(self.measures):
-            raise ValueError("params and measures have a key mismatch")
-        for m, v in self.measures.items():
-            c_v = v.shape[1] if v.ndim > 1 else len(v)
-            if c != c_v:
-                raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
-
-    @set_metadata
-    def project(
-        self,
-        projection: int | Iterable[int],
-    ) -> SufficiencyOutput:
-        """Projects the measures for each value of X
-
-        Parameters
-        ----------
-        projection : int | Iterable[int]
-            Step or steps to project
-
-        Returns
-        -------
-        SufficiencyOutput
-            Dataclass containing the projected measures per projection
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-            If `projection` is not numerical
-        """
-        projection = np.asarray(list(projection) if isinstance(projection, Iterable) else [projection])
-
-        if not np.issubdtype(projection.dtype, np.number):
-            raise ValueError("'projection' must consist of numerical values")
-
-        output = {}
-        for name, measures in self.measures.items():
-            if measures.ndim > 1:
-                result = []
-                for i in range(len(measures)):
-                    projected = project_steps(self.params[name][i], projection)
-                    result.append(projected)
-                output[name] = np.array(result)
-            else:
-                output[name] = project_steps(self.params[name], projection)
-        return SufficiencyOutput(projection, self.params, output)
-
-    def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
-        """Plotting function for data :term:`sufficience<Sufficiency>` tasks
-
-        Parameters
-        ----------
-        class_names : Sequence[str] | None, default None
-            List of class names
-
-        Returns
-        -------
-        list[Figure]
-            List of Figures for each measure
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-        """
-        # Extrapolation parameters
-        last_X = self.steps[-1]
-        geomshape = (0.01 * last_X, last_X * 4, len(self.steps))
-        extrapolated = np.geomspace(*geomshape).astype(np.int64)
-
-        # Stores all plots
-        plots = []
-
-        # Create a plot for each measure on one figure
-        for name, measures in self.measures.items():
-            if measures.ndim > 1:
-                if class_names is not None and len(measures) != len(class_names):
-                    raise IndexError("Class name count does not align with measures")
-                for i, measure in enumerate(measures):
-                    class_name = str(i) if class_names is None else class_names[i]
-                    fig = plot_measure(
-                        f"{name}_{class_name}",
-                        self.steps,
-                        measure,
-                        self.params[name][i],
-                        extrapolated,
-                    )
-                    plots.append(fig)
-
-            else:
-                fig = plot_measure(name, self.steps, measures, self.params[name], extrapolated)
-                plots.append(fig)
-
-        return plots
-
-    def inv_project(self, targets: Mapping[str, ArrayLike]) -> dict[str, NDArray[np.float64]]:
-        """
-        Calculate training samples needed to achieve target model metric values.
-
-        Parameters
-        ----------
-        targets : Mapping[str, ArrayLike]
-            Mapping of target metric scores (from 0.0 to 1.0) that we want
-            to achieve, where the key is the name of the metric.
-
-        Returns
-        -------
-        dict[str, NDArray]
-            List of the number of training samples needed to achieve each
-            corresponding entry in targets
-        """
-
-        projection = {}
-
-        for name, target in targets.items():
-            tarray = as_numpy(target)
-            if name not in self.measures:
-                continue
-
-            measure = self.measures[name]
-            if measure.ndim > 1:
-                projection[name] = np.zeros((len(measure), len(tarray)))
-                for i in range(len(measure)):
-                    projection[name][i] = inv_project_steps(
-                        self.params[name][i], tarray[i] if tarray.ndim == measure.ndim else tarray
-                    )
-            else:
-                projection[name] = inv_project_steps(self.params[name], tarray)
-
-        return projection
-
-
-def f_out(n_i: NDArray[Any], x: NDArray[Any]) -> NDArray[Any]:
-    """
-    Calculates the line of best fit based on its free parameters
-
-    Parameters
-    ----------
-    n_i : NDArray
-        Array of sample sizes
-    x : NDArray
-        Array of inverse power curve coefficients
-
-    Returns
-    -------
-    NDArray
-        Data points for the line of best fit
-    """
-    return x[0] * n_i ** (-x[1]) + x[2]
-
-
-def f_inv_out(y_i: NDArray[Any], x: NDArray[Any]) -> NDArray[np.uint64]:
-    """
-    Inverse function for f_out()
-
-    Parameters
-    ----------
-    y_i : NDArray
-        Data points for the line of best fit
-    x : NDArray
-        Array of inverse power curve coefficients
-
-    Returns
-    -------
-    NDArray
-        Array of sample sizes
-    """
-    n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
-    return np.asarray(n_i, dtype=np.uint64)
-
-
-def calc_params(p_i: NDArray[Any], n_i: NDArray[Any], niter: int) -> NDArray[Any]:
-    """
-    Retrieves the inverse power curve coefficients for the line of best fit.
-    Global minimization is done via basin hopping. More info on this algorithm
-    can be found here: https://arxiv.org/abs/cond-mat/9803344 .
-
-    Parameters
-    ----------
-    p_i : NDArray
-        Array of corresponding losses
-    n_i : NDArray
-        Array of sample sizes
-    niter : int
-        Number of iterations to perform in the basin-hopping
-        numerical process to curve-fit p_i
-
-    Returns
-    -------
-    NDArray
-        Array of parameters to recreate line of best fit
-    """
-
-    def is_valid(f_new, x_new, f_old, x_old):
-        return f_new != np.nan
-
-    def f(x):
-        try:
-            return np.sum(np.square(p_i - f_out(n_i, x)))
-        except RuntimeWarning:
-            return np.nan
-
-    with warnings.catch_warnings():
-        warnings.filterwarnings("error", category=RuntimeWarning)
-        res = basinhopping(
-            f,
-            np.array([0.5, 0.5, 0.1]),
-            niter=niter,
-            stepsize=1.0,
-            minimizer_kwargs={"method": "Powell"},
-            accept_test=is_valid,
-            niter_success=200,
-        )
-    return res.x
+from dataeval.outputs import SufficiencyOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike
 
 
 def reset_parameters(model: nn.Module) -> nn.Module:
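For reference, the helpers stripped out above fit the inverse power law used to extrapolate each measure; judging by the new imports, SufficiencyOutput (and with it this curve logic) now appears to live in dataeval/outputs/_workflows.py. In the removed code's own notation:

    f_out(n, x) = x[0] * n ** (-x[1]) + x[2]     # loss-vs-sample-size curve fit by calc_params
    measure(n) ≈ 1 - f_out(n, params)            # what project_steps extrapolated at each step n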
@@ -277,102 +33,14 @@ def reset_parameters(model: nn.Module) -> nn.Module:
 
 
 def validate_dataset_len(dataset: Dataset[Any]) -> int:
-    if not 
+    if not isinstance(dataset, Sized):
         raise TypeError("Must provide a dataset with a length attribute")
-    length: int = dataset
+    length: int = len(dataset)
     if length <= 0:
         raise ValueError("Dataset length must be greater than 0")
     return length
 
 
-def project_steps(params: NDArray[Any], projection: NDArray[Any]) -> NDArray[Any]:
-    """Projects the measures for each value of X
-
-    Parameters
-    ----------
-    params : NDArray
-        Inverse power curve coefficients used to calculate projection
-    projection : NDArray
-        Steps to extrapolate
-
-    Returns
-    -------
-    NDArray
-        Extrapolated measure values at each projection step
-
-    """
-    return 1 - f_out(projection, params)
-
-
-def inv_project_steps(params: NDArray[Any], targets: NDArray[Any]) -> NDArray[np.uint64]:
-    """Inverse function for project_steps()
-
-    Parameters
-    ----------
-    params : NDArray
-        Inverse power curve coefficients used to calculate projection
-    targets : NDArray
-        Desired measure values
-
-    Returns
-    -------
-    NDArray
-        Array of sample sizes, or 0 if overflow
-    """
-    steps = f_inv_out(1 - np.array(targets), params)
-    steps[np.isnan(steps)] = 0
-    return np.ceil(steps)
-
-
-def get_curve_params(measures: dict[str, NDArray[Any]], ranges: NDArray[Any], niter: int) -> dict[str, NDArray[Any]]:
-    """Calculates and aggregates parameters for both single and multi-class metrics"""
-    output = {}
-    for name, measure in measures.items():
-        measure = cast(np.ndarray, measure)
-        if measure.ndim > 1:
-            result = []
-            for value in measure:
-                result.append(calc_params(1 - value, ranges, niter))
-            output[name] = np.array(result)
-        else:
-            output[name] = calc_params(1 - measure, ranges, niter)
-    return output
-
-
-def plot_measure(
-    name: str,
-    steps: NDArray[Any],
-    measure: NDArray[Any],
-    params: NDArray[Any],
-    projection: NDArray[Any],
-) -> Figure:
-    import matplotlib.pyplot
-
-    fig = matplotlib.pyplot.figure()
-    fig = cast(Figure, fig)
-    fig.tight_layout()
-
-    ax = fig.add_subplot(111)
-
-    ax.set_title(f"{name} Sufficiency")
-    ax.set_ylabel(f"{name}")
-    ax.set_xlabel("Steps")
-
-    # Plot measure over each step
-    ax.scatter(steps, measure, label=f"Model Results ({name})", s=15, c="black")
-
-    # Plot extrapolation
-    ax.plot(
-        projection,
-        project_steps(params, projection),
-        linestyle="dashed",
-        label=f"Potential Model Results ({name})",
-    )
-
-    ax.legend()
-    return fig
-
-
 T = TypeVar("T")
 
 
@@ -460,13 +128,13 @@ class Sufficiency(Generic[T]):
     @property
     def eval_fn(
         self,
-    ) -> Callable[[nn.Module, Dataset[T]], 
+    ) -> Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]]:
         return self._eval_fn
 
     @eval_fn.setter
     def eval_fn(
         self,
-        value: Callable[[nn.Module, Dataset[T]], 
+        value: Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]],
     ) -> None:
         if not callable(value):
             raise TypeError("Must provide a callable for eval_fn.")
@@ -489,7 +157,7 @@ class Sufficiency(Generic[T]):
         self._eval_kwargs = {} if value is None else value
 
     @set_metadata(state=["runs", "substeps"])
-    def evaluate(self, eval_at: int | Iterable[int] | None = None
+    def evaluate(self, eval_at: int | Iterable[int] | None = None) -> SufficiencyOutput:
         """
         Creates data indices, trains models, and returns plotting data
 
@@ -498,8 +166,6 @@ class Sufficiency(Generic[T]):
         eval_at : int | Iterable[int] | None, default None
             Specify this to collect accuracies over a specific set of dataset lengths, rather
            than letting :term:`sufficiency<Sufficiency>` internally create the lengths to evaluate at.
-        niter : int, default 1000
-            Iterations to perform when using the basin-hopping method to curve-fit measure(s).
 
         Returns
         -------
@@ -523,7 +189,7 @@ class Sufficiency(Generic[T]):
         ...     substeps=5,
         ... )
         >>> suff.evaluate()
-        SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), 
+        SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
         """  # noqa: E501
         if eval_at is not None:
             ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
@@ -568,5 +234,4 @@ class Sufficiency(Generic[T]):
 
         # The mean for each measure must be calculated before being returned
         measures = {k: (v / self.runs).T for k, v in measures.items()}
-
-        return SufficiencyOutput(ranges, params_output, measures)
+        return SufficiencyOutput(ranges, measures)
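Per the updated doctest, evaluate now builds the slimmer SufficiencyOutput(steps, measures, n_iter=1000); curve parameters are no longer computed inside the workflow. A rough sketch of reading the result, using only the fields visible in the doctest (projection and plotting behavior is assumed to follow the relocated implementation in dataeval.outputs._workflows):

    output = suff.evaluate()            # `suff` is the Sufficiency instance from the doctest above
    print(output.steps)                 # e.g. array([  1,   3,  10,  31, 100], dtype=uint32)
    print(output.measures["test"])      # per-step averages for each registered measure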
{dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.76.1
+Version: 0.82.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -21,7 +21,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Topic :: Scientific/Engineering
 Provides-Extra: all
+Requires-Dist: defusedxml (>=0.7.1)
+Requires-Dist: fast_hdbscan (==0.2.0)
 Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
+Requires-Dist: numba (>=0.59.1)
 Requires-Dist: numpy (>=1.24.2)
 Requires-Dist: pandas (>=2.0) ; extra == "all"
 Requires-Dist: pillow (>=10.3.0)
@@ -71,7 +74,7 @@ DataEval is easy to install, supports a wide range of Python versions, and is
 compatible with many of the most popular packages in the scientific and T&E
 communities.
 
-DataEval also has native 
+DataEval also has native interoperability between JATIC's suite of tools when
 using MAITE-compliant datasets and models.
 <!-- end JATIC interop -->
 