PyPI - dataeval - Versions diffs - 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl - Mend

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

dataeval/__init__.py +1 -1
dataeval/config.py +68 -11
dataeval/detectors/drift/__init__.py +2 -2
dataeval/detectors/drift/_base.py +8 -64
dataeval/detectors/drift/_mmd.py +12 -38
dataeval/detectors/drift/_torch.py +7 -7
dataeval/detectors/drift/_uncertainty.py +6 -5
dataeval/detectors/drift/updates.py +20 -3
dataeval/detectors/linters/__init__.py +3 -2
dataeval/detectors/linters/duplicates.py +14 -46
dataeval/detectors/linters/outliers.py +25 -159
dataeval/detectors/ood/__init__.py +1 -1
dataeval/detectors/ood/ae.py +6 -5
dataeval/detectors/ood/base.py +2 -2
dataeval/detectors/ood/metadata_ood_mi.py +4 -6
dataeval/detectors/ood/mixin.py +3 -4
dataeval/detectors/ood/vae.py +3 -2
dataeval/metadata/__init__.py +2 -1
dataeval/metadata/_distance.py +134 -0
dataeval/metadata/_ood.py +30 -49
dataeval/metadata/_utils.py +44 -0
dataeval/metrics/bias/__init__.py +5 -4
dataeval/metrics/bias/_balance.py +17 -149
dataeval/metrics/bias/_coverage.py +4 -106
dataeval/metrics/bias/_diversity.py +12 -107
dataeval/metrics/bias/_parity.py +7 -71
dataeval/metrics/estimators/__init__.py +5 -4
dataeval/metrics/estimators/_ber.py +2 -20
dataeval/metrics/estimators/_clusterer.py +1 -61
dataeval/metrics/estimators/_divergence.py +2 -19
dataeval/metrics/estimators/_uap.py +2 -16
dataeval/metrics/stats/__init__.py +15 -12
dataeval/metrics/stats/_base.py +41 -128
dataeval/metrics/stats/_boxratiostats.py +13 -13
dataeval/metrics/stats/_dimensionstats.py +17 -58
dataeval/metrics/stats/_hashstats.py +19 -35
dataeval/metrics/stats/_imagestats.py +94 -0
dataeval/metrics/stats/_labelstats.py +42 -121
dataeval/metrics/stats/_pixelstats.py +19 -51
dataeval/metrics/stats/_visualstats.py +19 -51
dataeval/outputs/__init__.py +57 -0
dataeval/outputs/_base.py +182 -0
dataeval/outputs/_bias.py +381 -0
dataeval/outputs/_drift.py +83 -0
dataeval/outputs/_estimators.py +114 -0
dataeval/outputs/_linters.py +186 -0
dataeval/outputs/_metadata.py +54 -0
dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
dataeval/outputs/_stats.py +393 -0
dataeval/outputs/_utils.py +44 -0
dataeval/outputs/_workflows.py +364 -0
dataeval/typing.py +187 -7
dataeval/utils/_method.py +1 -5
dataeval/utils/_plot.py +2 -2
dataeval/utils/data/__init__.py +5 -1
dataeval/utils/data/_dataset.py +217 -0
dataeval/utils/data/_embeddings.py +12 -14
dataeval/utils/data/_images.py +30 -27
dataeval/utils/data/_metadata.py +28 -11
dataeval/utils/data/_selection.py +25 -22
dataeval/utils/data/_split.py +5 -29
dataeval/utils/data/_targets.py +14 -2
dataeval/utils/data/datasets/_base.py +5 -5
dataeval/utils/data/datasets/_cifar10.py +1 -1
dataeval/utils/data/datasets/_milco.py +1 -1
dataeval/utils/data/datasets/_mnist.py +1 -1
dataeval/utils/data/datasets/_ships.py +1 -1
dataeval/utils/data/{_types.py → datasets/_types.py} +10 -16
dataeval/utils/data/datasets/_voc.py +1 -1
dataeval/utils/data/selections/_classfilter.py +4 -5
dataeval/utils/data/selections/_indices.py +2 -2
dataeval/utils/data/selections/_limit.py +2 -2
dataeval/utils/data/selections/_reverse.py +2 -2
dataeval/utils/data/selections/_shuffle.py +2 -2
dataeval/utils/torch/_internal.py +5 -5
dataeval/utils/torch/trainer.py +8 -8
dataeval/workflows/__init__.py +2 -1
dataeval/workflows/sufficiency.py +6 -342
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/METADATA +2 -2
dataeval-0.82.1.dist-info/RECORD +105 -0
dataeval/_output.py +0 -137
dataeval/detectors/ood/metadata_ks_compare.py +0 -129
dataeval/metrics/stats/_datasetstats.py +0 -198
dataeval-0.81.0.dist-info/RECORD +0 -94
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/LICENSE.txt +0 -0
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/WHEEL +0 -0

dataeval/utils/data/datasets/_cifar10.py CHANGED Viewed

@@ -9,9 +9,9 @@ import numpy as np
 from numpy.typing import NDArray
 from PIL import Image
-from dataeval.utils.data._types import Transform
 from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
 from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.data.datasets._types import Transform
 CIFARClassStringMap = Literal["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
 TCIFARClassMap = TypeVar("TCIFARClassMap", CIFARClassStringMap, int, list[CIFARClassStringMap], list[int])

dataeval/utils/data/datasets/_milco.py CHANGED Viewed

@@ -9,8 +9,8 @@ from typing import Any, Sequence
 from numpy.typing import NDArray
-from dataeval.utils.data._types import Transform
 from dataeval.utils.data.datasets._base import BaseODDataset, DataLocation
+from dataeval.utils.data.datasets._types import Transform
 class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):

dataeval/utils/data/datasets/_mnist.py CHANGED Viewed

@@ -8,9 +8,9 @@ from typing import Any, Literal, Sequence, TypeVar
 import numpy as np
 from numpy.typing import NDArray
-from dataeval.utils.data._types import Transform
 from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
 from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.data.datasets._types import Transform
 MNISTClassStringMap = Literal["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
 TMNISTClassMap = TypeVar("TMNISTClassMap", MNISTClassStringMap, int, list[MNISTClassStringMap], list[int])

dataeval/utils/data/datasets/_ships.py CHANGED Viewed

@@ -8,9 +8,9 @@ from typing import Any, Sequence
 import numpy as np
 from numpy.typing import NDArray
-from dataeval.utils.data._types import Transform
 from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
 from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.data.datasets._types import Transform
 class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):

dataeval/utils/data/{_types.py → datasets/_types.py} RENAMED Viewed

@@ -2,20 +2,11 @@ from __future__ import annotations
 __all__ = []
-import sys
 from dataclasses import dataclass
 from typing import Any, Generic, Protocol, TypedDict, TypeVar
-if sys.version_info >= (3, 11):
-    from typing import NotRequired, Required
-else:
-    from typing_extensions import NotRequired, Required
-from torch.utils.data import Dataset as _Dataset
-_TArray = TypeVar("_TArray")
-_TData = TypeVar("_TData", covariant=True)
-_TTarget = TypeVar("_TTarget", covariant=True)
+from torch.utils.data import Dataset
+from typing_extensions import NotRequired, Required
 class DatasetMetadata(TypedDict):
@@ -24,14 +15,17 @@ class DatasetMetadata(TypedDict):
     split: NotRequired[str]
-class Dataset(_Dataset[tuple[_TData, _TTarget, dict[str, Any]]]):
+_TDatum = TypeVar("_TDatum")
+_TArray = TypeVar("_TArray")
+class AnnotatedDataset(Dataset[_TDatum]):
     metadata: DatasetMetadata
-    def __getitem__(self, index: Any) -> tuple[_TData, _TTarget, dict[str, Any]]: ...
     def __len__(self) -> int: ...
-class ImageClassificationDataset(Dataset[_TArray, _TArray]): ...
+class ImageClassificationDataset(AnnotatedDataset[tuple[_TArray, _TArray, dict[str, Any]]]): ...
 @dataclass
@@ -41,7 +35,7 @@ class ObjectDetectionTarget(Generic[_TArray]):
     scores: _TArray
-class ObjectDetectionDataset(Dataset[_TArray, ObjectDetectionTarget[_TArray]]): ...
+class ObjectDetectionDataset(AnnotatedDataset[tuple[_TArray, ObjectDetectionTarget[_TArray], dict[str, Any]]]): ...
 @dataclass
@@ -51,7 +45,7 @@ class SegmentationTarget(Generic[_TArray]):
     scores: _TArray
-class SegmentationDataset(Dataset[_TArray, SegmentationTarget[_TArray]]): ...
+class SegmentationDataset(AnnotatedDataset[tuple[_TArray, SegmentationTarget[_TArray], dict[str, Any]]]): ...
 class Transform(Generic[_TArray], Protocol):

dataeval/utils/data/datasets/_voc.py CHANGED Viewed

@@ -9,7 +9,6 @@ import torch
 from defusedxml.ElementTree import parse
 from numpy.typing import NDArray
-from dataeval.utils.data._types import ObjectDetectionTarget, SegmentationTarget, Transform
 from dataeval.utils.data.datasets._base import (
     BaseDataset,
     BaseODDataset,
@@ -17,6 +16,7 @@ from dataeval.utils.data.datasets._base import (
     DataLocation,
 )
 from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
+from dataeval.utils.data.datasets._types import ObjectDetectionTarget, SegmentationTarget, Transform
 _TArray = TypeVar("_TArray")
 _TTarget = TypeVar("_TTarget")

dataeval/utils/data/selections/_classfilter.py CHANGED Viewed

@@ -6,15 +6,14 @@ from typing import Sequence, TypeVar
 import numpy as np
-from dataeval.typing import Array
+from dataeval.typing import Array, ImageClassificationDatum
 from dataeval.utils._array import as_numpy
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
-_TData = TypeVar("_TData")
-_TTarget = TypeVar("_TTarget", bound=Array)
+TImageClassificationDatum = TypeVar("TImageClassificationDatum", bound=ImageClassificationDatum, covariant=True)
-class ClassFilter(Selection[_TData, _TTarget]):
+class ClassFilter(Selection[TImageClassificationDatum]):
     """
     Filter and balance the dataset by class.
@@ -37,7 +36,7 @@ class ClassFilter(Selection[_TData, _TTarget]):
         self.classes = classes
         self.balance = balance
-    def __call__(self, dataset: Select[_TData, _TTarget]) -> None:
+    def __call__(self, dataset: Select[TImageClassificationDatum]) -> None:
         if self.classes is None and not self.balance:
             return

dataeval/utils/data/selections/_indices.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import Any, Sequence
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
-class Indices(Selection[Any, Any]):
+class Indices(Selection[Any]):
     """
     Selects specific indices from the dataset.
@@ -22,5 +22,5 @@ class Indices(Selection[Any, Any]):
     def __init__(self, indices: Sequence[int]) -> None:
         self.indices = indices
-    def __call__(self, dataset: Select[Any, Any]) -> None:
+    def __call__(self, dataset: Select[Any]) -> None:
         dataset._selection = [index for index in self.indices if index in dataset._selection]

dataeval/utils/data/selections/_limit.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import Any
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
-class Limit(Selection[Any, Any]):
+class Limit(Selection[Any]):
     """
     Limit the size of the dataset.
@@ -22,5 +22,5 @@ class Limit(Selection[Any, Any]):
     def __init__(self, size: int) -> None:
         self.size = size
-    def __call__(self, dataset: Select[Any, Any]) -> None:
+    def __call__(self, dataset: Select[Any]) -> None:
         dataset._size_limit = self.size

dataeval/utils/data/selections/_reverse.py CHANGED Viewed

@@ -7,12 +7,12 @@ from typing import Any
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
-class Reverse(Selection[Any, Any]):
+class Reverse(Selection[Any]):
     """
     Reverse the selection order of the dataset.
     """
     stage = SelectionStage.ORDER
-    def __call__(self, dataset: Select[Any, Any]) -> None:
+    def __call__(self, dataset: Select[Any]) -> None:
         dataset._selection.reverse()

dataeval/utils/data/selections/_shuffle.py CHANGED Viewed

@@ -9,7 +9,7 @@ import numpy as np
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
-class Shuffle(Selection[Any, Any]):
+class Shuffle(Selection[Any]):
     """
     Shuffle the dataset using a seed.
@@ -24,6 +24,6 @@ class Shuffle(Selection[Any, Any]):
     def __init__(self, seed: int):
         self.seed = seed
-    def __call__(self, dataset: Select[Any, Any]) -> None:
+    def __call__(self, dataset: Select[Any]) -> None:
         rng = np.random.default_rng(self.seed)
         rng.shuffle(dataset._selection)

dataeval/utils/torch/_internal.py CHANGED Viewed

@@ -11,13 +11,13 @@ from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-from dataeval.config import get_device
+from dataeval.config import DeviceLike, get_device
 def predict_batch(
     x: NDArray[Any] | torch.Tensor,
     model: Callable | torch.nn.Module | torch.nn.Sequential,
-    device: torch.device | None = None,
+    device: DeviceLike | None = None,
     batch_size: int = int(1e10),
     preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
     dtype: type[np.generic] | torch.dtype = np.float32,
@@ -31,9 +31,9 @@ def predict_batch(
         Batch of instances.
     model : Callable | nn.Module | nn.Sequential
         PyTorch model.
-    device : torch.device | None, default None
-        Device type used. The default None tries to use the GPU and falls back on CPU.
-        Can be specified by passing either torch.device('cuda') or torch.device('cpu').
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
     batch_size : int, default 1e10
         Batch size used during prediction.
     preprocess_fn : Callable | None, default None

dataeval/utils/torch/trainer.py CHANGED Viewed

@@ -2,6 +2,8 @@
 from __future__ import annotations
+from dataeval.config import DeviceLike, get_device
 __all__ = ["AETrainer"]
 from typing import Any
@@ -25,9 +27,9 @@ class AETrainer:
     ----------
     model : nn.Module
         The model to be trained.
-    device : str or torch.device, default "auto"
-        The hardware device to use for training.
-        If "auto", the device will be set to "cuda" if available, otherwise "cpu".
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
     batch_size : int, default 8
         The number of images to process in a batch.
     """
@@ -35,13 +37,11 @@ class AETrainer:
     def __init__(
         self,
         model: nn.Module,
-        device: str | torch.device = "auto",
+        device: DeviceLike | None = None,
         batch_size: int = 8,
     ):
-        if device == "auto":
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.device: torch.device = torch.device(device)
-        self.model: nn.Module = model.to(device)
+        self.device: torch.device = get_device(device)
+        self.model: nn.Module = model.to(self.device)
         self.batch_size = batch_size
     def train(self, dataset: Dataset[Any], epochs: int = 25) -> list[float]:

dataeval/workflows/__init__.py CHANGED Viewed

@@ -4,4 +4,5 @@ Workflows perform a sequence of actions to analyze the dataset and make predicti
 __all__ = ["Sufficiency", "SufficiencyOutput"]
-from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
+from dataeval.outputs._workflows import SufficiencyOutput
+from dataeval.workflows.sufficiency import Sufficiency

dataeval/workflows/sufficiency.py CHANGED Viewed

@@ -2,261 +2,16 @@ from __future__ import annotations
 __all__ = []
-import contextlib
-import warnings
-from dataclasses import dataclass
-from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar, cast
+from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar
 import numpy as np
 import torch
 import torch.nn as nn
-from numpy.typing import NDArray
-from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
-from dataeval._output import Output, set_metadata
+from dataeval.outputs import SufficiencyOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
-from dataeval.utils._array import as_numpy
-with contextlib.suppress(ImportError):
-    from matplotlib.figure import Figure
-@dataclass(frozen=True)
-class SufficiencyOutput(Output):
-    """
-    Output class for :class:`.Sufficiency` workflow.
-    Attributes
-    ----------
-    steps : NDArray
-        Array of sample sizes
-    params : Dict[str, NDArray]
-        Inverse power curve coefficients for the line of best fit for each measure
-    measures : Dict[str, NDArray]
-        Average of values observed for each sample size step for each measure
-    """
-    steps: NDArray[np.uint32]
-    params: dict[str, NDArray[np.float64]]
-    measures: dict[str, NDArray[np.float64]]
-    def __post_init__(self) -> None:
-        c = len(self.steps)
-        if set(self.params) != set(self.measures):
-            raise ValueError("params and measures have a key mismatch")
-        for m, v in self.measures.items():
-            c_v = v.shape[1] if v.ndim > 1 else len(v)
-            if c != c_v:
-                raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
-    @set_metadata
-    def project(
-        self,
-        projection: int | Iterable[int],
-    ) -> SufficiencyOutput:
-        """Projects the measures for each value of X
-        Parameters
-        ----------
-        projection : int | Iterable[int]
-            Step or steps to project
-        Returns
-        -------
-        SufficiencyOutput
-            Dataclass containing the projected measures per projection
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-            If `projection` is not numerical
-        """
-        projection = np.asarray(list(projection) if isinstance(projection, Iterable) else [projection])
-        if not np.issubdtype(projection.dtype, np.number):
-            raise ValueError("'projection' must consist of numerical values")
-        output = {}
-        for name, measures in self.measures.items():
-            if measures.ndim > 1:
-                result = []
-                for i in range(len(measures)):
-                    projected = project_steps(self.params[name][i], projection)
-                    result.append(projected)
-                output[name] = np.array(result)
-            else:
-                output[name] = project_steps(self.params[name], projection)
-        return SufficiencyOutput(projection, self.params, output)
-    def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
-        """Plotting function for data :term:`sufficience<Sufficiency>` tasks
-        Parameters
-        ----------
-        class_names : Sequence[str] | None, default None
-            List of class names
-        Returns
-        -------
-        list[Figure]
-            List of Figures for each measure
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-        """
-        # Extrapolation parameters
-        last_X = self.steps[-1]
-        geomshape = (0.01 * last_X, last_X * 4, len(self.steps))
-        extrapolated = np.geomspace(*geomshape).astype(np.int64)
-        # Stores all plots
-        plots = []
-        # Create a plot for each measure on one figure
-        for name, measures in self.measures.items():
-            if measures.ndim > 1:
-                if class_names is not None and len(measures) != len(class_names):
-                    raise IndexError("Class name count does not align with measures")
-                for i, measure in enumerate(measures):
-                    class_name = str(i) if class_names is None else class_names[i]
-                    fig = plot_measure(
-                        f"{name}_{class_name}",
-                        self.steps,
-                        measure,
-                        self.params[name][i],
-                        extrapolated,
-                    )
-                    plots.append(fig)
-            else:
-                fig = plot_measure(name, self.steps, measures, self.params[name], extrapolated)
-                plots.append(fig)
-        return plots
-    def inv_project(self, targets: Mapping[str, ArrayLike]) -> dict[str, NDArray[np.float64]]:
-        """
-        Calculate training samples needed to achieve target model metric values.
-        Parameters
-        ----------
-        targets : Mapping[str, ArrayLike]
-            Mapping of target metric scores (from 0.0 to 1.0) that we want
-            to achieve, where the key is the name of the metric.
-        Returns
-        -------
-        dict[str, NDArray]
-            List of the number of training samples needed to achieve each
-            corresponding entry in targets
-        """
-        projection = {}
-        for name, target in targets.items():
-            tarray = as_numpy(target)
-            if name not in self.measures:
-                continue
-            measure = self.measures[name]
-            if measure.ndim > 1:
-                projection[name] = np.zeros((len(measure), len(tarray)))
-                for i in range(len(measure)):
-                    projection[name][i] = inv_project_steps(
-                        self.params[name][i], tarray[i] if tarray.ndim == measure.ndim else tarray
-                    )
-            else:
-                projection[name] = inv_project_steps(self.params[name], tarray)
-        return projection
-def f_out(n_i: NDArray[Any], x: NDArray[Any]) -> NDArray[Any]:
-    """
-    Calculates the line of best fit based on its free parameters
-    Parameters
-    ----------
-    n_i : NDArray
-        Array of sample sizes
-    x : NDArray
-        Array of inverse power curve coefficients
-    Returns
-    -------
-    NDArray
-        Data points for the line of best fit
-    """
-    return x[0] * n_i ** (-x[1]) + x[2]
-def f_inv_out(y_i: NDArray[Any], x: NDArray[Any]) -> NDArray[np.uint64]:
-    """
-    Inverse function for f_out()
-    Parameters
-    ----------
-    y_i : NDArray
-        Data points for the line of best fit
-    x : NDArray
-        Array of inverse power curve coefficients
-    Returns
-    -------
-    NDArray
-        Array of sample sizes
-    """
-    n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
-    return np.asarray(n_i, dtype=np.uint64)
-def calc_params(p_i: NDArray[Any], n_i: NDArray[Any], niter: int) -> NDArray[Any]:
-    """
-    Retrieves the inverse power curve coefficients for the line of best fit.
-    Global minimization is done via basin hopping. More info on this algorithm
-    can be found here: https://arxiv.org/abs/cond-mat/9803344 .
-    Parameters
-    ----------
-    p_i : NDArray
-        Array of corresponding losses
-    n_i : NDArray
-        Array of sample sizes
-    niter : int
-        Number of iterations to perform in the basin-hopping
-        numerical process to curve-fit p_i
-    Returns
-    -------
-    NDArray
-        Array of parameters to recreate line of best fit
-    """
-    def is_valid(f_new, x_new, f_old, x_old):
-        return f_new != np.nan
-    def f(x):
-        try:
-            return np.sum(np.square(p_i - f_out(n_i, x)))
-        except RuntimeWarning:
-            return np.nan
-    with warnings.catch_warnings():
-        warnings.filterwarnings("error", category=RuntimeWarning)
-        res = basinhopping(
-            f,
-            np.array([0.5, 0.5, 0.1]),
-            niter=niter,
-            stepsize=1.0,
-            minimizer_kwargs={"method": "Powell"},
-            accept_test=is_valid,
-            niter_success=200,
-        )
-    return res.x
 def reset_parameters(model: nn.Module) -> nn.Module:
@@ -286,94 +41,6 @@ def validate_dataset_len(dataset: Dataset[Any]) -> int:
     return length
-def project_steps(params: NDArray[Any], projection: NDArray[Any]) -> NDArray[Any]:
-    """Projects the measures for each value of X
-    Parameters
-    ----------
-    params : NDArray
-        Inverse power curve coefficients used to calculate projection
-    projection : NDArray
-        Steps to extrapolate
-    Returns
-    -------
-    NDArray
-        Extrapolated measure values at each projection step
-    """
-    return 1 - f_out(projection, params)
-def inv_project_steps(params: NDArray[Any], targets: NDArray[Any]) -> NDArray[np.uint64]:
-    """Inverse function for project_steps()
-    Parameters
-    ----------
-    params : NDArray
-        Inverse power curve coefficients used to calculate projection
-    targets : NDArray
-        Desired measure values
-    Returns
-    -------
-    NDArray
-        Array of sample sizes, or 0 if overflow
-    """
-    steps = f_inv_out(1 - np.array(targets), params)
-    steps[np.isnan(steps)] = 0
-    return np.ceil(steps)
-def get_curve_params(measures: dict[str, NDArray[Any]], ranges: NDArray[Any], niter: int) -> dict[str, NDArray[Any]]:
-    """Calculates and aggregates parameters for both single and multi-class metrics"""
-    output = {}
-    for name, measure in measures.items():
-        measure = cast(np.ndarray, measure)
-        if measure.ndim > 1:
-            result = []
-            for value in measure:
-                result.append(calc_params(1 - value, ranges, niter))
-            output[name] = np.array(result)
-        else:
-            output[name] = calc_params(1 - measure, ranges, niter)
-    return output
-def plot_measure(
-    name: str,
-    steps: NDArray[Any],
-    measure: NDArray[Any],
-    params: NDArray[Any],
-    projection: NDArray[Any],
-) -> Figure:
-    import matplotlib.pyplot
-    fig = matplotlib.pyplot.figure()
-    fig = cast(Figure, fig)
-    fig.tight_layout()
-    ax = fig.add_subplot(111)
-    ax.set_title(f"{name} Sufficiency")
-    ax.set_ylabel(f"{name}")
-    ax.set_xlabel("Steps")
-    # Plot measure over each step
-    ax.scatter(steps, measure, label=f"Model Results ({name})", s=15, c="black")
-    # Plot extrapolation
-    ax.plot(
-        projection,
-        project_steps(params, projection),
-        linestyle="dashed",
-        label=f"Potential Model Results ({name})",
-    )
-    ax.legend()
-    return fig
 T = TypeVar("T")
@@ -490,7 +157,7 @@ class Sufficiency(Generic[T]):
         self._eval_kwargs = {} if value is None else value
     @set_metadata(state=["runs", "substeps"])
-    def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
+    def evaluate(self, eval_at: int | Iterable[int] | None = None) -> SufficiencyOutput:
         """
         Creates data indices, trains models, and returns plotting data
@@ -499,8 +166,6 @@ class Sufficiency(Generic[T]):
         eval_at : int | Iterable[int] | None, default None
             Specify this to collect accuracies over a specific set of dataset lengths, rather
             than letting :term:`sufficiency<Sufficiency>` internally create the lengths to evaluate at.
-        niter : int, default 1000
-            Iterations to perform when using the basin-hopping method to curve-fit measure(s).
         Returns
         -------
@@ -524,7 +189,7 @@ class Sufficiency(Generic[T]):
         ...     substeps=5,
         ... )
         >>> suff.evaluate()
-        SufficiencyOutput(steps=array([  1,   3,  10,  31, 100], dtype=uint32), params={'test': array([ 0., 42.,  0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
+        SufficiencyOutput(steps=array([  1,   3,  10,  31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
         """  # noqa: E501
         if eval_at is not None:
             ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
@@ -569,5 +234,4 @@ class Sufficiency(Generic[T]):
         # The mean for each measure must be calculated before being returned
         measures = {k: (v / self.runs).T for k, v in measures.items()}
-        params_output = get_curve_params(measures, ranges, niter)
-        return SufficiencyOutput(ranges, params_output, measures)
+        return SufficiencyOutput(ranges, measures)

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl