dataeval 0.71.1__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_internal/datasets.py +1 -1
- dataeval/_internal/detectors/clusterer.py +6 -6
- dataeval/_internal/detectors/drift/base.py +15 -14
- dataeval/_internal/detectors/drift/cvm.py +5 -5
- dataeval/_internal/detectors/drift/ks.py +7 -7
- dataeval/_internal/detectors/drift/mmd.py +10 -9
- dataeval/_internal/detectors/drift/torch.py +2 -2
- dataeval/_internal/detectors/drift/uncertainty.py +5 -4
- dataeval/_internal/detectors/duplicates.py +1 -1
- dataeval/_internal/detectors/ood/ae.py +2 -2
- dataeval/_internal/detectors/ood/aegmm.py +2 -2
- dataeval/_internal/detectors/ood/base.py +3 -3
- dataeval/_internal/detectors/ood/llr.py +3 -3
- dataeval/_internal/detectors/ood/vae.py +1 -1
- dataeval/_internal/detectors/ood/vaegmm.py +1 -1
- dataeval/_internal/detectors/outliers.py +5 -5
- dataeval/_internal/metrics/balance.py +5 -5
- dataeval/_internal/metrics/ber.py +6 -6
- dataeval/_internal/metrics/coverage.py +4 -4
- dataeval/_internal/metrics/divergence.py +4 -4
- dataeval/_internal/metrics/diversity.py +6 -6
- dataeval/_internal/metrics/parity.py +9 -7
- dataeval/_internal/metrics/stats/base.py +7 -5
- dataeval/_internal/metrics/stats/boxratiostats.py +1 -1
- dataeval/_internal/metrics/stats/datasetstats.py +1 -1
- dataeval/_internal/metrics/stats/dimensionstats.py +3 -3
- dataeval/_internal/metrics/stats/hashstats.py +3 -3
- dataeval/_internal/metrics/stats/labelstats.py +3 -3
- dataeval/_internal/metrics/stats/pixelstats.py +3 -3
- dataeval/_internal/metrics/stats/visualstats.py +4 -4
- dataeval/_internal/metrics/uap.py +3 -3
- dataeval/_internal/metrics/utils.py +8 -8
- dataeval/_internal/models/pytorch/autoencoder.py +8 -8
- dataeval/_internal/models/pytorch/utils.py +3 -3
- dataeval/_internal/models/tensorflow/autoencoder.py +4 -4
- dataeval/_internal/models/tensorflow/losses.py +1 -1
- dataeval/_internal/models/tensorflow/pixelcnn.py +2 -2
- dataeval/_internal/models/tensorflow/utils.py +1 -1
- dataeval/_internal/split_dataset.py +421 -0
- dataeval/_internal/workflows/sufficiency.py +3 -3
- dataeval/detectors/drift/__init__.py +1 -1
- dataeval/detectors/drift/updates/__init__.py +2 -1
- dataeval/detectors/ood/__init__.py +2 -10
- dataeval/utils/__init__.py +1 -1
- dataeval/utils/tensorflow/__init__.py +2 -1
- {dataeval-0.71.1.dist-info → dataeval-0.72.1.dist-info}/METADATA +7 -6
- dataeval-0.72.1.dist-info/RECORD +81 -0
- dataeval-0.71.1.dist-info/RECORD +0 -80
- {dataeval-0.71.1.dist-info → dataeval-0.72.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.71.1.dist-info → dataeval-0.72.1.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
dataeval/_internal/datasets.py
CHANGED
@@ -185,7 +185,7 @@ class MNIST(Dataset):
|
|
185
185
|
unit_interval : bool, default False
|
186
186
|
Shift the data values to the unit interval [0-1].
|
187
187
|
dtype : type | None, default None
|
188
|
-
Change the
|
188
|
+
Change the :term:`NumPy` dtype - data is loaded as np.uint8
|
189
189
|
channels : Literal['channels_first' | 'channels_last'] | None, default None
|
190
190
|
Location of channel axis if desired, default has no channels (N, 28, 28)
|
191
191
|
flatten : bool, default False
|
@@ -25,7 +25,7 @@ class ClustererOutput(OutputMetadata):
|
|
25
25
|
potential_outliers : List[int]
|
26
26
|
Indices which are near the border between belonging in the cluster and being an outlier
|
27
27
|
duplicates : List[List[int]]
|
28
|
-
Groups of indices that are exact duplicates
|
28
|
+
Groups of indices that are exact :term:`duplicates<Duplicates>`
|
29
29
|
potential_duplicates : List[List[int]]
|
30
30
|
Groups of indices which are not exact but closely related data points
|
31
31
|
"""
|
@@ -129,7 +129,7 @@ class ClusterMergeEntry:
|
|
129
129
|
|
130
130
|
class Clusterer:
|
131
131
|
"""
|
132
|
-
Uses hierarchical clustering to flag dataset properties of interest like
|
132
|
+
Uses hierarchical clustering to flag dataset properties of interest like Outliers and :term:`duplicates<Duplicates>`
|
133
133
|
|
134
134
|
Parameters
|
135
135
|
----------
|
@@ -401,7 +401,7 @@ class Clusterer:
|
|
401
401
|
|
402
402
|
def find_outliers(self, last_merge_levels: dict[int, int]) -> tuple[list[int], list[int]]:
|
403
403
|
"""
|
404
|
-
Retrieves
|
404
|
+
Retrieves Outliers based on when the sample was added to the cluster
|
405
405
|
and how far it was from the cluster when it was added
|
406
406
|
|
407
407
|
Parameters
|
@@ -470,7 +470,7 @@ class Clusterer:
|
|
470
470
|
Returns
|
471
471
|
-------
|
472
472
|
Tuple[List[List[int]], List[List[int]]]
|
473
|
-
The exact duplicates and near duplicates as lists of related indices
|
473
|
+
The exact :term:`duplicates<Duplicates>` and near duplicates as lists of related indices
|
474
474
|
"""
|
475
475
|
|
476
476
|
duplicates_std = []
|
@@ -495,12 +495,12 @@ class Clusterer:
|
|
495
495
|
# TODO: Move data input to evaluate from class
|
496
496
|
@set_metadata("dataeval.detectors", ["data"])
|
497
497
|
def evaluate(self) -> ClustererOutput:
|
498
|
-
"""Finds and flags indices of the data for
|
498
|
+
"""Finds and flags indices of the data for Outliers and :term:`duplicates<Duplicates>`
|
499
499
|
|
500
500
|
Returns
|
501
501
|
-------
|
502
502
|
ClustererOutput
|
503
|
-
The
|
503
|
+
The Outliers and duplicate indices found in the data
|
504
504
|
|
505
505
|
Example
|
506
506
|
-------
|
@@ -47,7 +47,7 @@ class DriftOutput(DriftBaseOutput):
|
|
47
47
|
Attributes
|
48
48
|
----------
|
49
49
|
is_drift : bool
|
50
|
-
Drift prediction for the images
|
50
|
+
:term:`Drift` prediction for the images
|
51
51
|
threshold : float
|
52
52
|
Threshold after multivariate correction if needed
|
53
53
|
feature_drift : NDArray
|
@@ -103,7 +103,7 @@ def preprocess_x(fn):
|
|
103
103
|
|
104
104
|
class UpdateStrategy(ABC):
|
105
105
|
"""
|
106
|
-
Updates reference dataset for drift detector
|
106
|
+
Updates reference dataset for :term:`drift<Drift>` detector
|
107
107
|
|
108
108
|
Parameters
|
109
109
|
----------
|
@@ -121,7 +121,7 @@ class UpdateStrategy(ABC):
|
|
121
121
|
|
122
122
|
class LastSeenUpdate(UpdateStrategy):
|
123
123
|
"""
|
124
|
-
Updates reference dataset for drift detector using last seen method.
|
124
|
+
Updates reference dataset for :term:`drift<Drift>` detector using last seen method.
|
125
125
|
|
126
126
|
Parameters
|
127
127
|
----------
|
@@ -136,7 +136,7 @@ class LastSeenUpdate(UpdateStrategy):
|
|
136
136
|
|
137
137
|
class ReservoirSamplingUpdate(UpdateStrategy):
|
138
138
|
"""
|
139
|
-
Updates reference dataset for drift detector using reservoir sampling method.
|
139
|
+
Updates reference dataset for :term:`drift<Drift>` detector using reservoir sampling method.
|
140
140
|
|
141
141
|
Parameters
|
142
142
|
----------
|
@@ -167,7 +167,7 @@ class ReservoirSamplingUpdate(UpdateStrategy):
|
|
167
167
|
|
168
168
|
class BaseDrift:
|
169
169
|
"""
|
170
|
-
A generic drift detection component for preprocessing data and applying statistical correction.
|
170
|
+
A generic :term:`drift<Drift>` detection component for preprocessing data and applying statistical correction.
|
171
171
|
|
172
172
|
This class handles common tasks related to drift detection, such as preprocessing
|
173
173
|
the reference data (`x_ref`), performing statistical correction (e.g., Bonferroni, FDR),
|
@@ -266,7 +266,7 @@ class BaseDrift:
|
|
266
266
|
|
267
267
|
def _preprocess(self, x: ArrayLike) -> ArrayLike:
|
268
268
|
"""
|
269
|
-
Preprocess the given data before computing the drift scores.
|
269
|
+
Preprocess the given data before computing the :term:`drift<Drift>` scores.
|
270
270
|
|
271
271
|
Parameters
|
272
272
|
----------
|
@@ -285,12 +285,13 @@ class BaseDrift:
|
|
285
285
|
|
286
286
|
class BaseDriftUnivariate(BaseDrift):
|
287
287
|
"""
|
288
|
-
Base class for drift detection methods using univariate statistical tests.
|
288
|
+
Base class for :term:`drift<Drift>` detection methods using univariate statistical tests.
|
289
289
|
|
290
290
|
This class inherits from `BaseDrift` and serves as a generic component for detecting
|
291
291
|
distribution drift in univariate features. If the number of features `n_features` is greater
|
292
292
|
than 1, a multivariate correction method (e.g., Bonferroni or FDR) is applied to control
|
293
|
-
the false positive rate
|
293
|
+
the :term:`false positive rate<False Positive Rate (FP)>`, ensuring it does not exceed the specified
|
294
|
+
:term:`p-value<P-Value>`.
|
294
295
|
|
295
296
|
Parameters
|
296
297
|
----------
|
@@ -318,7 +319,7 @@ class BaseDriftUnivariate(BaseDrift):
|
|
318
319
|
p_val : float
|
319
320
|
The significance level for drift detection.
|
320
321
|
correction : str
|
321
|
-
The method for controlling the
|
322
|
+
The method for controlling the :term:`False Discovery Rate (FDR)` or applying a Bonferroni correction.
|
322
323
|
update_x_ref : UpdateStrategy | None
|
323
324
|
Strategy for updating the reference data if applicable.
|
324
325
|
preprocess_fn : Callable | None
|
@@ -393,19 +394,19 @@ class BaseDriftUnivariate(BaseDrift):
|
|
393
394
|
Parameters
|
394
395
|
----------
|
395
396
|
x : ArrayLike
|
396
|
-
The batch of data to calculate univariate drift scores for each feature.
|
397
|
+
The batch of data to calculate univariate :term:`drift<Drift>` scores for each feature.
|
397
398
|
|
398
399
|
Returns
|
399
400
|
-------
|
400
401
|
tuple[NDArray, NDArray]
|
401
|
-
A tuple containing p-values and distance statistics for each feature.
|
402
|
+
A tuple containing p-values and distance :term:`statistics<Statistics>` for each feature.
|
402
403
|
"""
|
403
404
|
|
404
405
|
def _apply_correction(self, p_vals: NDArray) -> tuple[bool, float]:
|
405
406
|
"""
|
406
407
|
Apply the specified correction method (Bonferroni or FDR) to the p-values.
|
407
408
|
|
408
|
-
If the correction method is Bonferroni, the threshold for detecting drift
|
409
|
+
If the correction method is Bonferroni, the threshold for detecting :term:`drift<Drift>`
|
409
410
|
is divided by the number of features. For FDR, the correction is applied
|
410
411
|
using the Benjamini-Hochberg procedure.
|
411
412
|
|
@@ -457,8 +458,8 @@ class BaseDriftUnivariate(BaseDrift):
|
|
457
458
|
Returns
|
458
459
|
-------
|
459
460
|
DriftOutput
|
460
|
-
Dictionary containing the drift prediction and optionally the feature level
|
461
|
-
p-values, threshold after multivariate correction if needed and test statistics
|
461
|
+
Dictionary containing the :term:`drift<Drift>` prediction and optionally the feature level
|
462
|
+
p-values, threshold after multivariate correction if needed and test :term:`statistics<Statistics>`.
|
462
463
|
"""
|
463
464
|
# compute drift scores
|
464
465
|
p_vals, dist = self.score(x)
|
@@ -21,19 +21,19 @@ from .base import BaseDriftUnivariate, UpdateStrategy, preprocess_x
|
|
21
21
|
|
22
22
|
class DriftCVM(BaseDriftUnivariate):
|
23
23
|
"""
|
24
|
-
Drift detector employing the Cramér-von Mises (CVM)
|
24
|
+
:term:`Drift` detector employing the :term:`Cramér-von Mises (CVM) Drift Detection` test.
|
25
25
|
|
26
26
|
The CVM test detects changes in the distribution of continuous
|
27
27
|
univariate data. For multivariate data, a separate CVM test is applied to each
|
28
28
|
feature, and the obtained p-values are aggregated via the Bonferroni or
|
29
|
-
False Discovery Rate (FDR) corrections.
|
29
|
+
:term:`False Discovery Rate (FDR)` corrections.
|
30
30
|
|
31
31
|
Parameters
|
32
32
|
----------
|
33
33
|
x_ref : ArrayLike
|
34
34
|
Data used as reference distribution.
|
35
35
|
p_val : float | None, default 0.05
|
36
|
-
p-value used for significance of the statistical test for each feature.
|
36
|
+
:term:`p-value<P-Value>` used for significance of the statistical test for each feature.
|
37
37
|
If the FDR correction method is used, this corresponds to the acceptable
|
38
38
|
q-value.
|
39
39
|
x_ref_preprocessed : bool, default False
|
@@ -46,7 +46,7 @@ class DriftCVM(BaseDriftUnivariate):
|
|
46
46
|
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
47
47
|
preprocess_fn : Callable | None, default None
|
48
48
|
Function to preprocess the data before computing the data drift metrics.
|
49
|
-
Typically a dimensionality reduction technique.
|
49
|
+
Typically a :term:`dimensionality reduction<Dimensionality Reduction>` technique.
|
50
50
|
correction : "bonferroni" | "fdr", default "bonferroni"
|
51
51
|
Correction type for multivariate data. Either 'bonferroni' or 'fdr' (False
|
52
52
|
Discovery Rate).
|
@@ -79,7 +79,7 @@ class DriftCVM(BaseDriftUnivariate):
|
|
79
79
|
@preprocess_x
|
80
80
|
def score(self, x: ArrayLike) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
|
81
81
|
"""
|
82
|
-
Performs the two-sample Cramér-von Mises test(s), computing the p-value and
|
82
|
+
Performs the two-sample Cramér-von Mises test(s), computing the :term:`p-value<P-Value>` and
|
83
83
|
test statistic per feature.
|
84
84
|
|
85
85
|
Parameters
|
@@ -21,10 +21,10 @@ from .base import BaseDriftUnivariate, UpdateStrategy, preprocess_x
|
|
21
21
|
|
22
22
|
class DriftKS(BaseDriftUnivariate):
|
23
23
|
"""
|
24
|
-
Drift detector employing the Kolmogorov-Smirnov (KS) distribution test.
|
24
|
+
:term:`Drift` detector employing the Kolmogorov-Smirnov (KS) distribution test.
|
25
25
|
|
26
26
|
The KS test detects changes in the maximum distance between two data
|
27
|
-
distributions with Bonferroni or False Discovery Rate (FDR) correction
|
27
|
+
distributions with Bonferroni or :term:`False Discovery Rate (FDR)` correction
|
28
28
|
for multivariate data.
|
29
29
|
|
30
30
|
Parameters
|
@@ -32,7 +32,7 @@ class DriftKS(BaseDriftUnivariate):
|
|
32
32
|
x_ref : ArrayLike
|
33
33
|
Data used as reference distribution.
|
34
34
|
p_val : float | None, default 0.05
|
35
|
-
p-value used for significance of the statistical test for each feature.
|
35
|
+
:term:`p-value<P-Value>` used for significance of the statistical test for each feature.
|
36
36
|
If the FDR correction method is used, this corresponds to the acceptable
|
37
37
|
q-value.
|
38
38
|
x_ref_preprocessed : bool, default False
|
@@ -44,8 +44,8 @@ class DriftKS(BaseDriftUnivariate):
|
|
44
44
|
using the last n instances seen by the detector with LastSeenUpdateStrategy
|
45
45
|
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
46
46
|
preprocess_fn : Callable | None, default None
|
47
|
-
Function to preprocess the data before computing the data drift metrics.
|
48
|
-
Typically a dimensionality reduction technique.
|
47
|
+
Function to preprocess the data before computing the data :term:`drift<Drift>` metrics.
|
48
|
+
Typically a :term:`dimensionality reduction<Dimensionality Reduction>` technique.
|
49
49
|
correction : "bonferroni" | "fdr", default "bonferroni"
|
50
50
|
Correction type for multivariate data. Either 'bonferroni' or 'fdr' (False
|
51
51
|
Discovery Rate).
|
@@ -85,7 +85,7 @@ class DriftKS(BaseDriftUnivariate):
|
|
85
85
|
@preprocess_x
|
86
86
|
def score(self, x: ArrayLike) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
|
87
87
|
"""
|
88
|
-
Compute KS scores and
|
88
|
+
Compute KS scores and :term:`Statistics` per feature.
|
89
89
|
|
90
90
|
Parameters
|
91
91
|
----------
|
@@ -95,7 +95,7 @@ class DriftKS(BaseDriftUnivariate):
|
|
95
95
|
Returns
|
96
96
|
-------
|
97
97
|
tuple[NDArray, NDArray]
|
98
|
-
Feature level p-values and KS statistic
|
98
|
+
Feature level :term:`p-values<P-Value>` and KS statistic
|
99
99
|
"""
|
100
100
|
x = to_numpy(x)
|
101
101
|
x = x.reshape(x.shape[0], -1)
|
@@ -24,14 +24,14 @@ from .torch import GaussianRBF, get_device, mmd2_from_kernel_matrix
|
|
24
24
|
@dataclass(frozen=True)
|
25
25
|
class DriftMMDOutput(DriftBaseOutput):
|
26
26
|
"""
|
27
|
-
Output class for :class:`DriftMMD` drift detector
|
27
|
+
Output class for :class:`DriftMMD` :term:`drift<Drift>` detector
|
28
28
|
|
29
29
|
Attributes
|
30
30
|
----------
|
31
31
|
is_drift : bool
|
32
32
|
Drift prediction for the images
|
33
33
|
threshold : float
|
34
|
-
P-
|
34
|
+
:term:`P-Value` used for significance of the permutation test
|
35
35
|
p_val : float
|
36
36
|
P-value obtained from the permutation test
|
37
37
|
distance : float
|
@@ -49,14 +49,14 @@ class DriftMMDOutput(DriftBaseOutput):
|
|
49
49
|
|
50
50
|
class DriftMMD(BaseDrift):
|
51
51
|
"""
|
52
|
-
Maximum Mean Discrepancy (MMD)
|
52
|
+
:term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm using a permutation test.
|
53
53
|
|
54
54
|
Parameters
|
55
55
|
----------
|
56
56
|
x_ref : ArrayLike
|
57
57
|
Data used as reference distribution.
|
58
58
|
p_val : float | None, default 0.05
|
59
|
-
|
59
|
+
:term:`P-Value` used for significance of the statistical test for each feature.
|
60
60
|
If the FDR correction method is used, this corresponds to the acceptable
|
61
61
|
q-value.
|
62
62
|
x_ref_preprocessed : bool, default False
|
@@ -69,7 +69,7 @@ class DriftMMD(BaseDrift):
|
|
69
69
|
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
70
70
|
preprocess_fn : Callable | None, default None
|
71
71
|
Function to preprocess the data before computing the data drift metrics.
|
72
|
-
Typically a dimensionality reduction technique.
|
72
|
+
Typically a :term:`dimensionality reduction<Dimensionality Reduction>` technique.
|
73
73
|
kernel : Callable, default GaussianRBF
|
74
74
|
Kernel used for the MMD computation, defaults to Gaussian RBF kernel.
|
75
75
|
sigma : ArrayLike | None, default None
|
@@ -132,7 +132,7 @@ class DriftMMD(BaseDrift):
|
|
132
132
|
@preprocess_x
|
133
133
|
def score(self, x: ArrayLike) -> tuple[float, float, float]:
|
134
134
|
"""
|
135
|
-
Compute the p-value resulting from a permutation test using the maximum mean
|
135
|
+
Compute the :term:`p-value<P-Value>` resulting from a permutation test using the maximum mean
|
136
136
|
discrepancy as a distance measure between the reference data and the data to
|
137
137
|
be tested.
|
138
138
|
|
@@ -145,7 +145,7 @@ class DriftMMD(BaseDrift):
|
|
145
145
|
-------
|
146
146
|
tuple(float, float, float)
|
147
147
|
p-value obtained from the permutation test, MMD^2 between the reference and test set,
|
148
|
-
and MMD^2 threshold above which drift is flagged
|
148
|
+
and MMD^2 threshold above which :term:`drift<Drift>` is flagged
|
149
149
|
"""
|
150
150
|
x = as_numpy(x)
|
151
151
|
x_ref = torch.from_numpy(self.x_ref).to(self.device)
|
@@ -161,7 +161,7 @@ class DriftMMD(BaseDrift):
|
|
161
161
|
# compute distance threshold
|
162
162
|
idx_threshold = int(self.p_val * len(mmd2_permuted))
|
163
163
|
distance_threshold = torch.sort(mmd2_permuted, descending=True).values[idx_threshold]
|
164
|
-
return p_val.numpy().item(), mmd2.numpy().item(), distance_threshold.numpy()
|
164
|
+
return p_val.numpy().item(), mmd2.numpy().item(), distance_threshold.numpy().item()
|
165
165
|
|
166
166
|
@set_metadata("dataeval.detectors")
|
167
167
|
@preprocess_x
|
@@ -179,7 +179,8 @@ class DriftMMD(BaseDrift):
|
|
179
179
|
Returns
|
180
180
|
-------
|
181
181
|
DriftMMDOutput
|
182
|
-
Output class containing the drift prediction, p-value
|
182
|
+
Output class containing the :term:`drift<Drift>` prediction, :term:`p-value<P-Value>`,
|
183
|
+
threshold and MMD metric.
|
183
184
|
"""
|
184
185
|
# compute drift scores
|
185
186
|
p_val, dist, distance_threshold = self.score(x)
|
@@ -102,7 +102,7 @@ def predict_batch(
|
|
102
102
|
preprocess_fn : Callable | None, default None
|
103
103
|
Optional preprocessing function for each batch.
|
104
104
|
dtype : np.dtype | torch.dtype, default np.float32
|
105
|
-
Model output type, either a
|
105
|
+
Model output type, either a :term:`NumPy` or torch dtype, e.g. np.float32 or torch.float32.
|
106
106
|
|
107
107
|
Returns
|
108
108
|
-------
|
@@ -179,7 +179,7 @@ def preprocess_drift(
|
|
179
179
|
batch_size : int, default 1e10
|
180
180
|
Batch size used during prediction.
|
181
181
|
dtype : np.dtype | torch.dtype, default np.float32
|
182
|
-
Model output type, either a
|
182
|
+
Model output type, either a :term:`NumPy` or torch dtype, e.g. np.float32 or torch.float32.
|
183
183
|
|
184
184
|
Returns
|
185
185
|
-------
|
@@ -34,7 +34,7 @@ def classifier_uncertainty(
|
|
34
34
|
x : np.ndarray
|
35
35
|
Batch of instances.
|
36
36
|
model_fn : Callable
|
37
|
-
Function that evaluates a classification model on x in a single call (contains
|
37
|
+
Function that evaluates a :term:`classification<Classification>` model on x in a single call (contains
|
38
38
|
batching logic if necessary).
|
39
39
|
preds_type : "probs" | "logits", default "probs"
|
40
40
|
Type of prediction output by the model. Options are 'probs' (in [0,1]) or
|
@@ -73,9 +73,9 @@ class DriftUncertainty:
|
|
73
73
|
x_ref : ArrayLike
|
74
74
|
Data used as reference distribution.
|
75
75
|
model : Callable
|
76
|
-
Classification model outputting class probabilities (or logits)
|
76
|
+
:term:`Classification` model outputting class probabilities (or logits)
|
77
77
|
p_val : float, default 0.05
|
78
|
-
|
78
|
+
:term:`P-Value` used for the significance of the test.
|
79
79
|
x_ref_preprocessed : bool, default False
|
80
80
|
Whether the given reference data ``x_ref`` has been preprocessed yet.
|
81
81
|
If ``True``, only the test data ``x`` will be preprocessed at prediction time.
|
@@ -145,6 +145,7 @@ class DriftUncertainty:
|
|
145
145
|
Returns
|
146
146
|
-------
|
147
147
|
DriftUnvariateOutput
|
148
|
-
Dictionary containing the drift prediction, p-value
|
148
|
+
Dictionary containing the drift prediction, :term:`p-value<P-Value>`, and threshold
|
149
|
+
statistics.
|
149
150
|
"""
|
150
151
|
return self._detector.predict(x)
|
@@ -37,7 +37,7 @@ class DuplicatesOutput(Generic[TIndexCollection], OutputMetadata):
|
|
37
37
|
|
38
38
|
class Duplicates:
|
39
39
|
"""
|
40
|
-
Finds the duplicate images in a dataset using xxhash for exact duplicates
|
40
|
+
Finds the duplicate images in a dataset using xxhash for exact :term:`duplicates<Duplicates>`
|
41
41
|
and pchash for near duplicates
|
42
42
|
|
43
43
|
Attributes
|
@@ -24,12 +24,12 @@ from dataeval._internal.output import set_metadata
|
|
24
24
|
|
25
25
|
class OOD_AE(OODBase):
|
26
26
|
"""
|
27
|
-
Autoencoder
|
27
|
+
Autoencoder-based :term:`out of distribution<Out-of-distribution (OOD)>` detector.
|
28
28
|
|
29
29
|
Parameters
|
30
30
|
----------
|
31
31
|
model : AE
|
32
|
-
|
32
|
+
An :term:`autoencoder<Autoencoder>` model.
|
33
33
|
"""
|
34
34
|
|
35
35
|
def __init__(self, model: AE) -> None:
|
@@ -30,7 +30,7 @@ class OOD_AEGMM(OODGMMBase):
|
|
30
30
|
Parameters
|
31
31
|
----------
|
32
32
|
model : AEGMM
|
33
|
-
|
33
|
+
An AEGMM model.
|
34
34
|
"""
|
35
35
|
|
36
36
|
def __init__(self, model: AEGMM) -> None:
|
@@ -53,7 +53,7 @@ class OOD_AEGMM(OODGMMBase):
|
|
53
53
|
@set_metadata("dataeval.detectors")
|
54
54
|
def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
55
55
|
"""
|
56
|
-
Compute the out-of-distribution (OOD) score for a given dataset.
|
56
|
+
Compute the :term:`out of distribution<Out-of-distribution (OOD)>` score for a given dataset.
|
57
57
|
|
58
58
|
Parameters
|
59
59
|
----------
|
@@ -32,7 +32,7 @@ class OODOutput(OutputMetadata):
|
|
32
32
|
Attributes
|
33
33
|
----------
|
34
34
|
is_ood : NDArray
|
35
|
-
Array of images that are detected as
|
35
|
+
Array of images that are detected as :term:`Out-of-Distribution (OOD)`
|
36
36
|
instance_score : NDArray
|
37
37
|
Instance score of the evaluated dataset
|
38
38
|
feature_score : NDArray | None
|
@@ -109,7 +109,7 @@ class OODBase(ABC):
|
|
109
109
|
@abstractmethod
|
110
110
|
def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
111
111
|
"""
|
112
|
-
Compute the out-of-distribution (OOD) scores for a given dataset.
|
112
|
+
Compute the :term:`out of distribution<Out-of-distribution (OOD)>` scores for a given dataset.
|
113
113
|
|
114
114
|
Parameters
|
115
115
|
----------
|
@@ -182,7 +182,7 @@ class OODBase(ABC):
|
|
182
182
|
ood_type: Literal["feature", "instance"] = "instance",
|
183
183
|
) -> OODOutput:
|
184
184
|
"""
|
185
|
-
Predict whether instances are out-of-distribution or not.
|
185
|
+
Predict whether instances are :term:`out of distribution<Out-of-distribution (OOD)>` or not.
|
186
186
|
|
187
187
|
Parameters
|
188
188
|
----------
|
@@ -35,7 +35,7 @@ def build_model(
|
|
35
35
|
Parameters
|
36
36
|
----------
|
37
37
|
dist
|
38
|
-
TensorFlow distribution.
|
38
|
+
:term:`TensorFlow` distribution.
|
39
39
|
input_shape
|
40
40
|
Input shape of the model.
|
41
41
|
filepath
|
@@ -230,7 +230,7 @@ class OOD_LLR(OODBase):
|
|
230
230
|
batch_size: int = int(1e10),
|
231
231
|
) -> NDArray:
|
232
232
|
"""
|
233
|
-
Compute log probability of a batch of instances under the generative model
|
233
|
+
Compute log probability of a batch of instances under the :term:`generative model<Generative Model>`.
|
234
234
|
"""
|
235
235
|
logp_fn = partial(dist.log_prob, return_per_feature=return_per_feature)
|
236
236
|
# TODO: TBD: can this be any of the other types from predict_batch? i.e. tf.Tensor or tuple
|
@@ -269,7 +269,7 @@ class OOD_LLR(OODBase):
|
|
269
269
|
return_per_feature
|
270
270
|
Return likelihood ratio per feature.
|
271
271
|
batch_size
|
272
|
-
Batch size for the generative model evaluations.
|
272
|
+
Batch size for the :term:`generative model<Generative Model>` evaluations.
|
273
273
|
|
274
274
|
Returns
|
275
275
|
-------
|
@@ -45,7 +45,7 @@ class OOD_VAE(OODBase):
|
|
45
45
|
|
46
46
|
>>> metric.fit(dataset, threshold_perc=85, batch_size=128, verbose=False)
|
47
47
|
|
48
|
-
Detect out of distribution samples at the 'feature' level
|
48
|
+
Detect :term:`out of distribution<Out-of-Distribution (OOD)>` samples at the 'feature' level
|
49
49
|
|
50
50
|
>>> result = metric.predict(dataset, ood_type="feature")
|
51
51
|
"""
|
@@ -57,7 +57,7 @@ class OOD_VAEGMM(OODGMMBase):
|
|
57
57
|
@set_metadata("dataeval.detectors")
|
58
58
|
def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
59
59
|
"""
|
60
|
-
Compute the out-of-distribution (OOD) score for a given dataset.
|
60
|
+
Compute the :term:`out of distribution<Out-of-distribution (OOD)>` score for a given dataset.
|
61
61
|
|
62
62
|
Parameters
|
63
63
|
----------
|
@@ -27,7 +27,7 @@ class OutliersOutput(Generic[TIndexIssueMap], OutputMetadata):
|
|
27
27
|
Attributes
|
28
28
|
----------
|
29
29
|
issues : dict[int, dict[str, float]] | list[dict[int, dict[str, float]]]
|
30
|
-
Indices of image
|
30
|
+
Indices of image Outliers with their associated issue type and calculated values.
|
31
31
|
|
32
32
|
- For a single dataset, a dictionary containing the indices of outliers and
|
33
33
|
a dictionary showing the issues and calculated values for the given index.
|
@@ -69,7 +69,7 @@ def _get_outlier_mask(
|
|
69
69
|
|
70
70
|
class Outliers:
|
71
71
|
r"""
|
72
|
-
Calculates statistical
|
72
|
+
Calculates statistical Outliers of a dataset using various statistical tests applied to each image
|
73
73
|
|
74
74
|
Parameters
|
75
75
|
----------
|
@@ -86,7 +86,7 @@ class Outliers:
|
|
86
86
|
|
87
87
|
See Also
|
88
88
|
--------
|
89
|
-
Duplicates
|
89
|
+
:term:`Duplicates`
|
90
90
|
|
91
91
|
Note
|
92
92
|
----
|
@@ -162,7 +162,7 @@ class Outliers:
|
|
162
162
|
self, stats: OutlierStatsOutput | DatasetStatsOutput | Sequence[OutlierStatsOutput]
|
163
163
|
) -> OutliersOutput:
|
164
164
|
"""
|
165
|
-
Returns indices of
|
165
|
+
Returns indices of Outliers with the issues identified for each
|
166
166
|
|
167
167
|
Parameters
|
168
168
|
----------
|
@@ -238,7 +238,7 @@ class Outliers:
|
|
238
238
|
)
|
239
239
|
def evaluate(self, data: Iterable[ArrayLike]) -> OutliersOutput[IndexIssueMap]:
|
240
240
|
"""
|
241
|
-
Returns indices of
|
241
|
+
Returns indices of Outliers with the issues identified for each
|
242
242
|
|
243
243
|
Parameters
|
244
244
|
----------
|
@@ -15,12 +15,12 @@ from dataeval._internal.output import OutputMetadata, set_metadata
|
|
15
15
|
@dataclass(frozen=True)
|
16
16
|
class BalanceOutput(OutputMetadata):
|
17
17
|
"""
|
18
|
-
Output class for :func:`balance`
|
18
|
+
Output class for :func:`balance` :term:`Bias` metric
|
19
19
|
|
20
20
|
Attributes
|
21
21
|
----------
|
22
22
|
balance : NDArray[np.float64]
|
23
|
-
Estimate of mutual information between metadata factors and class label
|
23
|
+
Estimate of :term:`mutual information<Mutual Information (MI)>` between metadata factors and class label
|
24
24
|
factors : NDArray[np.float64]
|
25
25
|
Estimate of inter/intra-factor mutual information
|
26
26
|
classwise : NDArray[np.float64]
|
@@ -55,7 +55,7 @@ def validate_num_neighbors(num_neighbors: int) -> int:
|
|
55
55
|
@set_metadata("dataeval.metrics")
|
56
56
|
def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neighbors: int = 5) -> BalanceOutput:
|
57
57
|
"""
|
58
|
-
Mutual information (MI) between factors (class label, metadata, label/image properties)
|
58
|
+
:term:`Mutual information (MI)` between factors (class label, metadata, label/image properties)
|
59
59
|
|
60
60
|
Parameters
|
61
61
|
----------
|
@@ -70,7 +70,7 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
|
|
70
70
|
Returns
|
71
71
|
-------
|
72
72
|
BalanceOutput
|
73
|
-
(num_factors+1) x (num_factors+1) estimate of mutual information
|
73
|
+
(num_factors+1) x (num_factors+1) estimate of :term:`mutual information<Mutual Information (MI)>`
|
74
74
|
between num_factors metadata factors and class label. Symmetry is enforced.
|
75
75
|
|
76
76
|
Note
|
@@ -83,7 +83,7 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
|
|
83
83
|
|
84
84
|
Example
|
85
85
|
-------
|
86
|
-
Return balance (mutual information) of factors with class_labels
|
86
|
+
Return :term:`balance<Balance>` (:term:`mutual information<Mutual Information (MI)>`) of factors with class_labels
|
87
87
|
|
88
88
|
>>> bal = balance(class_labels, metadata)
|
89
89
|
>>> bal.balance
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
This module contains the implementation of the
|
3
3
|
FR Test Statistic based estimate and the
|
4
|
-
KNN based estimate for the Bayes Error Rate
|
4
|
+
KNN based estimate for the :term:`Bayes error rate<Bayes Error Rate (BER)>`
|
5
5
|
|
6
6
|
Learning to Bound the Multi-class Bayes Error (Th. 3 and Th. 4)
|
7
7
|
https://arxiv.org/abs/1811.06419
|
@@ -30,7 +30,7 @@ class BEROutput(OutputMetadata):
|
|
30
30
|
Attributes
|
31
31
|
----------
|
32
32
|
ber : float
|
33
|
-
The upper bounds of the Bayes Error Rate
|
33
|
+
The upper bounds of the :term:`Bayes error rate<Bayes Error Rate (BER)>`
|
34
34
|
ber_lower : float
|
35
35
|
The lower bounds of the Bayes Error Rate
|
36
36
|
"""
|
@@ -40,7 +40,7 @@ class BEROutput(OutputMetadata):
|
|
40
40
|
|
41
41
|
|
42
42
|
def ber_mst(X: NDArray, y: NDArray) -> tuple[float, float]:
|
43
|
-
"""Calculates the Bayes Error Rate using a minimum spanning tree
|
43
|
+
"""Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree
|
44
44
|
|
45
45
|
Parameters
|
46
46
|
----------
|
@@ -65,7 +65,7 @@ def ber_mst(X: NDArray, y: NDArray) -> tuple[float, float]:
|
|
65
65
|
|
66
66
|
|
67
67
|
def ber_knn(X: NDArray, y: NDArray, k: int) -> tuple[float, float]:
|
68
|
-
"""Calculates the Bayes Error Rate using K-nearest neighbors
|
68
|
+
"""Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using K-nearest neighbors
|
69
69
|
|
70
70
|
Parameters
|
71
71
|
----------
|
@@ -114,12 +114,12 @@ BER_FN_MAP = {"KNN": ber_knn, "MST": ber_mst}
|
|
114
114
|
@set_metadata("dataeval.metrics")
|
115
115
|
def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
|
116
116
|
"""
|
117
|
-
An estimator for Multi-class Bayes Error Rate using FR or KNN test statistic basis
|
117
|
+
An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
|
118
118
|
|
119
119
|
Parameters
|
120
120
|
----------
|
121
121
|
images : ArrayLike (N, ... )
|
122
|
-
Array of images or image embeddings
|
122
|
+
Array of images or image :term:`embeddings<Embeddings>`
|
123
123
|
labels : ArrayLike (N, 1)
|
124
124
|
Array of labels for each image or image embedding
|
125
125
|
k : int, default 1
|