dataeval 0.88.1__py3-none-any.whl → 0.89.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/_version.py +2 -2
- dataeval/data/_embeddings.py +2 -2
- dataeval/data/_metadata.py +2 -1
- dataeval/detectors/drift/_base.py +152 -27
- dataeval/detectors/drift/_cvm.py +44 -25
- dataeval/detectors/drift/_ks.py +56 -28
- dataeval/detectors/drift/_mmd.py +44 -18
- dataeval/detectors/drift/_uncertainty.py +119 -45
- dataeval/outputs/_drift.py +67 -29
- dataeval/outputs/_workflows.py +19 -5
- dataeval/typing.py +23 -4
- {dataeval-0.88.1.dist-info → dataeval-0.89.0.dist-info}/METADATA +1 -1
- {dataeval-0.88.1.dist-info → dataeval-0.89.0.dist-info}/RECORD +15 -15
- {dataeval-0.88.1.dist-info → dataeval-0.89.0.dist-info}/WHEEL +0 -0
- {dataeval-0.88.1.dist-info → dataeval-0.89.0.dist-info}/licenses/LICENSE +0 -0
dataeval/_version.py
CHANGED
dataeval/data/_embeddings.py
CHANGED
@@ -5,7 +5,7 @@ __all__ = []
|
|
5
5
|
import logging
|
6
6
|
import math
|
7
7
|
import os
|
8
|
-
from collections.abc import Iterator, Sequence
|
8
|
+
from collections.abc import Iterable, Iterator, Sequence
|
9
9
|
from pathlib import Path
|
10
10
|
from typing import Any, cast
|
11
11
|
|
@@ -80,7 +80,7 @@ class Embeddings:
|
|
80
80
|
# Technically more permissive than ImageClassificationDataset or ObjectDetectionDataset
|
81
81
|
dataset: Dataset[tuple[ArrayLike, Any, Any]] | Dataset[ArrayLike],
|
82
82
|
batch_size: int,
|
83
|
-
transforms: Transform[torch.Tensor] |
|
83
|
+
transforms: Transform[torch.Tensor] | Iterable[Transform[torch.Tensor]] | None = None,
|
84
84
|
model: torch.nn.Module | None = None,
|
85
85
|
device: DeviceLike | None = None,
|
86
86
|
cache: Path | str | bool = False,
|
dataeval/data/_metadata.py
CHANGED
@@ -15,6 +15,7 @@ from tqdm.auto import tqdm
|
|
15
15
|
from dataeval.typing import (
|
16
16
|
AnnotatedDataset,
|
17
17
|
Array,
|
18
|
+
DatumMetadata,
|
18
19
|
ObjectDetectionTarget,
|
19
20
|
)
|
20
21
|
from dataeval.utils._array import as_numpy
|
@@ -76,7 +77,7 @@ class Metadata:
|
|
76
77
|
|
77
78
|
def __init__(
|
78
79
|
self,
|
79
|
-
dataset: AnnotatedDataset[tuple[Any, Any,
|
80
|
+
dataset: AnnotatedDataset[tuple[Any, Any, DatumMetadata]],
|
80
81
|
*,
|
81
82
|
continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
|
82
83
|
auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
|
@@ -55,6 +55,42 @@ def update_strategy(fn: Callable[..., R]) -> Callable[..., R]:
|
|
55
55
|
|
56
56
|
|
57
57
|
class BaseDrift:
|
58
|
+
"""Base class for drift detection algorithms.
|
59
|
+
|
60
|
+
Provides common functionality for drift detectors including reference data
|
61
|
+
management, encoding of input data, and statistical correction methods.
|
62
|
+
Subclasses implement specific drift detection algorithms.
|
63
|
+
|
64
|
+
Parameters
|
65
|
+
----------
|
66
|
+
data : Embeddings or Array
|
67
|
+
Reference dataset used as baseline for drift detection.
|
68
|
+
Can be image embeddings or raw arrays.
|
69
|
+
p_val : float, default 0.05
|
70
|
+
Significance threshold for drift detection, between 0 and 1.
|
71
|
+
Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
|
72
|
+
update_strategy : UpdateStrategy or None, default None
|
73
|
+
Strategy for updating reference data when new data arrives.
|
74
|
+
When None, reference data remains fixed throughout detection.
|
75
|
+
Default None maintains stable baseline for consistent comparison.
|
76
|
+
correction : {"bonferroni", "fdr"}, default "bonferroni"
|
77
|
+
Multiple testing correction method for multivariate drift detection.
|
78
|
+
"bonferroni" provides conservative family-wise error control.
|
79
|
+
"fdr" (False Discovery Rate) offers less conservative control.
|
80
|
+
Default "bonferroni" minimizes false positive drift detections.
|
81
|
+
|
82
|
+
Attributes
|
83
|
+
----------
|
84
|
+
p_val : float
|
85
|
+
Significance threshold for statistical tests.
|
86
|
+
update_strategy : UpdateStrategy or None
|
87
|
+
Reference data update strategy.
|
88
|
+
correction : {"bonferroni", "fdr"}
|
89
|
+
Multiple testing correction method.
|
90
|
+
n : int
|
91
|
+
Number of samples in the reference dataset.
|
92
|
+
"""
|
93
|
+
|
58
94
|
p_val: float
|
59
95
|
update_strategy: UpdateStrategy | None
|
60
96
|
correction: Literal["bonferroni", "fdr"]
|
@@ -83,19 +119,43 @@ class BaseDrift:
|
|
83
119
|
|
84
120
|
@property
|
85
121
|
def x_ref(self) -> NDArray[np.float32]:
|
86
|
-
"""
|
87
|
-
|
122
|
+
"""Reference data for drift detection.
|
123
|
+
|
124
|
+
Lazily encodes the reference dataset on first access.
|
125
|
+
Data is flattened and converted to 32-bit floating point for
|
126
|
+
consistent numerical processing across different input types.
|
88
127
|
|
89
128
|
Returns
|
90
129
|
-------
|
91
130
|
NDArray[np.float32]
|
92
|
-
|
131
|
+
Reference data as flattened 32-bit floating point array.
|
132
|
+
Shape is (n_samples, n_features_flattened).
|
133
|
+
|
134
|
+
Notes
|
135
|
+
-----
|
136
|
+
Data is cached after first access to avoid repeated encoding overhead.
|
93
137
|
"""
|
94
138
|
if self._x_ref is None:
|
95
139
|
self._x_ref = self._encode(self._data)
|
96
140
|
return self._x_ref
|
97
141
|
|
98
142
|
def _encode(self, data: Embeddings | Array) -> NDArray[np.float32]:
|
143
|
+
"""
|
144
|
+
Encode input data to consistent numpy format.
|
145
|
+
|
146
|
+
Handles different input types (Embeddings, Arrays) and converts
|
147
|
+
them to flattened 32-bit floating point arrays for drift detection.
|
148
|
+
|
149
|
+
Parameters
|
150
|
+
----------
|
151
|
+
data : Embeddings or Array
|
152
|
+
Input data to encode.
|
153
|
+
|
154
|
+
Returns
|
155
|
+
-------
|
156
|
+
NDArray[np.float32]
|
157
|
+
Encoded data as flattened 32-bit floating point array.
|
158
|
+
"""
|
99
159
|
array = (
|
100
160
|
data.to_numpy().astype(np.float32)
|
101
161
|
if isinstance(data, Embeddings)
|
@@ -107,6 +167,46 @@ class BaseDrift:
|
|
107
167
|
|
108
168
|
|
109
169
|
class BaseDriftUnivariate(BaseDrift):
|
170
|
+
"""
|
171
|
+
Base class for univariate drift detection algorithms.
|
172
|
+
|
173
|
+
Extends BaseDrift with feature-wise drift detection capabilities.
|
174
|
+
Applies statistical tests independently to each feature (pixel) and
|
175
|
+
uses multiple testing correction to control false discovery rates.
|
176
|
+
|
177
|
+
Parameters
|
178
|
+
----------
|
179
|
+
data : Embeddings or Array
|
180
|
+
Reference dataset used as baseline for drift detection.
|
181
|
+
p_val : float, default 0.05
|
182
|
+
Significance threshold for drift detection, between 0 and 1.
|
183
|
+
Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
|
184
|
+
update_strategy : UpdateStrategy or None, default None
|
185
|
+
Strategy for updating reference data when new data arrives.
|
186
|
+
When None, reference data remains fixed throughout detection.
|
187
|
+
Default None maintains stable baseline for consistent comparison.
|
188
|
+
correction : {"bonferroni", "fdr"}, default "bonferroni"
|
189
|
+
Multiple testing correction method for controlling false positives
|
190
|
+
across multiple features. "bonferroni" divides significance level
|
191
|
+
by number of features. "fdr" uses Benjamini-Hochberg procedure.
|
192
|
+
Default "bonferroni" provides conservative family-wise error control.
|
193
|
+
n_features : int or None, default None
|
194
|
+
Number of features to analyze. When None, automatically inferred
|
195
|
+
from the first sample's flattened shape. Default None enables
|
196
|
+
automatic feature detection for flexible input handling.
|
197
|
+
|
198
|
+
Attributes
|
199
|
+
----------
|
200
|
+
p_val : float
|
201
|
+
Significance threshold for statistical tests.
|
202
|
+
update_strategy : UpdateStrategy or None
|
203
|
+
Reference data update strategy.
|
204
|
+
correction : {"bonferroni", "fdr"}
|
205
|
+
Multiple testing correction method.
|
206
|
+
n : int
|
207
|
+
Number of samples in the reference dataset.
|
208
|
+
"""
|
209
|
+
|
110
210
|
def __init__(
|
111
211
|
self,
|
112
212
|
data: Embeddings | Array,
|
@@ -121,16 +221,22 @@ class BaseDriftUnivariate(BaseDrift):
|
|
121
221
|
|
122
222
|
@property
|
123
223
|
def n_features(self) -> int:
|
124
|
-
"""
|
125
|
-
Get the number of features in the reference data.
|
224
|
+
"""Number of features in the reference data.
|
126
225
|
|
127
|
-
|
128
|
-
|
226
|
+
Lazily computes the number of features from the first data sample
|
227
|
+
if not provided during initialization. Features correspond to the
|
228
|
+
flattened dimensionality of the input data (e.g., pixels for images).
|
129
229
|
|
130
230
|
Returns
|
131
231
|
-------
|
132
232
|
int
|
133
|
-
Number of features in the reference data.
|
233
|
+
Number of features (flattened dimensions) in the reference data.
|
234
|
+
Always > 0 for valid datasets.
|
235
|
+
|
236
|
+
Notes
|
237
|
+
-----
|
238
|
+
For image data, this equals C x H x W.
|
239
|
+
Computed once and cached for efficiency.
|
134
240
|
"""
|
135
241
|
# lazy process n_features as needed
|
136
242
|
if self._n_features is None:
|
@@ -139,18 +245,27 @@ class BaseDriftUnivariate(BaseDrift):
|
|
139
245
|
return self._n_features
|
140
246
|
|
141
247
|
def score(self, data: Embeddings | Array) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
|
142
|
-
"""
|
143
|
-
|
248
|
+
"""Calculate feature-wise p-values and test statistics.
|
249
|
+
|
250
|
+
Applies the detector's statistical test independently to each feature,
|
251
|
+
comparing the distribution of each feature between reference and test data.
|
144
252
|
|
145
253
|
Parameters
|
146
254
|
----------
|
147
255
|
data : Embeddings or Array
|
148
|
-
|
256
|
+
Test dataset to compare against reference data.
|
149
257
|
|
150
258
|
Returns
|
151
259
|
-------
|
152
|
-
tuple[NDArray, NDArray]
|
153
|
-
|
260
|
+
tuple[NDArray[np.float32], NDArray[np.float32]]
|
261
|
+
First array contains p-values for each feature (all between 0 and 1).
|
262
|
+
Second array contains test statistics for each feature (all >= 0).
|
263
|
+
Both arrays have shape (n_features,).
|
264
|
+
|
265
|
+
Notes
|
266
|
+
-----
|
267
|
+
Lower p-values indicate stronger evidence of drift for that feature.
|
268
|
+
Higher test statistics indicate greater distributional differences.
|
154
269
|
"""
|
155
270
|
x_np = self._encode(data)
|
156
271
|
p_val = np.zeros(self.n_features, dtype=np.float32)
|
@@ -164,22 +279,29 @@ class BaseDriftUnivariate(BaseDrift):
|
|
164
279
|
|
165
280
|
def _apply_correction(self, p_vals: NDArray[np.float32]) -> tuple[bool, float]:
|
166
281
|
"""
|
167
|
-
Apply
|
282
|
+
Apply multiple testing correction to feature-wise p-values.
|
168
283
|
|
169
|
-
|
170
|
-
|
171
|
-
|
284
|
+
Corrects for multiple comparisons across features to control
|
285
|
+
false positive rates. Bonferroni correction divides the significance
|
286
|
+
threshold by the number of features. FDR correction uses the
|
287
|
+
Benjamini-Hochberg procedure for less conservative control.
|
172
288
|
|
173
289
|
Parameters
|
174
290
|
----------
|
175
|
-
p_vals : NDArray
|
176
|
-
Array of p-values from
|
291
|
+
p_vals : NDArray[np.float32]
|
292
|
+
Array of p-values from univariate tests for each feature.
|
293
|
+
All values should be between 0 and 1.
|
177
294
|
|
178
295
|
Returns
|
179
296
|
-------
|
180
297
|
tuple[bool, float]
|
181
|
-
|
182
|
-
threshold
|
298
|
+
Boolean indicating whether drift was detected after correction.
|
299
|
+
Float is the effective threshold used for detection.
|
300
|
+
|
301
|
+
Notes
|
302
|
+
-----
|
303
|
+
Bonferroni correction: threshold = p_val / n_features
|
304
|
+
FDR correction: Uses Benjamini-Hochberg step-up procedure
|
183
305
|
"""
|
184
306
|
if self.correction == "bonferroni":
|
185
307
|
threshold = self.p_val / self.n_features
|
@@ -201,21 +323,24 @@ class BaseDriftUnivariate(BaseDrift):
|
|
201
323
|
@set_metadata
|
202
324
|
@update_strategy
|
203
325
|
def predict(self, data: Embeddings | Array) -> DriftOutput:
|
204
|
-
"""
|
205
|
-
|
206
|
-
|
326
|
+
"""Predict drift and update reference data using specified strategy.
|
327
|
+
|
328
|
+
Performs feature-wise drift detection, applies multiple testing
|
329
|
+
correction, and optionally updates the reference dataset based
|
330
|
+
on the configured update strategy.
|
207
331
|
|
208
332
|
Parameters
|
209
333
|
----------
|
210
334
|
data : Embeddings or Array
|
211
|
-
|
335
|
+
Test dataset to analyze for drift against reference data.
|
212
336
|
|
213
337
|
Returns
|
214
338
|
-------
|
215
339
|
DriftOutput
|
216
|
-
|
217
|
-
|
340
|
+
Complete drift detection results including overall :term:`drift<Drift>` prediction,
|
341
|
+
corrected thresholds, feature-level analysis, and summary :term:`statistics<Statistics>`.
|
218
342
|
"""
|
343
|
+
|
219
344
|
# compute drift scores
|
220
345
|
p_vals, dist = self.score(data)
|
221
346
|
|
dataeval/detectors/drift/_cvm.py
CHANGED
@@ -22,47 +22,66 @@ from dataeval.typing import Array
|
|
22
22
|
|
23
23
|
|
24
24
|
class DriftCVM(BaseDriftUnivariate):
|
25
|
-
"""
|
26
|
-
|
25
|
+
""":term:`Drift` detector using the :term:`Cramér-von Mises (CVM) Test`.
|
26
|
+
|
27
|
+
Detects distributional changes in continuous data by comparing empirical
|
28
|
+
cumulative distribution functions between reference and test datasets.
|
29
|
+
For multivariate data, applies CVM test independently to each feature
|
30
|
+
and aggregates results using either the Bonferroni or
|
31
|
+
:term:`False Discovery Rate (FDR)` correction.
|
27
32
|
|
28
|
-
The CVM test
|
29
|
-
|
30
|
-
|
31
|
-
:term:`False Discovery Rate (FDR)` corrections.
|
33
|
+
The CVM test is particularly effective at detecting subtle
|
34
|
+
distributional shifts throughout the entire domain, providing higher
|
35
|
+
power than Kolmogorov-Smirnov for many types of drift.
|
32
36
|
|
33
37
|
Parameters
|
34
38
|
----------
|
35
39
|
data : Embeddings or Array
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
Reference dataset used as baseline distribution for drift detection.
|
41
|
+
Should represent the expected data distribution.
|
42
|
+
p_val : float, default 0.05
|
43
|
+
Significance threshold for drift detection, between 0 and 1.
|
44
|
+
Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
|
41
45
|
update_strategy : UpdateStrategy or None, default None
|
42
|
-
|
43
|
-
|
44
|
-
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
46
|
+
Strategy for updating reference data when new data arrives.
|
47
|
+
When None, reference data remains fixed throughout detection.
|
45
48
|
correction : "bonferroni" or "fdr", default "bonferroni"
|
46
|
-
|
47
|
-
|
49
|
+
Multiple testing correction method for multivariate drift detection.
|
50
|
+
"bonferroni" provides conservative family-wise error control by
|
51
|
+
dividing significance threshold by number of features.
|
52
|
+
"fdr" uses Benjamini-Hochberg procedure for less conservative control.
|
53
|
+
Default "bonferroni" minimizes false positive drift detections.
|
48
54
|
n_features : int or None, default None
|
49
|
-
Number of features
|
50
|
-
|
51
|
-
|
55
|
+
Number of features to analyze in univariate tests.
|
56
|
+
When None, automatically inferred from the flattened shape of first data sample.
|
52
57
|
|
53
58
|
Example
|
54
59
|
-------
|
60
|
+
Basic drift detection with image embeddings
|
61
|
+
|
55
62
|
>>> from dataeval.data import Embeddings
|
63
|
+
>>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
|
64
|
+
>>> drift_detector = DriftCVM(train_emb)
|
56
65
|
|
57
|
-
|
66
|
+
Test incoming images for distributional drift
|
58
67
|
|
59
|
-
>>>
|
60
|
-
>>>
|
68
|
+
>>> result = drift_detector.predict(test_images)
|
69
|
+
>>> print(f"Drift detected: {result.drifted}")
|
70
|
+
Drift detected: True
|
71
|
+
|
72
|
+
>>> print(f"Mean CVM statistic: {result.distance:.4f}")
|
73
|
+
Mean CVM statistic: 24.1325
|
74
|
+
|
75
|
+
Using different correction methods
|
76
|
+
|
77
|
+
>>> drift_fdr = DriftCVM(train_emb, correction="fdr", p_val=0.1)
|
78
|
+
>>> result = drift_fdr.predict(test_images)
|
61
79
|
|
62
|
-
|
80
|
+
Access feature-level results
|
63
81
|
|
64
|
-
>>>
|
65
|
-
|
82
|
+
>>> n_features = result.feature_drift
|
83
|
+
>>> print(f"Features showing drift: {n_features.sum()} / {len(n_features)}")
|
84
|
+
Features showing drift: 576 / 576
|
66
85
|
"""
|
67
86
|
|
68
87
|
def __init__(
|
dataeval/detectors/drift/_ks.py
CHANGED
@@ -22,49 +22,77 @@ from dataeval.typing import Array
|
|
22
22
|
|
23
23
|
|
24
24
|
class DriftKS(BaseDriftUnivariate):
|
25
|
-
"""
|
26
|
-
:term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
|
25
|
+
""":term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
|
27
26
|
distribution<Kolmogorov-Smirnov (K-S) test>` test.
|
28
27
|
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
Detects distributional changes by measuring the maximum distance between
|
29
|
+
empirical cumulative distribution functions of reference and test datasets.
|
30
|
+
For multivariate data, applies KS test independently to each feature
|
31
|
+
and aggregates results using multiple testing correction.
|
32
|
+
|
33
|
+
The Kolmogorov-Smirnov test is particularly sensitive to differences in
|
34
|
+
the middle portions of distributions but has reduced power in the tails
|
35
|
+
where cumulative distribution functions are constrained near 0 and 1.
|
32
36
|
|
33
37
|
Parameters
|
34
38
|
----------
|
35
39
|
data : Embeddings or Array
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
Reference dataset used as baseline distribution for drift detection.
|
41
|
+
Should represent the expected data distribution.
|
42
|
+
p_val : float, default 0.05
|
43
|
+
Significance threshold for drift detection, between 0 and 1.
|
44
|
+
Default 0.05 limits false drift alerts to 5% when no drift exists (Type I error rate).
|
41
45
|
update_strategy : UpdateStrategy or None, default None
|
42
|
-
|
43
|
-
|
44
|
-
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
46
|
+
Strategy for updating reference data when new data arrives.
|
47
|
+
When None, reference data remains fixed throughout detection.
|
45
48
|
correction : "bonferroni" or "fdr", default "bonferroni"
|
46
|
-
|
47
|
-
|
49
|
+
Multiple testing correction method for multivariate drift detection.
|
50
|
+
"bonferroni" provides conservative family-wise error control by
|
51
|
+
dividing significance threshold by number of features.
|
52
|
+
"fdr" uses Benjamini-Hochberg procedure for less conservative control.
|
53
|
+
Default "bonferroni" minimizes false positive drift detections.
|
48
54
|
alternative : "two-sided", "less" or "greater", default "two-sided"
|
49
|
-
|
50
|
-
|
55
|
+
Alternative hypothesis for the statistical test. "two-sided" detects
|
56
|
+
any distributional difference. "less" tests if test distribution is
|
57
|
+
stochastically smaller. "greater" tests if test distribution is
|
58
|
+
stochastically larger. Default "two-sided" provides most general
|
59
|
+
drift detection without directional assumptions.
|
51
60
|
n_features : int or None, default None
|
52
|
-
Number of features
|
53
|
-
|
61
|
+
Number of features to analyze in univariate tests.
|
62
|
+
When None, automatically inferred from the flattened shape of first data sample.
|
54
63
|
|
55
64
|
Example
|
56
65
|
-------
|
57
|
-
|
58
|
-
|
59
|
-
Use Embeddings to encode images before testing for drift
|
66
|
+
Basic drift detection with image embeddings:
|
60
67
|
|
68
|
+
>>> from dataeval.data import Embeddings
|
61
69
|
>>> train_emb = Embeddings(train_images, model=encoder, batch_size=64)
|
62
|
-
>>>
|
63
|
-
|
64
|
-
Test incoming images for drift
|
65
|
-
|
66
|
-
>>>
|
67
|
-
|
70
|
+
>>> drift_detector = DriftKS(train_emb)
|
71
|
+
|
72
|
+
Test incoming images for distributional drift
|
73
|
+
|
74
|
+
>>> result = drift_detector.predict(test_images)
|
75
|
+
>>> print(f"Drift detected: {result.drifted}")
|
76
|
+
Drift detected: True
|
77
|
+
|
78
|
+
>>> print(f"Mean KS statistic: {result.distance:.4f}")
|
79
|
+
Mean KS statistic: 0.8750
|
80
|
+
|
81
|
+
Detect if test data has systematically higher values
|
82
|
+
|
83
|
+
>>> drift_greater = DriftKS(train_emb, alternative="greater")
|
84
|
+
>>> result = drift_greater.predict(test_images)
|
85
|
+
|
86
|
+
Using different correction methods
|
87
|
+
|
88
|
+
>>> drift_fdr = DriftKS(train_emb, correction="fdr", p_val=0.1)
|
89
|
+
>>> result = drift_fdr.predict(test_images)
|
90
|
+
|
91
|
+
Access feature-level results
|
92
|
+
|
93
|
+
>>> n_features = result.feature_drift
|
94
|
+
>>> print(f"Features showing drift: {n_features.sum()} / {len(n_features)}")
|
95
|
+
Features showing drift: 576 / 576
|
68
96
|
"""
|
69
97
|
|
70
98
|
def __init__(
|
dataeval/detectors/drift/_mmd.py
CHANGED
@@ -24,31 +24,57 @@ from dataeval.typing import Array
|
|
24
24
|
|
25
25
|
|
26
26
|
class DriftMMD(BaseDrift):
|
27
|
-
"""
|
28
|
-
|
29
|
-
|
27
|
+
"""Drift detector using :term:`Maximum Mean Discrepancy (MMD) Drift Detection` with permutation test.
|
28
|
+
|
29
|
+
Detects distributional differences by comparing kernel embeddings of reference
|
30
|
+
and test datasets in a reproducing kernel Hilbert space (RKHS). Uses permutation
|
31
|
+
testing to assess statistical significance of the observed MMD² statistic.
|
32
|
+
|
33
|
+
MMD is particularly effective for high-dimensional data like images as it can
|
34
|
+
capture complex distributional differences that univariate tests might miss.
|
35
|
+
The kernel-based approach enables detection of both marginal and dependency
|
36
|
+
changes between features.
|
30
37
|
|
31
38
|
Parameters
|
32
39
|
----------
|
33
40
|
data : Embeddings or Array
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
41
|
+
Reference dataset used as baseline distribution for drift detection.
|
42
|
+
Should represent the expected data distribution.
|
43
|
+
p_val : float, default 0.05
|
44
|
+
Significance threshold for statistical tests, between 0 and 1.
|
45
|
+
For FDR correction, this represents the acceptable false discovery rate.
|
46
|
+
Default 0.05 provides 95% confidence level for drift detection.
|
39
47
|
update_strategy : UpdateStrategy or None, default None
|
40
|
-
|
41
|
-
|
42
|
-
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
48
|
+
Strategy for updating reference data when new data arrives.
|
49
|
+
When None, reference data remains fixed throughout detection.
|
43
50
|
sigma : Array or None, default None
|
44
|
-
|
45
|
-
|
46
|
-
|
51
|
+
Bandwidth parameter(s) for the Gaussian RBF kernel. Controls the
|
52
|
+
kernel's sensitivity to distance between data points. When None,
|
53
|
+
automatically selects bandwidth using median heuristic. Can provide
|
54
|
+
multiple values as array to average over different scales.
|
47
55
|
n_permutations : int, default 100
|
48
|
-
Number of permutations used in the permutation test
|
56
|
+
Number of random permutations used in the permutation test to estimate
|
57
|
+
the null distribution of MMD² under no drift. Higher values provide
|
58
|
+
more accurate p-value estimates but increase computation time.
|
59
|
+
Default 100 balances statistical accuracy with computational efficiency.
|
49
60
|
device : DeviceLike or None, default None
|
50
|
-
|
51
|
-
|
61
|
+
Hardware device for computation. When None, automatically selects
|
62
|
+
DataEval's configured device, falling back to PyTorch's default.
|
63
|
+
|
64
|
+
Attributes
|
65
|
+
----------
|
66
|
+
p_val : float
|
67
|
+
Significance threshold for statistical tests.
|
68
|
+
update_strategy : UpdateStrategy or None
|
69
|
+
Reference data update strategy.
|
70
|
+
n : int
|
71
|
+
Number of samples in the reference dataset.
|
72
|
+
sigma : Array or None
|
73
|
+
Gaussian RBF kernel bandwidth parameter(s).
|
74
|
+
n_permutations : int
|
75
|
+
Number of permutations for statistical testing.
|
76
|
+
device : torch.device
|
77
|
+
Hardware device used for computations.
|
52
78
|
|
53
79
|
Example
|
54
80
|
-------
|
@@ -56,7 +82,7 @@ class DriftMMD(BaseDrift):
|
|
56
82
|
|
57
83
|
Use Embeddings to encode images before testing for drift
|
58
84
|
|
59
|
-
>>> train_emb = Embeddings(train_images, model=encoder, batch_size=
|
85
|
+
>>> train_emb = Embeddings(train_images, model=encoder, batch_size=16)
|
60
86
|
>>> drift = DriftMMD(train_emb)
|
61
87
|
|
62
88
|
Test incoming images for drift
|
@@ -31,24 +31,42 @@ def classifier_uncertainty(
|
|
31
31
|
preds: Array,
|
32
32
|
preds_type: Literal["probs", "logits"] = "probs",
|
33
33
|
) -> torch.Tensor:
|
34
|
-
"""
|
35
|
-
|
34
|
+
"""Convert model predictions to uncertainty scores using entropy.
|
35
|
+
|
36
|
+
Computes prediction uncertainty as the entropy of the predicted class
|
37
|
+
probability distribution. Higher entropy indicates greater model uncertainty,
|
38
|
+
with maximum uncertainty at uniform distributions and minimum at confident
|
39
|
+
single-class predictions.
|
36
40
|
|
37
41
|
Parameters
|
38
42
|
----------
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
43
|
+
preds : Array
|
44
|
+
Model predictions for a batch of instances. For "probs" type, should
|
45
|
+
contain class probabilities that sum to 1 across the last dimension.
|
46
|
+
For "logits" type, contains raw model outputs before softmax.
|
47
|
+
preds_type : "probs" or "logits", default "probs"
|
48
|
+
Type of prediction values. "probs" expects probabilities in [0,1] that
|
49
|
+
sum to 1. "logits" expects raw outputs in [-inf,inf] and applies softmax.
|
50
|
+
Default "probs" assumes model outputs normalized probabilities.
|
47
51
|
|
48
52
|
Returns
|
49
53
|
-------
|
50
|
-
|
51
|
-
|
54
|
+
torch.Tensor
|
55
|
+
Uncertainty scores for each instance with shape (n_samples, 1).
|
56
|
+
Values are always >= 0, with higher values indicating greater uncertainty.
|
57
|
+
|
58
|
+
Raises
|
59
|
+
------
|
60
|
+
ValueError
|
61
|
+
If preds_type is "probs" but probabilities don't sum to 1 within tolerance.
|
62
|
+
NotImplementedError
|
63
|
+
If preds_type is not "probs" or "logits".
|
64
|
+
|
65
|
+
Notes
|
66
|
+
-----
|
67
|
+
Uncertainty is computed as Shannon entropy: -sum(p * log(p)) where p are
|
68
|
+
the predicted class probabilities. This provides a principled measure of
|
69
|
+
model confidence that is widely used in uncertainty quantification.
|
52
70
|
"""
|
53
71
|
preds_np = as_numpy(preds)
|
54
72
|
if preds_type == "probs":
|
@@ -65,53 +83,98 @@ def classifier_uncertainty(
|
|
65
83
|
|
66
84
|
|
67
85
|
class DriftUncertainty(BaseDrift):
|
68
|
-
"""
|
69
|
-
Test for a change in the number of instances falling into regions on which \
|
70
|
-
the model is uncertain.
|
86
|
+
"""Drift detector using model prediction uncertainty.
|
71
87
|
|
72
|
-
|
88
|
+
Detects drift by monitoring changes in the distribution of model prediction
|
89
|
+
uncertainties (entropy) rather than input features directly. Uses
|
90
|
+
:term:`Kolmogorov-Smirnov (K-S) Test` to compare uncertainty distributions
|
91
|
+
between reference and test data.
|
92
|
+
|
93
|
+
This approach is particularly effective for detecting drift that affects model
|
94
|
+
confidence even when input features remain statistically similar, such as
|
95
|
+
out-of-domain samples or adversarial examples.
|
73
96
|
|
74
97
|
Parameters
|
75
98
|
----------
|
76
|
-
data : Array
|
77
|
-
|
78
|
-
|
79
|
-
:term:`Classification` model outputting class probabilities (or logits)
|
99
|
+
data : Embeddings or Array
|
100
|
+
Reference dataset used as baseline distribution for drift detection.
|
101
|
+
Should represent the expected "normal" data distribution.
|
80
102
|
p_val : float, default 0.05
|
81
|
-
|
103
|
+
Significance threshold for statistical tests, between 0 and 1.
|
104
|
+
For FDR correction, this represents the acceptable false discovery rate.
|
105
|
+
Default 0.05 provides 95% confidence level for drift detection.
|
82
106
|
update_strategy : UpdateStrategy or None, default None
|
83
|
-
|
84
|
-
|
85
|
-
or via reservoir sampling with ReservoirSamplingUpdateStrategy.
|
107
|
+
Strategy for updating reference data when new data arrives.
|
108
|
+
When None, reference data remains fixed throughout detection.
|
86
109
|
correction : "bonferroni" or "fdr", default "bonferroni"
|
87
|
-
|
88
|
-
|
110
|
+
Multiple testing correction method for multivariate drift detection.
|
111
|
+
"bonferroni" provides conservative family-wise error control by
|
112
|
+
dividing significance threshold by number of features.
|
113
|
+
"fdr" uses Benjamini-Hochberg procedure for less conservative control.
|
114
|
+
Default "bonferroni" minimizes false positive drift detections.
|
89
115
|
preds_type : "probs" or "logits", default "probs"
|
90
|
-
|
91
|
-
|
116
|
+
Format of model prediction outputs. "probs" expects normalized
|
117
|
+
probabilities summing to 1. "logits" expects raw model outputs
|
118
|
+
and applies softmax normalization internally.
|
119
|
+
Default "probs" assumes standard classification model outputs.
|
92
120
|
batch_size : int, default 32
|
93
|
-
Batch size
|
94
|
-
|
121
|
+
Batch size for model inference during uncertainty computation.
|
122
|
+
Larger batches improve GPU utilization but require more memory.
|
123
|
+
Default 32 balances efficiency and memory usage.
|
95
124
|
transforms : Transform, Sequence[Transform] or None, default None
|
96
|
-
|
125
|
+
Data transformations applied before model inference. Should match
|
126
|
+
preprocessing used during model training for consistent predictions.
|
127
|
+
When None, uses raw input data without preprocessing.
|
97
128
|
device : DeviceLike or None, default None
|
98
|
-
|
99
|
-
|
129
|
+
Hardware device for computation. When None, automatically selects
|
130
|
+
DataEval's configured device, falling back to PyTorch's default.
|
131
|
+
|
132
|
+
Attributes
|
133
|
+
----------
|
134
|
+
model : torch.nn.Module
|
135
|
+
Classification model used for uncertainty computation.
|
136
|
+
device : torch.device
|
137
|
+
Hardware device used for model inference.
|
138
|
+
batch_size : int
|
139
|
+
Batch size for model predictions.
|
140
|
+
preds_type : {"probs", "logits"}
|
141
|
+
Format of model prediction outputs.
|
100
142
|
|
101
143
|
Example
|
102
144
|
-------
|
103
145
|
>>> model = ClassificationModel()
|
104
|
-
>>>
|
146
|
+
>>> drift_detector = DriftUncertainty(x_ref, model=model, batch_size=16)
|
105
147
|
|
106
148
|
Test incoming images for uncertainty drift
|
107
149
|
|
108
|
-
>>>
|
109
|
-
|
150
|
+
>>> result = drift_detector.predict(x_test)
|
151
|
+
>>> print(f"Drift detected: {result.drifted}")
|
152
|
+
Drift detected: True
|
110
153
|
|
111
|
-
|
154
|
+
>>> print(f"Mean uncertainty change: {result.distance:.4f}")
|
155
|
+
Mean uncertainty change: 0.8160
|
112
156
|
|
113
|
-
|
114
|
-
|
157
|
+
With data preprocessing
|
158
|
+
|
159
|
+
>>> import torchvision.transforms.v2 as T
|
160
|
+
>>> transforms = T.Compose([T.ToDtype(torch.float32)])
|
161
|
+
>>> drift_detector = DriftUncertainty(x_ref, model=model, batch_size=16, transforms=transforms)
|
162
|
+
|
163
|
+
Notes
|
164
|
+
-----
|
165
|
+
Uncertainty-based drift detection is complementary to feature-based methods.
|
166
|
+
It can detect semantic drift (changes in data meaning) that may not be
|
167
|
+
apparent in raw feature statistics, making it valuable for monitoring
|
168
|
+
model performance in production environments.
|
169
|
+
|
170
|
+
The method assumes that model uncertainty is a reliable indicator of
|
171
|
+
data quality. This works best with well-calibrated models trained on
|
172
|
+
representative data. Poorly calibrated models may produce misleading
|
173
|
+
uncertainty estimates.
|
174
|
+
|
175
|
+
For optimal performance, ensure the model and transforms match those used
|
176
|
+
during training, and that the reference data represents the expected
|
177
|
+
operational distribution where the model performs reliably.
|
115
178
|
"""
|
116
179
|
|
117
180
|
def __init__(
|
@@ -142,27 +205,38 @@ class DriftUncertainty(BaseDrift):
|
|
142
205
|
)
|
143
206
|
|
144
207
|
def _transform(self, x: torch.Tensor) -> torch.Tensor:
|
208
|
+
"""Apply preprocessing transforms to input data."""
|
145
209
|
for transform in self._transforms:
|
146
210
|
x = transform(x)
|
147
211
|
return x
|
148
212
|
|
149
213
|
def _preprocess(self, x: Array) -> torch.Tensor:
|
214
|
+
"""Convert input data to uncertainty scores via model predictions."""
|
150
215
|
preds = predict_batch(x, self.model, self.device, self.batch_size, self._transform)
|
151
216
|
return classifier_uncertainty(preds, self.preds_type)
|
152
217
|
|
153
218
|
def predict(self, x: Array) -> DriftOutput:
|
154
|
-
"""
|
155
|
-
|
219
|
+
"""Predict whether model uncertainty distribution has drifted.
|
220
|
+
|
221
|
+
Computes prediction uncertainties for the input data and tests
|
222
|
+
whether their distribution significantly differs from the reference
|
223
|
+
uncertainty distribution using the Kolmogorov-Smirnov test.
|
156
224
|
|
157
225
|
Parameters
|
158
226
|
----------
|
159
227
|
x : Array
|
160
|
-
Batch of instances.
|
228
|
+
Batch of instances to test for uncertainty drift.
|
161
229
|
|
162
230
|
Returns
|
163
231
|
-------
|
164
|
-
|
165
|
-
|
166
|
-
statistics.
|
232
|
+
DriftOutput
|
233
|
+
Drift detection results including overall prediction, p-values,
|
234
|
+
test statistics, and feature-level analysis of uncertainty values.
|
235
|
+
|
236
|
+
Notes
|
237
|
+
-----
|
238
|
+
The returned DriftOutput treats uncertainty values as "features" for
|
239
|
+
consistency with the underlying KS test implementation, even though
|
240
|
+
uncertainty-based drift typically involves univariate analysis.
|
167
241
|
"""
|
168
242
|
return self._detector.predict(self._preprocess(x).cpu().numpy())
|
dataeval/outputs/_drift.py
CHANGED
@@ -18,8 +18,28 @@ from dataeval.outputs._base import Output
|
|
18
18
|
|
19
19
|
@dataclass(frozen=True)
|
20
20
|
class DriftBaseOutput(Output):
|
21
|
-
"""
|
22
|
-
|
21
|
+
"""Base output class for drift detector classes.
|
22
|
+
|
23
|
+
Provides common fields returned by all drift detection methods, containing
|
24
|
+
instance-level drift predictions and summary statistics. Subclasses extend
|
25
|
+
this with detector-specific additional fields.
|
26
|
+
|
27
|
+
Attributes
|
28
|
+
----------
|
29
|
+
drifted : bool
|
30
|
+
Whether drift was detected in the analyzed data. True indicates
|
31
|
+
significant drift from reference distribution.
|
32
|
+
threshold : float
|
33
|
+
Significance threshold used for drift detection, typically between 0 and 1.
|
34
|
+
For multivariate methods, this is the corrected threshold after
|
35
|
+
Bonferroni or FDR correction.
|
36
|
+
p_val : float
|
37
|
+
Instance-level p-value from statistical test, between 0 and 1.
|
38
|
+
For univariate methods, this is the mean p-value across all features.
|
39
|
+
distance : float
|
40
|
+
Instance-level test statistic or distance metric, always >= 0.
|
41
|
+
For univariate methods, this is the mean distance across all features.
|
42
|
+
Higher values indicate greater deviation from reference distribution.
|
23
43
|
"""
|
24
44
|
|
25
45
|
drifted: bool
|
@@ -31,58 +51,76 @@ class DriftBaseOutput(Output):
|
|
31
51
|
@dataclass(frozen=True)
|
32
52
|
class DriftMMDOutput(DriftBaseOutput):
|
33
53
|
"""
|
34
|
-
Output class for :class:`.DriftMMD`
|
54
|
+
Output class for :class:`.DriftMMD` (Maximum Mean Discrepancy) drift detector.
|
55
|
+
|
56
|
+
Extends :class:`.DriftBaseOutput` with MMD-specific distance threshold information.
|
57
|
+
Used by MMD-based drift detectors that compare kernel embeddings between
|
58
|
+
reference and test distributions.
|
35
59
|
|
36
60
|
Attributes
|
37
61
|
----------
|
38
62
|
drifted : bool
|
39
|
-
|
63
|
+
Whether drift was detected based on MMD permutation test.
|
40
64
|
threshold : float
|
41
|
-
|
65
|
+
P-value threshold used for significance of the permutation test.
|
42
66
|
p_val : float
|
43
|
-
P-value obtained from the permutation test
|
67
|
+
P-value obtained from the MMD permutation test, between 0 and 1.
|
44
68
|
distance : float
|
45
|
-
|
69
|
+
Squared Maximum Mean Discrepancy between reference and test set.
|
70
|
+
Always >= 0, with higher values indicating greater distributional difference.
|
46
71
|
distance_threshold : float
|
47
|
-
|
72
|
+
Squared Maximum Mean Discrepancy threshold above which drift is flagged, always >= 0.
|
73
|
+
Determined from permutation test at specified significance level.
|
74
|
+
|
75
|
+
Notes
|
76
|
+
-----
|
77
|
+
MMD uses kernel methods to compare distributions in reproducing kernel
|
78
|
+
Hilbert spaces, making it effective for high-dimensional data like images.
|
48
79
|
"""
|
49
80
|
|
50
|
-
# drifted: bool
|
51
|
-
# threshold: float
|
52
|
-
# p_val: float
|
53
|
-
# distance: float
|
54
81
|
distance_threshold: float
|
55
82
|
|
56
83
|
|
57
84
|
@dataclass(frozen=True)
|
58
85
|
class DriftOutput(DriftBaseOutput):
|
59
|
-
"""
|
60
|
-
|
86
|
+
"""Output class for univariate drift detectors.
|
87
|
+
|
88
|
+
Extends :class:`.DriftBaseOutput` with feature-level (per-pixel) drift information.
|
89
|
+
Used by Kolmogorov-Smirnov, Cramér-von Mises, and uncertainty-based
|
90
|
+
drift detectors that analyze each feature independently.
|
61
91
|
|
62
92
|
Attributes
|
63
93
|
----------
|
64
94
|
drifted : bool
|
65
|
-
|
95
|
+
Overall drift prediction after multivariate correction.
|
66
96
|
threshold : float
|
67
|
-
|
97
|
+
Corrected threshold after Bonferroni or FDR correction for multiple testing.
|
68
98
|
p_val : float
|
69
|
-
|
99
|
+
Mean p-value across all features, between 0 and 1.
|
100
|
+
For descriptive purposes only; individual feature p-values are used
|
101
|
+
for drift detection decisions. Can appear high even when drifted=True
|
102
|
+
if only a subset of features show drift.
|
70
103
|
distance : float
|
71
|
-
|
72
|
-
feature_drift : NDArray
|
73
|
-
|
104
|
+
Mean test statistic across all features, always >= 0.
|
105
|
+
feature_drift : NDArray[bool]
|
106
|
+
Boolean array indicating which features (pixels) show drift.
|
107
|
+
Shape matches the number of features in the input data.
|
74
108
|
feature_threshold : float
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
109
|
+
Uncorrected p-value threshold used for individual feature testing.
|
110
|
+
Typically the original p_val before multivariate correction.
|
111
|
+
p_vals : NDArray[np.float32]
|
112
|
+
P-values for each feature, all values between 0 and 1.
|
113
|
+
Shape matches the number of features in the input data.
|
114
|
+
distances : NDArray[np.float32]
|
115
|
+
Test statistics for each feature, all values >= 0.
|
116
|
+
Shape matches the number of features in the input data.
|
117
|
+
|
118
|
+
Notes
|
119
|
+
-----
|
120
|
+
Feature-level analysis enables identification of specific pixels or regions
|
121
|
+
that contribute most to detected drift, useful for interpretability.
|
80
122
|
"""
|
81
123
|
|
82
|
-
# drifted: bool
|
83
|
-
# threshold: float
|
84
|
-
# p_val: float
|
85
|
-
# distance: float
|
86
124
|
feature_drift: NDArray[np.bool_]
|
87
125
|
feature_threshold: float
|
88
126
|
p_vals: NDArray[np.float32]
|
dataeval/outputs/_workflows.py
CHANGED
@@ -108,7 +108,13 @@ def plot_measure(
|
|
108
108
|
zorder=3,
|
109
109
|
)
|
110
110
|
else:
|
111
|
-
ax.scatter(
|
111
|
+
ax.scatter(
|
112
|
+
steps,
|
113
|
+
averaged_measure,
|
114
|
+
label=f"Model Results ({name})",
|
115
|
+
zorder=3,
|
116
|
+
c="black",
|
117
|
+
)
|
112
118
|
# Plot extrapolation
|
113
119
|
ax.plot(
|
114
120
|
projection,
|
@@ -149,7 +155,9 @@ def f_inv_out(y_i: NDArray[Any], x: NDArray[Any]) -> NDArray[np.int64]:
|
|
149
155
|
"Number of samples could not be determined for target(s): "
|
150
156
|
f"""{
|
151
157
|
np.array2string(
|
152
|
-
1 - y_i[unachievable_targets],
|
158
|
+
1 - y_i[unachievable_targets],
|
159
|
+
separator=", ",
|
160
|
+
formatter={"float": lambda x: f"{x}"},
|
153
161
|
)
|
154
162
|
}""",
|
155
163
|
UserWarning,
|
@@ -223,7 +231,9 @@ def calc_params(p_i: NDArray[Any], n_i: NDArray[Any], niter: int) -> NDArray[np.
|
|
223
231
|
|
224
232
|
|
225
233
|
def get_curve_params(
|
226
|
-
averaged_measures: MutableMapping[str, NDArray[Any]],
|
234
|
+
averaged_measures: MutableMapping[str, NDArray[Any]],
|
235
|
+
ranges: NDArray[Any],
|
236
|
+
niter: int,
|
227
237
|
) -> Mapping[str, NDArray[np.float64]]:
|
228
238
|
"""Calculates and aggregates parameters for both single and multi-class metrics"""
|
229
239
|
output = {}
|
@@ -324,7 +334,10 @@ class SufficiencyOutput(Output):
|
|
324
334
|
return proj
|
325
335
|
|
326
336
|
def plot(
|
327
|
-
self,
|
337
|
+
self,
|
338
|
+
class_names: Sequence[str] | None = None,
|
339
|
+
error_bars: bool = False,
|
340
|
+
asymptote: bool = False,
|
328
341
|
) -> Sequence[Figure]:
|
329
342
|
"""
|
330
343
|
Plotting function for data :term:`sufficiency<Sufficiency>` tasks.
|
@@ -426,7 +439,8 @@ class SufficiencyOutput(Output):
|
|
426
439
|
projection[name] = np.zeros((len(measure), len(tarray)))
|
427
440
|
for i in range(len(measure)):
|
428
441
|
projection[name][i] = inv_project_steps(
|
429
|
-
self.params[name][i],
|
442
|
+
self.params[name][i],
|
443
|
+
tarray[i] if tarray.ndim == measure.ndim else tarray,
|
430
444
|
)
|
431
445
|
else:
|
432
446
|
projection[name] = inv_project_steps(self.params[name], tarray)
|
dataeval/typing.py
CHANGED
@@ -21,7 +21,7 @@ __all__ = [
|
|
21
21
|
]
|
22
22
|
|
23
23
|
|
24
|
-
from collections.abc import Iterator
|
24
|
+
from collections.abc import Iterator
|
25
25
|
from typing import (
|
26
26
|
Any,
|
27
27
|
Generic,
|
@@ -94,6 +94,7 @@ class Array(Protocol):
|
|
94
94
|
|
95
95
|
_T = TypeVar("_T")
|
96
96
|
_T_co = TypeVar("_T_co", covariant=True)
|
97
|
+
_T_cn = TypeVar("_T_cn", contravariant=True)
|
97
98
|
|
98
99
|
|
99
100
|
class DatasetMetadata(TypedDict, total=False):
|
@@ -128,6 +129,19 @@ class ModelMetadata(TypedDict, total=False):
|
|
128
129
|
index2label: NotRequired[ReadOnly[dict[int, str]]]
|
129
130
|
|
130
131
|
|
132
|
+
class DatumMetadata(TypedDict, total=False):
|
133
|
+
"""
|
134
|
+
Datum-level metadata required for all `AnnotatedDataset` classes.
|
135
|
+
|
136
|
+
Attributes
|
137
|
+
----------
|
138
|
+
id : Required[str]
|
139
|
+
A unique identifier for the datum
|
140
|
+
"""
|
141
|
+
|
142
|
+
id: Required[ReadOnly[str]]
|
143
|
+
|
144
|
+
|
131
145
|
@runtime_checkable
|
132
146
|
class Dataset(Generic[_T_co], Protocol):
|
133
147
|
"""
|
@@ -173,7 +187,7 @@ class AnnotatedDataset(Dataset[_T_co], Generic[_T_co], Protocol):
|
|
173
187
|
# ========== IMAGE CLASSIFICATION DATASETS ==========
|
174
188
|
|
175
189
|
|
176
|
-
ImageClassificationDatum: TypeAlias = tuple[ArrayLike, ArrayLike,
|
190
|
+
ImageClassificationDatum: TypeAlias = tuple[ArrayLike, ArrayLike, DatumMetadata]
|
177
191
|
"""
|
178
192
|
Type alias for an image classification datum tuple.
|
179
193
|
|
@@ -213,7 +227,7 @@ class ObjectDetectionTarget(Protocol):
|
|
213
227
|
def scores(self) -> ArrayLike: ...
|
214
228
|
|
215
229
|
|
216
|
-
ObjectDetectionDatum: TypeAlias = tuple[ArrayLike, ObjectDetectionTarget,
|
230
|
+
ObjectDetectionDatum: TypeAlias = tuple[ArrayLike, ObjectDetectionTarget, DatumMetadata]
|
217
231
|
"""
|
218
232
|
Type alias for an object detection datum tuple.
|
219
233
|
|
@@ -254,7 +268,7 @@ class SegmentationTarget(Protocol):
|
|
254
268
|
def scores(self) -> ArrayLike: ...
|
255
269
|
|
256
270
|
|
257
|
-
SegmentationDatum: TypeAlias = tuple[ArrayLike, SegmentationTarget,
|
271
|
+
SegmentationDatum: TypeAlias = tuple[ArrayLike, SegmentationTarget, DatumMetadata]
|
258
272
|
"""
|
259
273
|
Type alias for a segmentation datum tuple.
|
260
274
|
|
@@ -311,3 +325,8 @@ class Transform(Generic[_T], Protocol):
|
|
311
325
|
"""
|
312
326
|
|
313
327
|
def __call__(self, data: _T, /) -> _T: ...
|
328
|
+
|
329
|
+
|
330
|
+
@runtime_checkable
|
331
|
+
class Action(Generic[_T_cn, _T_co], Protocol):
|
332
|
+
def __call__(self, evaluator: _T_cn) -> _T_co: ...
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.88.1
|
3
|
+
Version: 0.89.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Project-URL: Homepage, https://dataeval.ai/
|
6
6
|
Project-URL: Repository, https://github.com/aria-ml/dataeval/
|
@@ -1,13 +1,13 @@
|
|
1
1
|
dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
|
2
2
|
dataeval/_log.py,sha256=Q2d6oqYKXyn1wkgMdNX9iswod4Jq0jPADShrCFVgJI0,374
|
3
|
-
dataeval/_version.py,sha256=
|
3
|
+
dataeval/_version.py,sha256=WrO2EvGpE352dBNSCRePHfYFYuFHG0OvoJpgjI_9VSQ,513
|
4
4
|
dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
|
5
5
|
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
dataeval/typing.py,sha256=
|
6
|
+
dataeval/typing.py,sha256=pInHYviGxKbX4WQKVdB3CVtd5PRrTj6aH1SMONMDs3M,7854
|
7
7
|
dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
|
8
|
-
dataeval/data/_embeddings.py,sha256=
|
8
|
+
dataeval/data/_embeddings.py,sha256=Bnl7KLw7waMJNxwpS_-W5dJl0uu4tjBLUsbIXICyyjI,14741
|
9
9
|
dataeval/data/_images.py,sha256=9q0O5Zurf-5727rMC7DB_i3TtXcL67D9a5GGx5qlov8,3875
|
10
|
-
dataeval/data/_metadata.py,sha256
|
10
|
+
dataeval/data/_metadata.py,sha256=M5OobFuhxyksjWwvyV-1PnWjFUYHcNfjejgxa203d8s,24178
|
11
11
|
dataeval/data/_selection.py,sha256=4qI-GwSdEGiRCyr3kqxr6uOiyRRKsPBRzYHmpgdWLY0,5301
|
12
12
|
dataeval/data/_split.py,sha256=aCkXFvkCw8VkWICdCmY9tHiEvkQI5j9jUa7QLjm-gZE,16759
|
13
13
|
dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
|
@@ -20,12 +20,12 @@ dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBM
|
|
20
20
|
dataeval/data/selections/_shuffle.py,sha256=uW_Zss773ob2swqwTdL6G-CzMElCq8TO2TScvABQR1U,1268
|
21
21
|
dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
|
22
22
|
dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
|
23
|
-
dataeval/detectors/drift/_base.py,sha256=
|
24
|
-
dataeval/detectors/drift/_cvm.py,sha256=
|
25
|
-
dataeval/detectors/drift/_ks.py,sha256=
|
26
|
-
dataeval/detectors/drift/_mmd.py,sha256=
|
23
|
+
dataeval/detectors/drift/_base.py,sha256=w1sUQlfFy6Wi5xIXStpnBm7L_Cxmtprm7LoPQbcVMME,13037
|
24
|
+
dataeval/detectors/drift/_cvm.py,sha256=6E0-XIgVl40ivqBLqalMGTpG6sIGpC4AKOcLMoEpGE8,3990
|
25
|
+
dataeval/detectors/drift/_ks.py,sha256=65I4gNjpkxXHMukEMU26ctF-4uLvIzPq3TNMsp0_yFs,4736
|
26
|
+
dataeval/detectors/drift/_mmd.py,sha256=3oDFUUW6aVqs-T_Oxx-2iBa24H2hSN215lTEh8v-N7k,12943
|
27
27
|
dataeval/detectors/drift/_mvdc.py,sha256=WMN6aDOWCh1q1MtdRXFIZlFcfnVi4XgBHsS0A6L5UuY,2942
|
28
|
-
dataeval/detectors/drift/_uncertainty.py,sha256
|
28
|
+
dataeval/detectors/drift/_uncertainty.py,sha256=yAaoEnH231DnWCHyODsr2UCtOf8Shs6zSbvu0efkv2g,9950
|
29
29
|
dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
|
30
30
|
dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
|
31
31
|
dataeval/detectors/drift/_nml/_base.py,sha256=wMqegfa92Tldqix1RL6dLMdiKgX0GqHmTiFxO38ja_c,2672
|
@@ -69,14 +69,14 @@ dataeval/metrics/stats/_visualstats.py,sha256=SbXvNWxfKrw-2wCu5FXMsnpsMUVaQzdJkj
|
|
69
69
|
dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
|
70
70
|
dataeval/outputs/_base.py,sha256=lVC7xmBgv3JYY2wVLaGBMPlkRE_KV9UloaeQn0nQydA,5875
|
71
71
|
dataeval/outputs/_bias.py,sha256=gj2AgSKOdq6bj59RMiHpha4Skld6ZMB8cW5KesOZ6T4,10483
|
72
|
-
dataeval/outputs/_drift.py,sha256=
|
72
|
+
dataeval/outputs/_drift.py,sha256=_c41lUtEIg_NveYL9fxnYJA-nFqu09414Qb6XYYYFkU,7119
|
73
73
|
dataeval/outputs/_estimators.py,sha256=SUjur5jI6OU9C7GpsAuA_qqO1PRnS-8eZN-otsaV5q0,3120
|
74
74
|
dataeval/outputs/_linters.py,sha256=N4nP5HMoeN2zLndWzhoIT5QB1Ujxbs8Gx5pWPKhl3yc,6683
|
75
75
|
dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
|
76
76
|
dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
|
77
77
|
dataeval/outputs/_stats.py,sha256=PsDV0uw41aTy-X9tjz-PqOj78TTnH4JQVpOrU3OThAE,17423
|
78
78
|
dataeval/outputs/_utils.py,sha256=KJ1P8tcMFIkGi2A6VfqbZwLcT1cD0c2YssTbWbHALjE,938
|
79
|
-
dataeval/outputs/_workflows.py,sha256=
|
79
|
+
dataeval/outputs/_workflows.py,sha256=IdMjeyGKegJkChJWdyx8CiFneSaLx37voHR-X7O6qPk,13950
|
80
80
|
dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
|
81
81
|
dataeval/utils/_array.py,sha256=P4_gyH3kkksUJm9Vqx-oPtLWxFmqMacUJzhj0vmrUd8,6361
|
82
82
|
dataeval/utils/_bin.py,sha256=QjlRCB5mOauETdxSbvRxRG17riO6gScsMd_lNnnvqxs,7391
|
@@ -99,7 +99,7 @@ dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Z
|
|
99
99
|
dataeval/utils/torch/trainer.py,sha256=kBdgxd9TL1Pvz-dyZbS__POAKeFrDiQ4vKFh8ltJApc,5543
|
100
100
|
dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
|
101
101
|
dataeval/workflows/sufficiency.py,sha256=m3Z8VquGxefai6nOqoMveYA1XAA_mUf_IL21W-enyxQ,10274
|
102
|
-
dataeval-0.
|
103
|
-
dataeval-0.
|
104
|
-
dataeval-0.
|
105
|
-
dataeval-0.
|
102
|
+
dataeval-0.89.0.dist-info/METADATA,sha256=X0YdtdEe7XBWqlUlEESHjD3gibKvyij-8tstduSwiLc,5601
|
103
|
+
dataeval-0.89.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
104
|
+
dataeval-0.89.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
|
105
|
+
dataeval-0.89.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|