dataeval 0.70.0__py3-none-any.whl → 0.70.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +6 -6
- dataeval/_internal/datasets.py +235 -131
- dataeval/_internal/detectors/clusterer.py +2 -0
- dataeval/_internal/detectors/drift/base.py +2 -2
- dataeval/_internal/detectors/drift/mmd.py +1 -1
- dataeval/_internal/detectors/duplicates.py +2 -0
- dataeval/_internal/detectors/ood/ae.py +5 -3
- dataeval/_internal/detectors/ood/aegmm.py +6 -4
- dataeval/_internal/detectors/ood/base.py +12 -7
- dataeval/_internal/detectors/ood/llr.py +6 -4
- dataeval/_internal/detectors/ood/vae.py +5 -3
- dataeval/_internal/detectors/ood/vaegmm.py +6 -4
- dataeval/_internal/detectors/outliers.py +4 -2
- dataeval/_internal/metrics/balance.py +4 -2
- dataeval/_internal/metrics/ber.py +2 -0
- dataeval/_internal/metrics/coverage.py +4 -0
- dataeval/_internal/metrics/divergence.py +6 -2
- dataeval/_internal/metrics/diversity.py +8 -6
- dataeval/_internal/metrics/parity.py +8 -6
- dataeval/_internal/metrics/stats/base.py +2 -2
- dataeval/_internal/metrics/stats/datasetstats.py +2 -0
- dataeval/_internal/metrics/stats/dimensionstats.py +2 -0
- dataeval/_internal/metrics/stats/hashstats.py +2 -0
- dataeval/_internal/metrics/stats/labelstats.py +1 -1
- dataeval/_internal/metrics/stats/pixelstats.py +4 -2
- dataeval/_internal/metrics/stats/visualstats.py +4 -2
- dataeval/_internal/metrics/uap.py +6 -2
- dataeval/_internal/metrics/utils.py +2 -2
- dataeval/_internal/models/pytorch/autoencoder.py +5 -5
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
- dataeval/_internal/utils.py +11 -16
- dataeval/_internal/workflows/sufficiency.py +44 -33
- dataeval/detectors/__init__.py +4 -0
- dataeval/detectors/drift/__init__.py +8 -3
- dataeval/detectors/drift/kernels/__init__.py +4 -0
- dataeval/detectors/drift/updates/__init__.py +4 -0
- dataeval/detectors/linters/__init__.py +15 -4
- dataeval/detectors/ood/__init__.py +14 -2
- dataeval/metrics/__init__.py +5 -0
- dataeval/metrics/bias/__init__.py +13 -4
- dataeval/metrics/estimators/__init__.py +8 -8
- dataeval/metrics/stats/__init__.py +17 -6
- dataeval/utils/__init__.py +16 -3
- dataeval/utils/tensorflow/__init__.py +11 -0
- dataeval/utils/torch/__init__.py +12 -0
- dataeval/utils/torch/datasets/__init__.py +7 -0
- dataeval/workflows/__init__.py +4 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/METADATA +10 -2
- dataeval-0.70.1.dist-info/RECORD +80 -0
- dataeval/tensorflow/__init__.py +0 -3
- dataeval/torch/__init__.py +0 -3
- dataeval-0.70.0.dist-info/RECORD +0 -79
- /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/WHEEL +0 -0
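The renamed `__init__.py` paths above move the framework-specific utilities under `dataeval.utils`. A minimal migration sketch, assuming the names these modules re-export are unchanged by the move:

    # 0.70.0 import roots (removed):  dataeval.torch.*, dataeval.tensorflow.*
    # 0.70.1 import roots (added):    dataeval.utils.torch.*, dataeval.utils.tensorflow.*
    import dataeval.utils.tensorflow.models as tf_models
    import dataeval.utils.torch.models as torch_models
    import dataeval.utils.torch.datasets as torch_datasets  # new package in 0.70.1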
dataeval/_internal/detectors/ood/base.py
CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations

 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, Literal,
+from typing import Callable, Literal, cast

 import keras
 import numpy as np
@@ -26,6 +26,9 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class OODOutput(OutputMetadata):
     """
+    Output class for predictions from :class:`OOD_AE`, :class:`OOD_AEGMM`, :class:`OOD_LLR`,
+    :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
+
     Attributes
     ----------
     is_ood : NDArray
@@ -41,9 +44,11 @@ class OODOutput(OutputMetadata):
     feature_score: NDArray[np.float32] | None


-
+@dataclass(frozen=True)
+class OODScoreOutput(OutputMetadata):
     """
-
+    Output class for instance and feature scores from :class:`OOD_AE`, :class:`OOD_AEGMM`,
+    :class:`OOD_LLR`, :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors

     Parameters
     ----------
@@ -76,7 +81,7 @@ class OODBase(ABC):
     def __init__(self, model: keras.Model) -> None:
         self.model = model

-        self._ref_score:
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None

@@ -102,7 +107,7 @@ class OODBase(ABC):
         self._validate(X)

     @abstractmethod
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) ->
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) scores for a given dataset.

@@ -116,7 +121,7 @@ class OODBase(ABC):

         Returns
         -------
-
+        OODScoreOutput
            An object containing the instance-level and feature-level OOD scores.
         """

@@ -197,7 +202,7 @@ class OODBase(ABC):
         # compute outlier scores
         score = self.score(X, batch_size=batch_size)
         ood_pred = score.get(ood_type) > self._threshold_score(ood_type)
-        return OODOutput(is_ood=ood_pred, **score.
+        return OODOutput(is_ood=ood_pred, **score.dict())


 class OODGMMBase(OODBase):
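Net effect of the hunks above: the score container is now the frozen dataclass `OODScoreOutput`, and `predict` rebuilds `OODOutput` from it via `dict()`. A hedged usage sketch, assuming `detector` is an already-fit `OODBase` subclass, `images` is a NumPy batch, and `"instance"` is a valid `ood_type` key for `get()`:

    score = detector.score(images)            # OODScoreOutput dataclass
    instance_scores = score.get("instance")   # the accessor predict() uses internally
    result = detector.predict(images)         # OODOutput(is_ood=..., **score.dict())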
dataeval/_internal/detectors/ood/llr.py
CHANGED
@@ -18,11 +18,12 @@ from keras.layers import Input
 from keras.models import Model
 from numpy.typing import ArrayLike, NDArray

-from dataeval._internal.detectors.ood.base import OODBase,
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 from dataeval._internal.models.tensorflow.trainer import trainer
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 def build_model(
@@ -124,7 +125,7 @@ class OOD_LLR(OODBase):
         self.sequential = sequential
         self.log_prob = log_prob

-        self._ref_score:
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None

@@ -279,12 +280,13 @@ class OOD_LLR(OODBase):
         logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
         return logp_s - logp_b

+    @set_metadata("dataeval.detectors")
     def score(
         self,
         X: ArrayLike,
         batch_size: int = int(1e10),
-    ) ->
+    ) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
         fscore = -self._llr(X, True, batch_size=batch_size)
         iscore = -self._llr(X, False, batch_size=batch_size)
-        return
+        return OODScoreOutput(iscore, fscore)
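`score` is now wrapped in `@set_metadata("dataeval.detectors")`, dataeval's internal output decorator. Purely as an illustration of the pattern (not dataeval's actual implementation), a decorator of this shape tags the returned frozen dataclass with the namespace that produced it:

    from functools import wraps

    def set_metadata(namespace: str):
        def decorator(fn):
            @wraps(fn)
            def wrapper(*args, **kwargs):
                output = fn(*args, **kwargs)
                # frozen dataclasses require object.__setattr__;
                # the attribute name here is hypothetical
                object.__setattr__(output, "_meta_namespace", namespace)
                return output
            return wrapper
        return decorator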
dataeval/_internal/detectors/ood/vae.py
CHANGED
@@ -15,11 +15,12 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike

-from dataeval._internal.detectors.ood.base import OODBase,
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAE
 from dataeval._internal.models.tensorflow.losses import Elbo
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 class OOD_VAE(OODBase):
@@ -67,7 +68,8 @@ class OOD_VAE(OODBase):
         loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)

-
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))

         # sample reconstructed instances
@@ -86,4 +88,4 @@ class OOD_VAE(OODBase):
         sorted_fscore_perc = sorted_fscore[:, -n_score_features:]
         iscore = np.mean(sorted_fscore_perc, axis=1)

-        return
+        return OODScoreOutput(iscore, fscore)
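The tail of `OOD_VAE.score` aggregates per-feature scores into an instance score by averaging the top-k features per image. A toy rerun of that arithmetic (values and k are made up):

    import numpy as np

    fscore = np.array([[0.1, 0.9, 0.3],
                       [0.2, 0.2, 0.2]])       # (n_images, n_features)
    n_score_features = 2
    sorted_fscore = np.sort(fscore, axis=1)
    iscore = np.mean(sorted_fscore[:, -n_score_features:], axis=1)
    print(iscore)                              # [0.6 0.2]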
dataeval/_internal/detectors/ood/vaegmm.py
CHANGED
@@ -15,12 +15,13 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike

-from dataeval._internal.detectors.ood.base import OODGMMBase,
+from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
 from dataeval._internal.models.tensorflow.gmm import gmm_energy
 from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 class OOD_VAEGMM(OODGMMBase):
@@ -53,7 +54,8 @@ class OOD_VAEGMM(OODGMMBase):
         loss_fn = LossGMM(elbo=Elbo(0.05))
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)

-
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) score for a given dataset.

@@ -67,7 +69,7 @@ class OOD_VAEGMM(OODGMMBase):

         Returns
         -------
-
+        OODScoreOutput
            An object containing the instance-level OOD score.

         Note
@@ -84,4 +86,4 @@ class OOD_VAEGMM(OODGMMBase):
         energy, _ = gmm_energy(z, self.gmm_params, return_mean=False)
         energy_samples = energy.numpy().reshape((-1, self.samples))  # type: ignore
         iscore = np.mean(energy_samples, axis=-1)
-        return
+        return OODScoreOutput(iscore)
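`OOD_VAEGMM` draws several VAE samples per image, so its final lines average the GMM energies over the sample axis. The same reshape-and-mean on toy numbers:

    import numpy as np

    samples = 3
    energy = np.array([1.0, 2.0, 3.0, 10.0, 10.0, 10.0])   # 2 images x 3 samples, flattened
    iscore = np.mean(energy.reshape((-1, samples)), axis=-1)
    print(iscore)                                           # [ 2. 10.]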
dataeval/_internal/detectors/outliers.py
CHANGED
@@ -22,6 +22,8 @@ TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
 @dataclass(frozen=True)
 class OutliersOutput(Generic[TIndexIssueMap], OutputMetadata):
     """
+    Output class for :class:`Outliers` lint detector
+
     Attributes
     ----------
     issues : dict[int, dict[str, float]] | list[dict[int, dict[str, float]]]
@@ -86,8 +88,8 @@ class Outliers:
     --------
     Duplicates

-
-
+    Note
+    ----
     There are 3 different statistical methods:

     - zscore
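The first method the new `Note` section lists is zscore. A toy illustration of z-score flagging (data and threshold are made up; dataeval's own defaults may differ):

    import numpy as np

    values = np.array([1.0, 1.1, 0.9, 1.0, 6.0])
    z = (values - values.mean()) / values.std()
    flagged = np.flatnonzero(np.abs(z) > 1.5)   # -> array([4])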
dataeval/_internal/metrics/balance.py
CHANGED
@@ -15,6 +15,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class BalanceOutput(OutputMetadata):
     """
+    Output class for :func:`balance` bias metric
+
     Attributes
     ----------
     balance : NDArray[np.float64]
@@ -71,8 +73,8 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
         (num_factors+1) x (num_factors+1) estimate of mutual information
         between num_factors metadata factors and class label. Symmetry is enforced.

-
-
+    Note
+    ----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
     seed. MI is computed differently for categorical and continuous variables, and
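The note names sklearn's `mutual_info_classif` as the MI estimator for the categorical class label. A self-contained sketch of that call on toy factors (the encoding shown is illustrative, not dataeval's preprocessing):

    import numpy as np
    from sklearn.feature_selection import mutual_info_classif

    factors = np.array([[0, 1], [0, 0], [1, 1], [1, 0]])  # two discrete metadata factors
    labels = np.array([0, 0, 1, 1])
    mi = mutual_info_classif(factors, labels, discrete_features=True, random_state=0)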
dataeval/_internal/metrics/coverage.py
CHANGED
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import math
 from dataclasses import dataclass
 from typing import Literal
@@ -14,6 +16,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class CoverageOutput(OutputMetadata):
     """
+    Output class for :func:`coverage` bias metric
+
     Attributes
     ----------
     indices : NDArray
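Several modules in this release gain `from __future__ import annotations`, which defers annotation evaluation (PEP 563) so that `X | None` union hints parse on Python versions before 3.10. A minimal demonstration:

    from __future__ import annotations  # must precede all other statements

    import numpy as np

    # Without the future import, `np.ndarray | None` is evaluated eagerly and
    # raises TypeError on Python <= 3.9; with it, annotations stay as strings.
    def apply_mask(mask: np.ndarray | None = None) -> None:
        ...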
dataeval/_internal/metrics/divergence.py
CHANGED
@@ -3,6 +3,8 @@ This module contains the implementation of HP Divergence
 using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
 """

+from __future__ import annotations
+
 from dataclasses import dataclass
 from typing import Literal

@@ -17,6 +19,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DivergenceOutput(OutputMetadata):
     """
+    Output class for :func:`divergence` estimator metric
+
     Attributes
     ----------
     divergence : float
@@ -96,8 +100,8 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
     DivergenceOutput
         The divergence value (0.0..1.0) and the number of differing edges between the datasets

-
-
+    Note
+    ----
     The divergence value indicates how similar the 2 datasets are
     with 0 indicating approximately identical data distributions.
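A hedged usage sketch for the estimator documented above, assuming `divergence` is exported from `dataeval.metrics.estimators` (consistent with this release's `__init__` updates):

    import numpy as np
    from dataeval.metrics.estimators import divergence

    a = np.random.default_rng(0).normal(size=(100, 16))
    b = np.random.default_rng(1).normal(size=(100, 16))
    out = divergence(a, b, method="FNN")
    print(out.divergence)   # 0.0 means approximately identical distributions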
dataeval/_internal/metrics/diversity.py
CHANGED
@@ -13,6 +13,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DiversityOutput(OutputMetadata):
     """
+    Output class for :func:`diversity` bias metric
+
     Attributes
     ----------
     diversity_index : NDArray[np.float64]
@@ -52,8 +54,8 @@ def diversity_shannon(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See `numpy.histogram` for details.

     Returns
@@ -103,8 +105,8 @@ def diversity_simpson(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See
     numpy.histogram for details.
     If there is only one category, the diversity index takes a value of 0.
@@ -162,8 +164,8 @@ def diversity(
     method: Literal["shannon", "simpson"], default "simpson"
         Indicates which diversity index should be computed

-
-
+    Note
+    ----
     - For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
     - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
     - If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
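For reference, the two indices the docstrings name, computed on toy category counts (the normalizations shown are illustrative; dataeval's exact scaling may differ):

    import numpy as np

    counts = np.array([5, 3, 2])
    p = counts / counts.sum()
    shannon = -np.sum(p * np.log(p)) / np.log(p.size)   # normalized Shannon entropy
    simpson = (1.0 / np.sum(p**2)) / p.size             # inverse Simpson index over N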
dataeval/_internal/metrics/parity.py
CHANGED
@@ -17,6 +17,8 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
 @dataclass(frozen=True)
 class ParityOutput(Generic[TData], OutputMetadata):
     """
+    Output class for :func:`parity` and :func:`label_parity` bias metrics
+
     Attributes
     ----------
     score : np.float64 | NDArray[np.float64]
@@ -137,8 +139,8 @@ def normalize_expected_dist(expected_dist: NDArray, observed_dist: NDArray) -> N
     ValueError
         If the expected distribution is all zeros.

-
-
+    Note
+    ----
     The function ensures that the total number of labels in the expected distribution matches the total
     number of labels in the observed distribution by scaling the expected distribution.
     """
@@ -224,8 +226,8 @@ def label_parity(
         of unique classes between the observed and expected distributions.


-
-
+    Note
+    ----
     - Providing ``num_classes`` can be helpful if there are classes with zero instances in one of the distributions.
     - The function first validates the observed distribution and normalizes the expected distribution so that it
       has the same total number of labels as the observed distribution.
@@ -317,8 +319,8 @@ def parity(
        factor values either 0 times or at least 5 times. Alternatively, continuous-valued factors can be digitized
        into fewer bins.

-
-
+    Note
+    ----
     - Each key of the ``continuous_factor_bincounts`` dictionary must occur as a key in data_factors.
     - A high score with a low p-value suggests that a metadata factor is strongly correlated with a class label.
     - The function creates a contingency matrix for each factor, where each entry represents the frequency of a
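The parity notes describe building a contingency matrix per factor and applying a chi-square test; scipy's `chi2_contingency` is the standard call for that step, shown here on a made-up matrix as an illustration rather than dataeval's exact code path:

    import numpy as np
    from scipy.stats import chi2_contingency

    # rows: values of one metadata factor; columns: class labels
    contingency = np.array([[20, 5],
                            [10, 25]])
    chi2, p_value, dof, expected = chi2_contingency(contingency)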
dataeval/_internal/metrics/stats/base.py
CHANGED
@@ -179,8 +179,8 @@ def run_stats(
     The dictionary keys correspond to the names of the statistics, and the values are NumPy arrays
     with the results of the computations.

-
-
+    Note
+    ----
     - The function performs image normalization (rescaling the image values)
       before applying some of the statistics.
     - Pixel-level statistics (e.g., brightness, entropy) are computed after
dataeval/_internal/metrics/stats/datasetstats.py
CHANGED
@@ -16,6 +16,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DatasetStatsOutput(OutputMetadata):
     """
+    Output class for :func:`datasetstats` stats metric
+
     This class represents the outputs of various stats functions against a single
     dataset, such that each index across all stat outputs are representative of
     the same source image. Modifying or mixing outputs will result in inaccurate
dataeval/_internal/metrics/stats/labelstats.py
CHANGED
@@ -13,7 +13,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class LabelStatsOutput(OutputMetadata):
     """
-    Output class for
+    Output class for :func:`labelstats` stats metric

     Attributes
     ----------
dataeval/_internal/metrics/stats/pixelstats.py
CHANGED
@@ -36,6 +36,8 @@ class PixelStatsProcessor(StatsProcessor):
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`pixelstats` stats metric
+
     Attributes
     ----------
     mean : NDArray[np.float16]
@@ -93,8 +95,8 @@ def pixelstats(
     --------
     dimensionstats, visualstats, Outliers

-
-
+    Note
+    ----
     - All metrics are scaled based on the perceived bit depth (which is derived from the largest pixel value)
       to allow for better comparison between images stored in different formats and different resolutions.
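On the bit-depth note: an illustrative version of "perceived bit depth" scaling, inferring depth from the largest pixel value (the exact rule dataeval applies may differ):

    import numpy as np

    image = np.array([[0, 128, 255]], dtype=np.uint16)  # stored wide, but 8-bit content
    bit_depth = int(np.ceil(np.log2(image.max() + 1)))  # -> 8
    scaled = image / (2**bit_depth - 1)                 # values rescaled to [0, 1]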
dataeval/_internal/metrics/stats/visualstats.py
CHANGED
@@ -43,6 +43,8 @@ class VisualStatsProcessor(StatsProcessor):
 @dataclass(frozen=True)
 class VisualStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`visualstats` stats metric
+
     Attributes
     ----------
     brightness : NDArray[np.float16]
@@ -100,8 +102,8 @@ def visualstats(
     --------
     dimensionstats, pixelstats, Outliers

-
-
+    Note
+    ----
     - `zeros` and `missing` are presented as a percentage of total pixel counts

     Examples
dataeval/_internal/metrics/uap.py
CHANGED
@@ -4,6 +4,8 @@ FR Test Statistic based estimate for the upperbound
 average precision using empirical mean precision
 """

+from __future__ import annotations
+
 from dataclasses import dataclass

 from numpy.typing import ArrayLike
@@ -16,6 +18,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class UAPOutput(OutputMetadata):
     """
+    Output class for :func:`uap` estimator metric
+
     Attributes
     ----------
     uap : float
@@ -48,8 +52,8 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     ValueError
         If unique classes M < 2

-
-
+    Note
+    ----
     This function calculates the empirical mean precision using the
     ``average_precision_score`` from scikit-learn, weighted by the class distribution.
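The note points to scikit-learn's `average_precision_score` with class-distribution weighting. A toy sketch of that call (one-hot labels and random scores are stand-ins):

    import numpy as np
    from sklearn.metrics import average_precision_score

    labels = np.array([0, 0, 1, 1, 2, 2])
    scores = np.random.default_rng(0).random((6, 3))   # per-class model scores
    uap_estimate = average_precision_score(np.eye(3)[labels], scores, average="weighted")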
dataeval/_internal/metrics/utils.py
CHANGED
@@ -91,8 +91,8 @@ def entropy(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See
     numpy.histogram for details.
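The entropy note defers continuous binning to `numpy.histogram`; a toy equivalent with automatic bin selection:

    import numpy as np

    values = np.random.default_rng(0).normal(size=200)   # one continuous factor
    counts, _ = np.histogram(values, bins="auto")
    p = counts / counts.sum()
    ent = -np.sum(p[p > 0] * np.log(p[p > 0]))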
dataeval/_internal/models/pytorch/autoencoder.py
CHANGED
@@ -61,9 +61,9 @@ class AETrainer:
         List[float]
             A list of average loss values for each epoch.

-
+        Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.train = custom_function
         """
         # Setup training
@@ -120,7 +120,7 @@ class AETrainer:

         Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.eval = custom_function
         """
         self.model.eval()
@@ -155,8 +155,8 @@ class AETrainer:
         torch.Tensor
             Data encoded by the model

-
-
+        Note
+        ----
         This function should be run after the model has been trained and evaluated.
         """
         self.model.eval()
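The docstrings fixed above describe swapping in a custom loop by assignment. A hedged sketch of that monkey-patch; the import path assumes the 0.70.1 layout from the file moves, and `custom_train`'s signature and body are placeholders:

    from dataeval.utils.torch.trainer import AETrainer  # assumed 0.70.1 export

    def custom_train(self, dataset, epochs=25):
        losses = []
        # ... custom training loop goes here ...
        return losses

    AETrainer.train = custom_train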
dataeval/_internal/models/tensorflow/pixelcnn.py
CHANGED
@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
         The minimum value of the input data.
     dtype : tensorflow dtype, default tf.float32
         Data type of the `Distribution`.
-    name : str, default "PixelCNN"
-        The name of the `Distribution`.
     """

     def __init__(
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
         high: int = 255,
         low: int = 0,
         dtype=tf.float32,
-        name: str = "PixelCNN",
     ) -> None:
         parameters = dict(locals())
-        with tf.name_scope(
+        with tf.name_scope("PixelCNN") as name:
             super().__init__(
                 dtype=dtype,
                 reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
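With the `name` parameter gone, the scope is fixed to "PixelCNN" internally. Before/after construction, using the internal import path visible in the llr.py hunk (`image_shape` is an assumed first argument, mirroring the TFP distribution this class is based on):

    from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN

    # 0.70.0 (removed): PixelCNN(image_shape=(28, 28, 1), name="PixelCNN")
    model = PixelCNN(image_shape=(28, 28, 1))   # 0.70.1: name is fixed internally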
dataeval/_internal/utils.py
CHANGED
@@ -8,7 +8,7 @@ from torch.utils.data import Dataset

 def read_dataset(dataset: Dataset) -> list[list[Any]]:
     """
-    Extract information from a dataset at each index into
+    Extract information from a dataset at each index into individual lists of each information position

     Parameters
     ----------
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
     Examples
     --------
     >>> import numpy as np
-
-    >>> data = np.ones((10, 3, 3))
+    >>> data = np.ones((10, 1, 3, 3))
     >>> labels = np.ones((10,))
     >>> class ICDataset:
     ...     def __init__(self, data, labels):
     ...         self.data = data
     ...         self.labels = labels
-
+    ...
     ...     def __getitem__(self, idx):
     ...         return self.data[idx], self.labels[idx]

     >>> ds = ICDataset(data, labels)

     >>> result = read_dataset(ds)
-    >>>
-
-    >>>
-
-    >>>
-
+    >>> len(result) # images and labels
+    2
+    >>> np.asarray(result[0]).shape # images
+    (10, 1, 3, 3)
+    >>> np.asarray(result[1]).shape # labels
+    (10,)
     """

-    ddict: dict[int, list] = defaultdict(list)
+    ddict: dict[int, list[Any]] = defaultdict(list[Any])

     for data in dataset:
-
-        if not isinstance(data, tuple):
-            data = (data,)
-
-        for i, d in enumerate(data):
+        for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
            ddict[i].append(d)

     return list(ddict.values())
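A standalone equivalent of the refactored loop, for readers who want the transposition behavior outside dataeval:

    from collections import defaultdict
    from typing import Any

    def transpose(dataset) -> list[list[Any]]:
        # mirror read_dataset: fan each tuple position out into its own list
        ddict: dict[int, list[Any]] = defaultdict(list)
        for data in dataset:
            for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
                ddict[i].append(d)
        return list(ddict.values())

    pairs = [(1, "a"), (2, "b")]
    assert transpose(pairs) == [[1, 2], ["a", "b"]]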