dataeval 0.72.0__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_internal/datasets.py +1 -1
- dataeval/_internal/detectors/clusterer.py +6 -6
- dataeval/_internal/detectors/drift/base.py +15 -14
- dataeval/_internal/detectors/drift/cvm.py +5 -5
- dataeval/_internal/detectors/drift/ks.py +7 -7
- dataeval/_internal/detectors/drift/mmd.py +10 -9
- dataeval/_internal/detectors/drift/torch.py +2 -2
- dataeval/_internal/detectors/drift/uncertainty.py +5 -4
- dataeval/_internal/detectors/duplicates.py +1 -1
- dataeval/_internal/detectors/ood/ae.py +2 -2
- dataeval/_internal/detectors/ood/aegmm.py +2 -2
- dataeval/_internal/detectors/ood/base.py +3 -3
- dataeval/_internal/detectors/ood/llr.py +3 -3
- dataeval/_internal/detectors/ood/vae.py +1 -1
- dataeval/_internal/detectors/ood/vaegmm.py +1 -1
- dataeval/_internal/detectors/outliers.py +5 -5
- dataeval/_internal/metrics/balance.py +5 -5
- dataeval/_internal/metrics/ber.py +6 -6
- dataeval/_internal/metrics/coverage.py +4 -4
- dataeval/_internal/metrics/divergence.py +4 -4
- dataeval/_internal/metrics/diversity.py +6 -6
- dataeval/_internal/metrics/parity.py +9 -7
- dataeval/_internal/metrics/stats/base.py +7 -5
- dataeval/_internal/metrics/stats/boxratiostats.py +1 -1
- dataeval/_internal/metrics/stats/datasetstats.py +1 -1
- dataeval/_internal/metrics/stats/dimensionstats.py +3 -3
- dataeval/_internal/metrics/stats/hashstats.py +3 -3
- dataeval/_internal/metrics/stats/labelstats.py +3 -3
- dataeval/_internal/metrics/stats/pixelstats.py +3 -3
- dataeval/_internal/metrics/stats/visualstats.py +4 -4
- dataeval/_internal/metrics/uap.py +3 -3
- dataeval/_internal/metrics/utils.py +8 -8
- dataeval/_internal/models/pytorch/autoencoder.py +8 -8
- dataeval/_internal/models/pytorch/utils.py +3 -3
- dataeval/_internal/models/tensorflow/autoencoder.py +4 -4
- dataeval/_internal/models/tensorflow/losses.py +1 -1
- dataeval/_internal/models/tensorflow/pixelcnn.py +2 -2
- dataeval/_internal/models/tensorflow/utils.py +1 -1
- dataeval/_internal/split_dataset.py +421 -0
- dataeval/_internal/workflows/sufficiency.py +3 -3
- dataeval/detectors/drift/__init__.py +1 -1
- dataeval/detectors/drift/updates/__init__.py +2 -1
- dataeval/detectors/ood/__init__.py +2 -10
- dataeval/utils/__init__.py +1 -1
- dataeval/utils/tensorflow/__init__.py +2 -1
- {dataeval-0.72.0.dist-info → dataeval-0.72.1.dist-info}/METADATA +6 -5
- dataeval-0.72.1.dist-info/RECORD +81 -0
- dataeval-0.72.0.dist-info/RECORD +0 -80
- {dataeval-0.72.0.dist-info → dataeval-0.72.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.0.dist-info → dataeval-0.72.1.dist-info}/WHEEL +0 -0
@@ -16,7 +16,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
|
|
16
16
|
@dataclass(frozen=True)
|
17
17
|
class CoverageOutput(OutputMetadata):
|
18
18
|
"""
|
19
|
-
Output class for :func:`coverage` bias metric
|
19
|
+
Output class for :func:`coverage` :term:`bias<Bias>` metric
|
20
20
|
|
21
21
|
Attributes
|
22
22
|
----------
|
@@ -25,7 +25,7 @@ class CoverageOutput(OutputMetadata):
|
|
25
25
|
radii : NDArray
|
26
26
|
Array of critical value radii
|
27
27
|
critical_value : float
|
28
|
-
Radius for coverage
|
28
|
+
Radius for :term:`coverage<Coverage>`
|
29
29
|
"""
|
30
30
|
|
31
31
|
indices: NDArray[np.intp]
|
@@ -41,7 +41,7 @@ def coverage(
|
|
41
41
|
percent: np.float64 = np.float64(0.01),
|
42
42
|
) -> CoverageOutput:
|
43
43
|
"""
|
44
|
-
Class for evaluating coverage and identifying images/samples that are in undercovered regions.
|
44
|
+
Calculates :term:`coverage<Coverage>` and identifies images/samples that are in undercovered regions.
|
45
45
|
|
46
46
|
Parameters
|
47
47
|
----------
|
@@ -64,7 +64,7 @@ def coverage(
|
|
64
64
|
Raises
|
65
65
|
------
|
66
66
|
ValueError
|
67
|
-
If length of embeddings is less than or equal to k
|
67
|
+
If length of :term:`embeddings<Embeddings>` is less than or equal to k
|
68
68
|
ValueError
|
69
69
|
If radius_type is unknown
|
70
70
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
"""
|
2
|
-
This module contains the implementation of HP Divergence
|
2
|
+
This module contains the implementation of HP :term:`divergence<Divergence>`
|
3
3
|
using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
|
4
4
|
"""
|
5
5
|
|
@@ -24,7 +24,7 @@ class DivergenceOutput(OutputMetadata):
|
|
24
24
|
Attributes
|
25
25
|
----------
|
26
26
|
divergence : float
|
27
|
-
Divergence value calculated between 2 datasets ranging between 0.0 and 1.0
|
27
|
+
:term:`Divergence` value calculated between 2 datasets ranging between 0.0 and 1.0
|
28
28
|
errors : int
|
29
29
|
The number of differing edges between the datasets
|
30
30
|
"""
|
@@ -82,7 +82,7 @@ DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
|
|
82
82
|
@set_metadata("dataeval.metrics")
|
83
83
|
def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
|
84
84
|
"""
|
85
|
-
Calculates the divergence and any errors between the datasets
|
85
|
+
Calculates the :term:`divergence<Divergence>` and any errors between the datasets
|
86
86
|
|
87
87
|
Parameters
|
88
88
|
----------
|
@@ -93,7 +93,7 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
|
|
93
93
|
A dataset in an ArrayLike format to compare.
|
94
94
|
Function expects the data to have 2 dimensions, N number of observations in a P-dimensional space.
|
95
95
|
method : Literal["MST", "FNN"], default "FNN"
|
96
|
-
Method used to estimate dataset divergence
|
96
|
+
Method used to estimate dataset :term:`divergence<Divergence>`
|
97
97
|
|
98
98
|
Returns
|
99
99
|
-------
|
@@ -13,12 +13,12 @@ from dataeval._internal.output import OutputMetadata, set_metadata
|
|
13
13
|
@dataclass(frozen=True)
|
14
14
|
class DiversityOutput(OutputMetadata):
|
15
15
|
"""
|
16
|
-
Output class for :func:`diversity` bias metric
|
16
|
+
Output class for :func:`diversity` :term:`bias<Bias>` metric
|
17
17
|
|
18
18
|
Attributes
|
19
19
|
----------
|
20
20
|
diversity_index : NDArray[np.float64]
|
21
|
-
Diversity index for classes and factors
|
21
|
+
:term:`Diversity` index for classes and factors
|
22
22
|
classwise : NDArray[np.float64]
|
23
23
|
Classwise diversity index [n_class x n_factor]
|
24
24
|
"""
|
@@ -34,7 +34,7 @@ def diversity_shannon(
|
|
34
34
|
subset_mask: NDArray[np.bool_] | None = None,
|
35
35
|
) -> NDArray:
|
36
36
|
"""
|
37
|
-
Compute diversity for discrete/categorical variables and, through standard
|
37
|
+
Compute :term:`diversity<Diversity>` for discrete/categorical variables and, through standard
|
38
38
|
histogram binning, for continuous variables.
|
39
39
|
|
40
40
|
We define diversity as a normalized form of the Shannon entropy.
|
@@ -85,7 +85,7 @@ def diversity_simpson(
|
|
85
85
|
subset_mask: NDArray[np.bool_] | None = None,
|
86
86
|
) -> NDArray:
|
87
87
|
"""
|
88
|
-
Compute diversity for discrete/categorical variables and, through standard
|
88
|
+
Compute :term:`diversity<Diversity>` for discrete/categorical variables and, through standard
|
89
89
|
histogram binning, for continuous variables.
|
90
90
|
|
91
91
|
We define diversity as the inverse Simpson diversity index linearly rescaled to the unit interval.
|
@@ -147,8 +147,8 @@ def diversity(
|
|
147
147
|
class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], method: Literal["shannon", "simpson"] = "simpson"
|
148
148
|
) -> DiversityOutput:
|
149
149
|
"""
|
150
|
-
Compute diversity and classwise diversity for discrete/categorical variables and,
|
151
|
-
histogram binning, for continuous variables.
|
150
|
+
Compute :term:`diversity<Diversity>` and classwise diversity for discrete/categorical variables and,
|
151
|
+
through standard histogram binning, for continuous variables.
|
152
152
|
|
153
153
|
We define diversity as a normalized form of the inverse Simpson diversity index.
|
154
154
|
|
@@ -17,7 +17,7 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
|
|
17
17
|
@dataclass(frozen=True)
|
18
18
|
class ParityOutput(Generic[TData], OutputMetadata):
|
19
19
|
"""
|
20
|
-
Output class for :func:`parity` and :func:`label_parity` bias metrics
|
20
|
+
Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics
|
21
21
|
|
22
22
|
Attributes
|
23
23
|
----------
|
@@ -198,7 +198,8 @@ def label_parity(
|
|
198
198
|
num_classes: int | None = None,
|
199
199
|
) -> ParityOutput[np.float64]:
|
200
200
|
"""
|
201
|
-
Calculate the chi-square statistic to assess the parity between expected and
|
201
|
+
Calculate the chi-square statistic to assess the :term:`parity<Parity>` between expected and
|
202
|
+
observed label distributions.
|
202
203
|
|
203
204
|
This function computes the frequency distribution of classes in both expected and observed labels, normalizes
|
204
205
|
the expected distribution to match the total number of observed labels, and then calculates the chi-square
|
@@ -217,7 +218,7 @@ def label_parity(
|
|
217
218
|
Returns
|
218
219
|
-------
|
219
220
|
ParityOutput[np.float64]
|
220
|
-
chi-squared score and
|
221
|
+
chi-squared score and :term:`P-Value` of the test
|
221
222
|
|
222
223
|
Raises
|
223
224
|
------
|
@@ -231,8 +232,8 @@ def label_parity(
|
|
231
232
|
- Providing ``num_classes`` can be helpful if there are classes with zero instances in one of the distributions.
|
232
233
|
- The function first validates the observed distribution and normalizes the expected distribution so that it
|
233
234
|
has the same total number of labels as the observed distribution.
|
234
|
-
- It then performs a
|
235
|
-
the observed and expected label distributions.
|
235
|
+
- It then performs a :term:`Chi-Square Test of Independence` to determine if there is a statistically significant
|
236
|
+
difference between the observed and expected label distributions.
|
236
237
|
- This function acts as an interface to the scipy.stats.chisquare method, which is documented at
|
237
238
|
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html
|
238
239
|
|
@@ -285,7 +286,8 @@ def parity(
|
|
285
286
|
continuous_factor_bincounts: Mapping[str, int] | None = None,
|
286
287
|
) -> ParityOutput[NDArray[np.float64]]:
|
287
288
|
"""
|
288
|
-
Calculate chi-square statistics to assess the relationship between multiple factors
|
289
|
+
Calculate chi-square statistics to assess the relationship between multiple factors
|
290
|
+
and class labels.
|
289
291
|
|
290
292
|
This function computes the chi-square statistic for each metadata factor to determine if there is
|
291
293
|
a significant relationship between the factor values and class labels. The function handles both categorical
|
@@ -308,7 +310,7 @@ def parity(
|
|
308
310
|
-------
|
309
311
|
ParityOutput[NDArray[np.float64]]
|
310
312
|
Arrays of length (num_factors) whose (i)th element corresponds to the
|
311
|
-
chi-square score and p-value for the relationship between factor i and
|
313
|
+
chi-square score and :term:`p-value<P-Value>` for the relationship between factor i and
|
312
314
|
the class labels in the dataset.
|
313
315
|
|
314
316
|
Raises
|
@@ -215,7 +215,7 @@ def run_stats(
|
|
215
215
|
stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
|
216
216
|
) -> list[TStatsOutput]:
|
217
217
|
"""
|
218
|
-
Compute specified statistics on a set of images.
|
218
|
+
Compute specified :term:`statistics<Statistics>` on a set of images.
|
219
219
|
|
220
220
|
This function applies a set of statistical operations to each image in the input iterable,
|
221
221
|
based on the specified output class. The function determines which statistics to apply
|
@@ -225,7 +225,7 @@ def run_stats(
|
|
225
225
|
----------
|
226
226
|
images : Iterable[ArrayLike]
|
227
227
|
An iterable of images (e.g., list of arrays), where each image is represented as an
|
228
|
-
array-like structure (e.g., NumPy arrays).
|
228
|
+
array-like structure (e.g., :term:`NumPy` arrays).
|
229
229
|
bboxes : Iterable[ArrayLike]
|
230
230
|
An iterable of bounding boxes (e.g. list of arrays) where each bounding box is represented
|
231
231
|
as an array-like structure in the format of (X0, Y0, X1, Y1). The length of the bounding boxes
|
@@ -237,14 +237,16 @@ def run_stats(
|
|
237
237
|
|
238
238
|
Returns
|
239
239
|
-------
|
240
|
-
|
241
|
-
A
|
240
|
+
dict[str, NDArray]
|
241
|
+
A dictionary containing the computed statistics for each image.
|
242
|
+
The dictionary keys correspond to the names of the statistics, and the values are :term:`NumPy` arrays
|
243
|
+
with the results of the computations.
|
242
244
|
|
243
245
|
Note
|
244
246
|
----
|
245
247
|
- The function performs image normalization (rescaling the image values)
|
246
248
|
before applying some of the statistics.
|
247
|
-
- Pixel-level statistics (e.g., brightness
|
249
|
+
- Pixel-level statistics (e.g., :term:`brightness<Brightness>`, entropy) are computed after
|
248
250
|
rescaling and, optionally, flattening the images.
|
249
251
|
- For statistics like histograms and entropy, intermediate results may
|
250
252
|
be reused to avoid redundant computation.
|
@@ -98,7 +98,7 @@ def boxratiostats(
|
|
98
98
|
imgstats: TStatOutput,
|
99
99
|
) -> TStatOutput:
|
100
100
|
"""
|
101
|
-
Calculates ratio statistics of box outputs over image outputs
|
101
|
+
Calculates ratio :term:`statistics<Statistics>` of box outputs over image outputs
|
102
102
|
|
103
103
|
Parameters
|
104
104
|
----------
|
@@ -89,7 +89,7 @@ def datasetstats(
|
|
89
89
|
labels: Iterable[ArrayLike] | None = None,
|
90
90
|
) -> DatasetStatsOutput:
|
91
91
|
"""
|
92
|
-
Calculates various statistics for each image
|
92
|
+
Calculates various :term:`statistics<Statistics>` for each image
|
93
93
|
|
94
94
|
This function computes dimension, pixel and visual metrics
|
95
95
|
on the images or individual bounding boxes for each image as
|
@@ -31,7 +31,7 @@ class DimensionStatsOutput(BaseStatsOutput):
|
|
31
31
|
size : NDArray[np.uint32]
|
32
32
|
Size of the images in pixels
|
33
33
|
aspect_ratio : NDArray[np.float16]
|
34
|
-
Aspect
|
34
|
+
:term:`Aspect Ratio` of the images (width/height)
|
35
35
|
depth : NDArray[np.uint8]
|
36
36
|
Color depth of the images in bits
|
37
37
|
center : NDArray[np.uint16]
|
@@ -77,7 +77,7 @@ def dimensionstats(
|
|
77
77
|
bboxes: Iterable[ArrayLike] | None = None,
|
78
78
|
) -> DimensionStatsOutput:
|
79
79
|
"""
|
80
|
-
Calculates dimension statistics for each image
|
80
|
+
Calculates dimension :term:`statistics<Statistics>` for each image
|
81
81
|
|
82
82
|
This function computes various dimensional metrics (e.g., width, height, channels)
|
83
83
|
on the images or individual bounding boxes for each image.
|
@@ -94,7 +94,7 @@ def dimensionstats(
|
|
94
94
|
DimensionStatsOutput
|
95
95
|
A dictionary-like object containing the computed dimension statistics for each image or bounding
|
96
96
|
box. The keys correspond to the names of the statistics (e.g., 'width', 'height'), and the values
|
97
|
-
are lists of results for each image or
|
97
|
+
are lists of results for each image or :term:`NumPy` arrays when the results are multi-dimensional.
|
98
98
|
|
99
99
|
See Also
|
100
100
|
--------
|
@@ -20,7 +20,7 @@ class HashStatsOutput(BaseStatsOutput):
|
|
20
20
|
xxhash : List[str]
|
21
21
|
xxHash hash of the images as a hex string
|
22
22
|
pchash : List[str]
|
23
|
-
Perception
|
23
|
+
:term:`Perception-based Hash` of the images as a hex string
|
24
24
|
"""
|
25
25
|
|
26
26
|
xxhash: list[str]
|
@@ -60,11 +60,11 @@ def hashstats(
|
|
60
60
|
|
61
61
|
See Also
|
62
62
|
--------
|
63
|
-
Duplicates
|
63
|
+
:term:`Duplicates`
|
64
64
|
|
65
65
|
Examples
|
66
66
|
--------
|
67
|
-
Calculating the statistics on the images, whose shape is (C, H, W)
|
67
|
+
Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
|
68
68
|
|
69
69
|
>>> results = hashstats(images)
|
70
70
|
>>> print(results.xxhash)
|
@@ -60,7 +60,7 @@ def labelstats(
|
|
60
60
|
labels: Iterable[ArrayLike],
|
61
61
|
) -> LabelStatsOutput:
|
62
62
|
"""
|
63
|
-
Calculates statistics for data labels
|
63
|
+
Calculates :term:`statistics<Statistics>` for data labels
|
64
64
|
|
65
65
|
This function computes counting metrics (e.g., total per class, total per image)
|
66
66
|
on the labels.
|
@@ -68,7 +68,7 @@ def labelstats(
|
|
68
68
|
Parameters
|
69
69
|
----------
|
70
70
|
labels : ArrayLike, shape - [label] | [[label]] or (N,M) | (N,)
|
71
|
-
Lists or
|
71
|
+
Lists or :term:`NumPy` array of labels.
|
72
72
|
A set of lists where each list contains all labels per image -
|
73
73
|
(e.g. [[label1, label2], [label2], [label1, label3]] or [label1, label2, label1, label3]).
|
74
74
|
If a numpy array, N is the number of images, M is the number of labels per image.
|
@@ -80,7 +80,7 @@ def labelstats(
|
|
80
80
|
|
81
81
|
Examples
|
82
82
|
--------
|
83
|
-
Calculating the statistics on labels for a set of data
|
83
|
+
Calculating the :term:`statistics<Statistics>` on labels for a set of data
|
84
84
|
|
85
85
|
>>> stats = labelstats(labels)
|
86
86
|
>>> stats.label_counts_per_class
|
@@ -23,7 +23,7 @@ class PixelStatsOutput(BaseStatsOutput):
|
|
23
23
|
std : NDArray[np.float16]
|
24
24
|
Standard deviation of the pixel values of the images
|
25
25
|
var : NDArray[np.float16]
|
26
|
-
Variance of the pixel values of the images
|
26
|
+
:term:`Variance` of the pixel values of the images
|
27
27
|
skew : NDArray[np.float16]
|
28
28
|
Skew of the pixel values of the images
|
29
29
|
kurtosis : NDArray[np.float16]
|
@@ -73,7 +73,7 @@ def pixelstats(
|
|
73
73
|
per_channel: bool = False,
|
74
74
|
) -> PixelStatsOutput:
|
75
75
|
"""
|
76
|
-
Calculates pixel statistics for each image
|
76
|
+
Calculates pixel :term:`statistics<Statistics>` for each image
|
77
77
|
|
78
78
|
This function computes various statistical metrics (e.g., mean, standard deviation, entropy)
|
79
79
|
on the images as a whole.
|
@@ -90,7 +90,7 @@ def pixelstats(
|
|
90
90
|
PixelStatsOutput
|
91
91
|
A dictionary-like object containing the computed statistics for each image. The keys correspond
|
92
92
|
to the names of the statistics (e.g., 'mean', 'std'), and the values are lists of results for
|
93
|
-
each image or
|
93
|
+
each image or :term:`NumPy` arrays when the results are multi-dimensional.
|
94
94
|
|
95
95
|
See Also
|
96
96
|
--------
|
@@ -82,7 +82,7 @@ def visualstats(
|
|
82
82
|
"""
|
83
83
|
Calculates visual statistics for each image
|
84
84
|
|
85
|
-
This function computes various visual metrics (e.g., brightness
|
85
|
+
This function computes various visual metrics (e.g., :term:`brightness<Brightness>`, darkness, contrast, blurriness)
|
86
86
|
on the images as a whole.
|
87
87
|
|
88
88
|
Parameters
|
@@ -96,8 +96,8 @@ def visualstats(
|
|
96
96
|
-------
|
97
97
|
VisualStatsOutput
|
98
98
|
A dictionary-like object containing the computed visual statistics for each image. The keys correspond
|
99
|
-
to the names of the statistics (e.g., 'brightness', '
|
100
|
-
each image or
|
99
|
+
to the names of the statistics (e.g., 'brightness', 'blurriness'), and the values are lists of results for
|
100
|
+
each image or :term:`NumPy` arrays when the results are multi-dimensional.
|
101
101
|
|
102
102
|
See Also
|
103
103
|
--------
|
@@ -109,7 +109,7 @@ def visualstats(
|
|
109
109
|
|
110
110
|
Examples
|
111
111
|
--------
|
112
|
-
Calculating the statistics on the images, whose shape is (C, H, W)
|
112
|
+
Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
|
113
113
|
|
114
114
|
>>> results = visualstats(images)
|
115
115
|
>>> print(results.brightness)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
This module contains the implementation of the
|
3
|
-
FR Test Statistic based estimate for the
|
4
|
-
average precision using empirical mean precision
|
3
|
+
FR Test Statistic based estimate for the :term:`upper-bound
|
4
|
+
average precision<Upper-Bound Average Precision (UAP)>` using empirical mean precision
|
5
5
|
"""
|
6
6
|
|
7
7
|
from __future__ import annotations
|
@@ -38,7 +38,7 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
|
|
38
38
|
Parameters
|
39
39
|
----------
|
40
40
|
labels : ArrayLike
|
41
|
-
A
|
41
|
+
A :term:`NumPy` array of n_samples of class labels with M unique classes.
|
42
42
|
scores : ArrayLike
|
43
43
|
A 2D array of class probabilities per image
|
44
44
|
|
@@ -78,8 +78,8 @@ def entropy(
|
|
78
78
|
subset_mask: NDArray[np.bool_] | None = None,
|
79
79
|
) -> NDArray[np.float64]:
|
80
80
|
"""
|
81
|
-
Meant for use with Bias metrics, Balance
|
82
|
-
and Classwise Diversity.
|
81
|
+
Meant for use with :term:`bias<Bias>` metrics, :term:`balance<Balance>`, :term:`diversity<Diversity>`,
|
82
|
+
Classwise Balance, and Classwise Diversity.
|
83
83
|
|
84
84
|
Compute entropy for discrete/categorical variables and for continuous variables through standard
|
85
85
|
histogram binning.
|
@@ -128,7 +128,7 @@ def get_num_bins(
|
|
128
128
|
) -> NDArray[np.float64]:
|
129
129
|
"""
|
130
130
|
Number of bins or unique values for each metadata factor, used to
|
131
|
-
normalize entropy
|
131
|
+
normalize entropy/:term:`diversity<Diversity>`.
|
132
132
|
|
133
133
|
Parameters
|
134
134
|
----------
|
@@ -207,12 +207,12 @@ def flatten(X: NDArray):
|
|
207
207
|
|
208
208
|
def minimum_spanning_tree(X: NDArray) -> Any:
|
209
209
|
"""
|
210
|
-
Returns the minimum spanning tree from a NumPy image array.
|
210
|
+
Returns the minimum spanning tree from a :term:`NumPy` image array.
|
211
211
|
|
212
212
|
Parameters
|
213
213
|
----------
|
214
214
|
X : NDArray
|
215
|
-
|
215
|
+
NumPy image array
|
216
216
|
|
217
217
|
Returns
|
218
218
|
-------
|
@@ -234,7 +234,7 @@ def get_classes_counts(labels: NDArray) -> tuple[int, int]:
|
|
234
234
|
Parameters
|
235
235
|
----------
|
236
236
|
labels : NDArray
|
237
|
-
|
237
|
+
:term:`NumPy` labels array
|
238
238
|
|
239
239
|
Returns
|
240
240
|
-------
|
@@ -389,7 +389,7 @@ def pchash(image: NDArray) -> str:
|
|
389
389
|
Parameters
|
390
390
|
----------
|
391
391
|
image : NDArray
|
392
|
-
An image as a
|
392
|
+
An image as a :term:`NumPy` array in CxHxW format
|
393
393
|
|
394
394
|
Returns
|
395
395
|
-------
|
@@ -437,7 +437,7 @@ def xxhash(image: NDArray) -> str:
|
|
437
437
|
Parameters
|
438
438
|
----------
|
439
439
|
image : NDArray
|
440
|
-
An image as a
|
440
|
+
An image as a :term:`NumPy` array
|
441
441
|
|
442
442
|
Returns
|
443
443
|
-------
|
@@ -17,7 +17,7 @@ def get_images_from_batch(batch: Any) -> Any:
|
|
17
17
|
|
18
18
|
class AETrainer:
|
19
19
|
"""
|
20
|
-
A class to train and evaluate an autoencoder model.
|
20
|
+
A class to train and evaluate an :term:`autoencoder<Autoencoder>` model.
|
21
21
|
|
22
22
|
Parameters
|
23
23
|
----------
|
@@ -44,7 +44,7 @@ class AETrainer:
|
|
44
44
|
|
45
45
|
def train(self, dataset: Dataset, epochs: int = 25) -> list[float]:
|
46
46
|
"""
|
47
|
-
Basic image reconstruction training function for Autoencoder models
|
47
|
+
Basic image reconstruction training function for :term:`Autoencoder` models
|
48
48
|
|
49
49
|
Uses `torch.optim.Adam` and `torch.nn.MSELoss` as default hyperparameters
|
50
50
|
|
@@ -59,7 +59,7 @@ class AETrainer:
|
|
59
59
|
Returns
|
60
60
|
-------
|
61
61
|
List[float]
|
62
|
-
A list of average loss values for each epoch
|
62
|
+
A list of average loss values for each :term:`epoch<Epoch>`.
|
63
63
|
|
64
64
|
Note
|
65
65
|
----
|
@@ -103,7 +103,7 @@ class AETrainer:
|
|
103
103
|
@torch.no_grad
|
104
104
|
def eval(self, dataset: Dataset) -> float:
|
105
105
|
"""
|
106
|
-
Basic image reconstruction evaluation function for Autoencoder models
|
106
|
+
Basic image reconstruction evaluation function for :term:`autoencoder<Autoencoder>` models
|
107
107
|
|
108
108
|
Uses `torch.nn.MSELoss` as default loss function.
|
109
109
|
|
@@ -139,7 +139,7 @@ class AETrainer:
|
|
139
139
|
@torch.no_grad
|
140
140
|
def encode(self, dataset: Dataset) -> torch.Tensor:
|
141
141
|
"""
|
142
|
-
Create image embeddings for the dataset using the model's encoder.
|
142
|
+
Create image :term:`embeddings<Embeddings>` for the dataset using the model's encoder.
|
143
143
|
|
144
144
|
If the model has an `encode` method, it will be used; otherwise,
|
145
145
|
`model.forward` will be used.
|
@@ -178,7 +178,7 @@ class AETrainer:
|
|
178
178
|
|
179
179
|
class AriaAutoencoder(nn.Module):
|
180
180
|
"""
|
181
|
-
An autoencoder model with a separate encoder and decoder.
|
181
|
+
An :term:`autoencoder<Autoencoder>` model with a separate encoder and decoder.
|
182
182
|
|
183
183
|
Parameters
|
184
184
|
----------
|
@@ -228,7 +228,7 @@ class AriaAutoencoder(nn.Module):
|
|
228
228
|
|
229
229
|
class Encoder(nn.Module):
|
230
230
|
"""
|
231
|
-
A simple encoder to be used in an autoencoder model.
|
231
|
+
A simple encoder to be used in an :term:`autoencoder<Autoencoder>` model.
|
232
232
|
|
233
233
|
This is the encoder used by the AriaAutoencoder model.
|
234
234
|
|
@@ -269,7 +269,7 @@ class Encoder(nn.Module):
|
|
269
269
|
|
270
270
|
class Decoder(nn.Module):
|
271
271
|
"""
|
272
|
-
A simple decoder to be used in an autoencoder model.
|
272
|
+
A simple decoder to be used in an :term:`autoencoder<Autoencoder>` model.
|
273
273
|
|
274
274
|
This is the decoder used by the AriaAutoencoder model.
|
275
275
|
|
@@ -17,7 +17,7 @@ def torch_to_numpy(tensor: Tensor) -> ndarray:
|
|
17
17
|
|
18
18
|
def numpy_to_torch(array: ndarray) -> Tensor:
|
19
19
|
"""
|
20
|
-
Converts a NumPy array to a PyTorch tensor
|
20
|
+
Converts a :term:`NumPy` array to a PyTorch tensor
|
21
21
|
"""
|
22
22
|
if isinstance(array, Tensor): # Already tensor, return
|
23
23
|
return array
|
@@ -29,7 +29,7 @@ def numpy_to_torch(array: ndarray) -> Tensor:
|
|
29
29
|
|
30
30
|
def permute_to_torch(array: ndarray) -> Tensor:
|
31
31
|
"""
|
32
|
-
Converts and permutes a NumPy image array into a PyTorch image tensor.
|
32
|
+
Converts and permutes a :term:`NumPy` image array into a PyTorch image tensor.
|
33
33
|
|
34
34
|
Parameters
|
35
35
|
----------
|
@@ -48,7 +48,7 @@ def permute_to_torch(array: ndarray) -> Tensor:
|
|
48
48
|
|
49
49
|
def permute_to_numpy(tensor: Tensor) -> ndarray:
|
50
50
|
"""
|
51
|
-
Converts and permutes a PyTorch image tensor into a NumPy image array.
|
51
|
+
Converts and permutes a PyTorch image tensor into a :term:`NumPy` image array.
|
52
52
|
|
53
53
|
Does not permute if given ndarray
|
54
54
|
|
@@ -90,7 +90,7 @@ class Sampling(Layer):
|
|
90
90
|
Parameters
|
91
91
|
----------
|
92
92
|
inputs
|
93
|
-
Tuple with mean and log variance
|
93
|
+
Tuple with mean and log :term:`variance<Variance>`.
|
94
94
|
|
95
95
|
Returns
|
96
96
|
-------
|
@@ -131,7 +131,7 @@ class EncoderVAE(Layer):
|
|
131
131
|
encoder_net
|
132
132
|
Layers for the encoder wrapped in a keras.Sequential class.
|
133
133
|
latent_dim
|
134
|
-
Dimensionality of the latent space
|
134
|
+
Dimensionality of the :term:`latent space<Latent Space>`.
|
135
135
|
name
|
136
136
|
Name of encoder.
|
137
137
|
"""
|
@@ -204,7 +204,7 @@ class VAE(keras.Model):
|
|
204
204
|
decoder_net : keras.Model
|
205
205
|
Layers for the decoder wrapped in a keras.Sequential class.
|
206
206
|
latent_dim : int
|
207
|
-
Dimensionality of the latent space
|
207
|
+
Dimensionality of the :term:`latent space<Latent Space>`.
|
208
208
|
beta : float, default 1.0
|
209
209
|
Beta parameter for KL-divergence loss term.
|
210
210
|
"""
|
@@ -282,7 +282,7 @@ class VAEGMM(keras.Model):
|
|
282
282
|
n_gmm : int
|
283
283
|
Number of components in GMM.
|
284
284
|
latent_dim : int
|
285
|
-
Dimensionality of the latent space
|
285
|
+
Dimensionality of the :term:`latent space<Latent Space>`.
|
286
286
|
recon_features : Callable, default eucl_cosim_features
|
287
287
|
Function to extract features from the reconstructed instance by the decoder.
|
288
288
|
beta : float, default 1.0
|
@@ -31,7 +31,7 @@ class Elbo:
|
|
31
31
|
Parameters
|
32
32
|
----------
|
33
33
|
cov_type : Union[Literal["cov_full", "cov_diag"], float], default 1.0
|
34
|
-
Full covariance matrix, diagonal variance matrix, or scale identity multiplier.
|
34
|
+
Full covariance matrix, diagonal :term:`variance<Variance>` matrix, or scale identity multiplier.
|
35
35
|
x : ArrayLike, optional - default None
|
36
36
|
Dataset used to calculate the covariance matrix. Required for full and diagonal covariance matrix types.
|
37
37
|
"""
|
@@ -400,7 +400,7 @@ class PixelCNN(distribution.Distribution):
|
|
400
400
|
Parameters
|
401
401
|
----------
|
402
402
|
value
|
403
|
-
`Tensor` or
|
403
|
+
`Tensor` or :term:`NumPy` array of image data. May have leading batch
|
404
404
|
dimension(s), which must broadcast to the leading batch dimensions of
|
405
405
|
`conditional_input`.
|
406
406
|
conditional_input
|
@@ -706,7 +706,7 @@ class _PixelCNNNetwork(keras.layers.Layer):
|
|
706
706
|
use_data_init: bool = True,
|
707
707
|
dtype=tf.float32,
|
708
708
|
) -> None:
|
709
|
-
"""Initialize the neural network for the Pixel CNN++ distribution.
|
709
|
+
"""Initialize the :term:`neural network<Neural Network>` for the Pixel CNN++ distribution.
|
710
710
|
|
711
711
|
Parameters
|
712
712
|
----------
|
@@ -55,7 +55,7 @@ def predict_batch(
|
|
55
55
|
|
56
56
|
Returns
|
57
57
|
-------
|
58
|
-
|
58
|
+
:term:`NumPy` array, tensorflow tensor or tuples of those with model outputs.
|
59
59
|
"""
|
60
60
|
n = len(x)
|
61
61
|
n_minibatch = int(np.ceil(n / batch_size))
|