dataeval 0.74.2__py3-none-any.whl → 0.76.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. dataeval/__init__.py +27 -23
  2. dataeval/detectors/__init__.py +2 -2
  3. dataeval/detectors/drift/__init__.py +14 -12
  4. dataeval/detectors/drift/base.py +3 -3
  5. dataeval/detectors/drift/cvm.py +1 -1
  6. dataeval/detectors/drift/ks.py +3 -2
  7. dataeval/detectors/drift/mmd.py +9 -7
  8. dataeval/detectors/drift/torch.py +12 -12
  9. dataeval/detectors/drift/uncertainty.py +5 -4
  10. dataeval/detectors/drift/updates.py +1 -1
  11. dataeval/detectors/linters/__init__.py +4 -4
  12. dataeval/detectors/linters/clusterer.py +5 -9
  13. dataeval/detectors/linters/duplicates.py +10 -14
  14. dataeval/detectors/linters/outliers.py +100 -5
  15. dataeval/detectors/ood/__init__.py +4 -11
  16. dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
  17. dataeval/detectors/ood/base.py +47 -160
  18. dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  19. dataeval/detectors/ood/metadata_least_likely.py +3 -3
  20. dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  21. dataeval/detectors/ood/mixin.py +146 -0
  22. dataeval/detectors/ood/output.py +63 -0
  23. dataeval/interop.py +7 -6
  24. dataeval/{logging.py → log.py} +2 -0
  25. dataeval/metrics/__init__.py +3 -3
  26. dataeval/metrics/bias/__init__.py +10 -13
  27. dataeval/metrics/bias/balance.py +13 -11
  28. dataeval/metrics/bias/coverage.py +53 -5
  29. dataeval/metrics/bias/diversity.py +56 -24
  30. dataeval/metrics/bias/parity.py +20 -17
  31. dataeval/metrics/estimators/__init__.py +2 -2
  32. dataeval/metrics/estimators/ber.py +7 -4
  33. dataeval/metrics/estimators/divergence.py +4 -4
  34. dataeval/metrics/estimators/uap.py +4 -4
  35. dataeval/metrics/stats/__init__.py +19 -19
  36. dataeval/metrics/stats/base.py +28 -12
  37. dataeval/metrics/stats/boxratiostats.py +13 -14
  38. dataeval/metrics/stats/datasetstats.py +49 -20
  39. dataeval/metrics/stats/dimensionstats.py +8 -8
  40. dataeval/metrics/stats/hashstats.py +14 -10
  41. dataeval/metrics/stats/labelstats.py +94 -11
  42. dataeval/metrics/stats/pixelstats.py +11 -14
  43. dataeval/metrics/stats/visualstats.py +10 -13
  44. dataeval/output.py +23 -14
  45. dataeval/utils/__init__.py +5 -14
  46. dataeval/utils/dataset/__init__.py +7 -0
  47. dataeval/utils/{torch → dataset}/datasets.py +2 -0
  48. dataeval/utils/dataset/read.py +63 -0
  49. dataeval/utils/{split_dataset.py → dataset/split.py} +38 -30
  50. dataeval/utils/image.py +2 -2
  51. dataeval/utils/metadata.py +317 -14
  52. dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +91 -71
  53. dataeval/utils/torch/__init__.py +2 -17
  54. dataeval/utils/torch/gmm.py +29 -6
  55. dataeval/utils/torch/{utils.py → internal.py} +82 -58
  56. dataeval/utils/torch/models.py +10 -8
  57. dataeval/utils/torch/trainer.py +6 -85
  58. dataeval/workflows/__init__.py +2 -5
  59. dataeval/workflows/sufficiency.py +18 -8
  60. {dataeval-0.74.2.dist-info → dataeval-0.76.0.dist-info}/LICENSE.txt +2 -2
  61. dataeval-0.76.0.dist-info/METADATA +137 -0
  62. dataeval-0.76.0.dist-info/RECORD +67 -0
  63. dataeval/detectors/ood/base_torch.py +0 -109
  64. dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  65. dataeval/utils/gmm.py +0 -26
  66. dataeval-0.74.2.dist-info/METADATA +0 -120
  67. dataeval-0.74.2.dist-info/RECORD +0 -66
  68. {dataeval-0.74.2.dist-info → dataeval-0.76.0.dist-info}/WHEEL +0 -0
dataeval/metrics/bias/__init__.py
@@ -1,24 +1,21 @@
 """
-Bias metrics check for skewed or imbalanced datasets and incomplete feature
+Bias metrics check for skewed or imbalanced datasets and incomplete feature \
 representation which may impact model performance.
 """
 
-from dataeval.metrics.bias.balance import BalanceOutput, balance
-from dataeval.metrics.bias.coverage import CoverageOutput, coverage
-from dataeval.metrics.bias.diversity import DiversityOutput, diversity
-from dataeval.metrics.bias.metadata_preprocessing import MetadataOutput, metadata_preprocessing
-from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
-
 __all__ = [
+    "BalanceOutput",
+    "CoverageOutput",
+    "DiversityOutput",
+    "ParityOutput",
     "balance",
     "coverage",
     "diversity",
     "label_parity",
     "parity",
-    "metadata_preprocessing",
-    "BalanceOutput",
-    "CoverageOutput",
-    "DiversityOutput",
-    "ParityOutput",
-    "MetadataOutput",
 ]
+
+from dataeval.metrics.bias.balance import BalanceOutput, balance
+from dataeval.metrics.bias.coverage import CoverageOutput, coverage
+from dataeval.metrics.bias.diversity import DiversityOutput, diversity
+from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
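The net effect of this hunk is that the bias metrics are still re-exported from dataeval.metrics.bias, while metadata_preprocessing and MetadataOutput leave the module (the parity doctest later in this diff points at dataeval.utils.metadata.preprocess as the replacement). A minimal migration sketch, using only names visible in this diff:

    # 0.74.2:
    #   from dataeval.metrics.bias import metadata_preprocessing
    # 0.76.0:
    from dataeval.metrics.bias import balance, coverage, diversity, label_parity, parity
    from dataeval.utils.metadata import preprocess  # replaces metadata_preprocessing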
dataeval/metrics/bias/balance.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-__all__ = ["BalanceOutput", "balance"]
+__all__ = []
 
 import contextlib
 import warnings
@@ -12,9 +12,9 @@ import scipy as sp
 from numpy.typing import NDArray
 from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
 
-from dataeval.metrics.bias.metadata_preprocessing import MetadataOutput
-from dataeval.metrics.bias.metadata_utils import get_counts, heatmap
 from dataeval.output import Output, set_metadata
+from dataeval.utils.metadata import Metadata, get_counts
+from dataeval.utils.plot import heatmap
 
 with contextlib.suppress(ImportError):
     from matplotlib.figure import Figure
@@ -23,8 +23,8 @@ with contextlib.suppress(ImportError):
 @dataclass(frozen=True)
 class BalanceOutput(Output):
     """
-    Output class for :func:`balance` bias metric
-
+    Output class for :func:`balance` :term:`bias<Bias>` metric.
+
     Attributes
     ----------
     balance : NDArray[np.float64]
@@ -119,22 +119,24 @@ def _validate_num_neighbors(num_neighbors: int) -> int:
 
 @set_metadata
 def balance(
-    metadata: MetadataOutput,
+    metadata: Metadata,
     num_neighbors: int = 5,
 ) -> BalanceOutput:
     """
-    Mutual information (MI) between factors (class label, metadata, label/image properties)
+    Mutual information (MI) between factors (class label, metadata, label/image properties).
 
     Parameters
     ----------
-    metadata : MetadataOutput
-        Output after running `metadata_preprocessing`
+    metadata : Metadata
+        Preprocessed metadata from :func:`dataeval.utils.metadata.preprocess`
+    num_neighbors : int, default 5
+        Number of points to consider as neighbors
 
     Returns
    -------
     BalanceOutput
-        (num_factors+1) x (num_factors+1) estimate of mutual information
-        between num_factors metadata factors and class label. Symmetry is enforced.
+        (num_factors+1) x (num_factors+1) estimate of mutual information \
+        between num_factors metadata factors and class label. Symmetry is enforced.
 
     Note
     ----
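A usage sketch for the new signature, assuming preprocess accepts the same arguments as in the parity doctest later in this diff; the data is invented for illustration:

    import numpy as np
    from dataeval.metrics.bias import balance
    from dataeval.utils.metadata import preprocess

    rng = np.random.default_rng(175)
    labels = rng.choice([0, 1, 2], (100))
    factors = [{"age": list(rng.choice([25, 30, 35, 45], (100))),
                "gender": list(rng.choice(["M", "F"], (100)))}]

    metadata = preprocess(factors, labels, {"age": 4})  # bin the continuous factor
    bal = balance(metadata, num_neighbors=5)
    print(bal.balance)  # (num_factors+1) x (num_factors+1) symmetric MI estimate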
dataeval/metrics/bias/coverage.py
@@ -1,18 +1,17 @@
 from __future__ import annotations
 
-__all__ = ["CoverageOutput", "coverage"]
+__all__ = []
 
 import contextlib
 import math
 from dataclasses import dataclass
-from typing import Literal
+from typing import Any, Literal
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.spatial.distance import pdist, squareform
 
 from dataeval.interop import to_numpy
-from dataeval.metrics.bias.metadata_utils import coverage_plot
 from dataeval.output import Output, set_metadata
 from dataeval.utils.shared import flatten
 
@@ -20,10 +19,59 @@ with contextlib.suppress(ImportError):
     from matplotlib.figure import Figure
 
 
+def _plot(images: NDArray[Any], num_images: int) -> Figure:
+    """
+    Creates a single plot of all of the provided images
+
+    Parameters
+    ----------
+    images : NDArray
+        Array containing only the desired images to plot
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Plot of all provided images
+    """
+    import matplotlib.pyplot as plt
+
+    num_images = min(num_images, len(images))
+
+    if images.ndim == 4:
+        images = np.moveaxis(images, 1, -1)
+    elif images.ndim == 3:
+        images = np.repeat(images[:, :, :, np.newaxis], 3, axis=-1)
+    else:
+        raise ValueError(
+            f"Expected a (N,C,H,W) or a (N, H, W) set of images, but got a {images.ndim}-dimensional set of images."
+        )
+
+    rows = int(np.ceil(num_images / 3))
+    fig, axs = plt.subplots(rows, 3, figsize=(9, 3 * rows))
+
+    if rows == 1:
+        for j in range(3):
+            if j >= len(images):
+                continue
+            axs[j].imshow(images[j])
+            axs[j].axis("off")
+    else:
+        for i in range(rows):
+            for j in range(3):
+                i_j = i * 3 + j
+                if i_j >= len(images):
+                    continue
+                axs[i, j].imshow(images[i_j])
+                axs[i, j].axis("off")
+
+    fig.tight_layout()
+    return fig
+
+
 @dataclass(frozen=True)
 class CoverageOutput(Output):
     """
-    Output class for :func:`coverage` :term:`bias<Bias>` metric
+    Output class for :func:`coverage` :term:`bias<Bias>` metric.
 
     Attributes
     ----------
@@ -62,7 +110,7 @@ class CoverageOutput(Output):
         selected_images = images[highest_uncovered_indices]
 
         # Plot the images
-        fig = coverage_plot(selected_images, top_k)
+        fig = _plot(selected_images, top_k)
 
         return fig
 
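The new private _plot helper lays the selected images out three per row. A standalone sketch of that grid logic, runnable on its own; the random images are illustrative, and axs.flat stands in for the helper's explicit 1-D/2-D branches:

    import numpy as np
    import matplotlib.pyplot as plt

    images = np.random.default_rng(0).random((5, 3, 16, 16))  # (N, C, H, W)
    images = np.moveaxis(images, 1, -1)                       # channels-last for imshow

    rows = int(np.ceil(len(images) / 3))                      # three images per row
    fig, axs = plt.subplots(rows, 3, figsize=(9, 3 * rows))
    for idx, ax in enumerate(axs.flat):
        if idx < len(images):
            ax.imshow(images[idx])
        ax.axis("off")
    fig.tight_layout()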
dataeval/metrics/bias/diversity.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-__all__ = ["DiversityOutput", "diversity"]
+__all__ = []
 
 import contextlib
 from dataclasses import dataclass
@@ -10,19 +10,48 @@ import numpy as np
 import scipy as sp
 from numpy.typing import ArrayLike, NDArray
 
-from dataeval.metrics.bias.metadata_preprocessing import MetadataOutput
-from dataeval.metrics.bias.metadata_utils import diversity_bar_plot, get_counts, heatmap
 from dataeval.output import Output, set_metadata
+from dataeval.utils.metadata import Metadata, get_counts
+from dataeval.utils.plot import heatmap
 from dataeval.utils.shared import get_method
 
 with contextlib.suppress(ImportError):
     from matplotlib.figure import Figure
 
 
+def _plot(labels: NDArray[Any], bar_heights: NDArray[Any]) -> Figure:
+    """
+    Plots a formatted bar plot
+
+    Parameters
+    ----------
+    labels : NDArray
+        Array containing the labels for each bar
+    bar_heights : NDArray
+        Array containing the values for each bar
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Bar plot figure
+    """
+    import matplotlib.pyplot as plt
+
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    ax.bar(labels, bar_heights)
+    ax.set_xlabel("Factors")
+
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+
+    fig.tight_layout()
+    return fig
+
+
 @dataclass(frozen=True)
 class DiversityOutput(Output):
     """
-    Output class for :func:`diversity` :term:`bias<Bias>` metric
+    Output class for :func:`diversity` :term:`bias<Bias>` metric.
 
     Attributes
     ----------
@@ -77,8 +106,7 @@ class DiversityOutput(Output):
         else:
             # Creating label array for heat map axes
             heat_labels = np.concatenate((["class"], self.factor_names))
-
-            fig = diversity_bar_plot(heat_labels, self.diversity_index)
+            fig = _plot(heat_labels, self.diversity_index)
 
         return fig
 
@@ -165,27 +193,26 @@ def diversity_simpson(
 
 @set_metadata
 def diversity(
-    metadata: MetadataOutput,
+    metadata: Metadata,
     method: Literal["simpson", "shannon"] = "simpson",
 ) -> DiversityOutput:
     """
-    Compute :term:`diversity<Diversity>` and classwise diversity for discrete/categorical variables and,
-    through standard histogram binning, for continuous variables.
+    Compute :term:`diversity<Diversity>` and classwise diversity for \
+    discrete/categorical variables through standard histogram binning, \
+    for continuous variables.
 
-    We define diversity as a normalized form of the inverse Simpson diversity index.
+    The method specified defines diversity as the inverse Simpson diversity index linearly rescaled to
+    the unit interval, or the normalized form of the Shannon entropy.
 
     diversity = 1 implies that samples are evenly distributed across a particular factor
     diversity = 0 implies that all samples belong to one category/bin
 
     Parameters
     ----------
-    metadata : MetadataOutput
-        Output after running `metadata_preprocessing`
-
-    Note
-    ----
-    - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
-    - If there is only one category, the diversity index takes a value of 0.
+    metadata : Metadata
+        Preprocessed metadata from :func:`dataeval.utils.metadata.preprocess`
+    method : "simpson" or "shannon", default "simpson"
+        The methodology used for defining diversity
 
     Returns
     -------
@@ -193,27 +220,32 @@ def diversity(
        Diversity index per column of self.data or each factor in self.names and
        classwise diversity [n_class x n_factor]
 
+    Note
+    ----
+    - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
+    - If there is only one category, the diversity index takes a value of 0.
+
     Example
     -------
-    Compute Simpson diversity index of metadata and class labels
+    Compute the diversity index of metadata and class labels
 
     >>> div_simp = diversity(metadata, method="simpson")
    >>> div_simp.diversity_index
-    array([0.72413793, 0.88636364, 0.72413793])
+    array([0.6       , 0.80882353, 1.        , 0.8       ])
 
     >>> div_simp.classwise
-    array([[0.69230769, 0.68965517],
-           [0.5       , 0.8       ]])
+    array([[0.5       , 0.8       , 0.8       ],
+           [0.63043478, 0.97560976, 0.52830189]])
 
     Compute Shannon diversity index of metadata and class labels
 
     >>> div_shan = diversity(metadata, method="shannon")
     >>> div_shan.diversity_index
-    array([0.8812909 , 0.96748876, 0.8812909 ])
+    array([0.81127812, 0.9426312 , 1.        , 0.91829583])
 
     >>> div_shan.classwise
-    array([[0.91651644, 0.86312057],
-           [0.68260619, 0.91829583]])
+    array([[0.68260619, 0.91829583, 0.91829583],
+           [0.81443569, 0.99107606, 0.76420451]])
 
     See Also
     --------
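A worked sketch of the two definitions named in the new docstring, as I read them (not code from the package): the inverse Simpson index 1/sum(p^2) is linearly rescaled from [1, k] to [0, 1], and the Shannon entropy is divided by its maximum log(k):

    import numpy as np

    counts = np.array([10, 10, 10, 70])  # one factor with k = 4 bins
    p = counts / counts.sum()
    k = len(counts)

    inv_simpson = 1.0 / np.sum(p**2)                 # 1 (one bin) .. k (uniform)
    simpson_diversity = (inv_simpson - 1) / (k - 1)  # rescaled to the unit interval

    shannon_diversity = -np.sum(p * np.log(p)) / np.log(k)  # normalized entropy

    print(simpson_diversity, shannon_diversity)  # 0 when all mass is in one bin, 1 when uniform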
dataeval/metrics/bias/parity.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-__all__ = ["ParityOutput", "parity", "label_parity"]
+__all__ = []
 
 import warnings
 from dataclasses import dataclass
@@ -12,8 +12,8 @@ from scipy.stats import chisquare
 from scipy.stats.contingency import chi2_contingency, crosstab
 
 from dataeval.interop import as_numpy, to_numpy
-from dataeval.metrics.bias.metadata_preprocessing import MetadataOutput
 from dataeval.output import Output, set_metadata
+from dataeval.utils.metadata import Metadata
 
 TData = TypeVar("TData", np.float64, NDArray[np.float64])
 
@@ -21,7 +21,7 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
 @dataclass(frozen=True)
 class ParityOutput(Generic[TData], Output):
     """
-    Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics
+    Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics.
 
     Attributes
     ----------
@@ -123,8 +123,8 @@ def label_parity(
     num_classes: int | None = None,
 ) -> ParityOutput[np.float64]:
     """
-    Calculate the chi-square statistic to assess the :term:`parity<Parity>` between expected and
-    observed label distributions.
+    Calculate the chi-square statistic to assess the :term:`parity<Parity>` \
+    between expected and observed label distributions.
 
     This function computes the frequency distribution of classes in both expected and observed labels, normalizes
     the expected distribution to match the total number of observed labels, and then calculates the chi-square
@@ -167,8 +167,9 @@ def label_parity(
     --------
     Randomly creating some label distributions using ``np.random.default_rng``
 
-    >>> expected_labels = np_random_gen.choice([0, 1, 2, 3, 4], (100))
-    >>> observed_labels = np_random_gen.choice([2, 3, 0, 4, 1], (100))
+    >>> rng = np.random.default_rng(175)
+    >>> expected_labels = rng.choice([0, 1, 2, 3, 4], (100))
+    >>> observed_labels = rng.choice([2, 3, 0, 4, 1], (100))
     >>> label_parity(expected_labels, observed_labels)
     ParityOutput(score=14.007374204742625, p_value=0.0072715574616218, metadata_names=None)
     """
@@ -205,10 +206,10 @@ def label_parity(
 
 
 @set_metadata
-def parity(metadata: MetadataOutput) -> ParityOutput[NDArray[np.float64]]:
+def parity(metadata: Metadata) -> ParityOutput[NDArray[np.float64]]:
     """
-    Calculate chi-square statistics to assess the linear relationship between multiple factors
-    and class labels.
+    Calculate chi-square statistics to assess the linear relationship \
+    between multiple factors and class labels.
 
     This function computes the chi-square statistic for each metadata factor to determine if there is
     a significant relationship between the factor values and class labels. The chi-square statistic is
@@ -216,8 +217,8 @@ def parity(metadata: MetadataOutput) -> ParityOutput[NDArray[np.float64]]:
 
     Parameters
     ----------
-    metadata : MetadataOutput
-        Output after running `metadata_preprocessing`
+    metadata : Metadata
+        Preprocessed metadata from :func:`dataeval.utils.metadata.preprocess`
 
     Returns
     -------
@@ -249,16 +250,18 @@ def parity(metadata: MetadataOutput) -> ParityOutput[NDArray[np.float64]]:
     --------
     Randomly creating some "continuous" and categorical variables using ``np.random.default_rng``
 
-    >>> labels = np_random_gen.choice([0, 1, 2], (100))
+    >>> from dataeval.utils.metadata import preprocess
+    >>> rng = np.random.default_rng(175)
+    >>> labels = rng.choice([0, 1, 2], (100))
     >>> metadata_dict = [
     ...     {
-    ...         "age": list(np_random_gen.choice([25, 30, 35, 45], (100))),
-    ...         "income": list(np_random_gen.choice([50000, 65000, 80000], (100))),
-    ...         "gender": list(np_random_gen.choice(["M", "F"], (100))),
+    ...         "age": list(rng.choice([25, 30, 35, 45], (100))),
+    ...         "income": list(rng.choice([50000, 65000, 80000], (100))),
+    ...         "gender": list(rng.choice(["M", "F"], (100))),
     ...     }
     ... ]
     >>> continuous_factor_bincounts = {"age": 4, "income": 3}
-    >>> metadata = metadata_preprocessing(metadata_dict, labels, continuous_factor_bincounts)
+    >>> metadata = preprocess(metadata_dict, labels, continuous_factor_bincounts)
     >>> parity(metadata)
     ParityOutput(score=array([7.35731943, 5.46711299, 0.51506212]), p_value=array([0.28906231, 0.24263543, 0.77295762]), metadata_names=['age', 'income', 'gender'])
     """  # noqa: E501
dataeval/metrics/estimators/__init__.py
@@ -2,8 +2,8 @@
 Estimators calculate performance bounds and the statistical distance between datasets.
 """
 
+__all__ = ["ber", "divergence", "uap", "BEROutput", "DivergenceOutput", "UAPOutput"]
+
 from dataeval.metrics.estimators.ber import BEROutput, ber
 from dataeval.metrics.estimators.divergence import DivergenceOutput, divergence
 from dataeval.metrics.estimators.uap import UAPOutput, uap
-
-__all__ = ["ber", "divergence", "uap", "BEROutput", "DivergenceOutput", "UAPOutput"]
dataeval/metrics/estimators/ber.py
@@ -5,11 +5,12 @@ KNN based estimate for the :term:`Bayes error rate<Bayes Error Rate (BER)>`
 
 Learning to Bound the Multi-class Bayes Error (Th. 3 and Th. 4)
 https://arxiv.org/abs/1811.06419
+
 """
 
 from __future__ import annotations
 
-__all__ = ["BEROutput", "ber"]
+__all__ = []
 
 from dataclasses import dataclass
 from typing import Literal
@@ -27,7 +28,7 @@ from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_met
 @dataclass(frozen=True)
 class BEROutput(Output):
     """
-    Output class for :func:`ber` estimator metric
+    Output class for :func:`ber` estimator metric.
 
     Attributes
     ----------
@@ -38,11 +39,12 @@ class BEROutput(Output):
     """
 
     ber: float
+
     ber_lower: float
 
 
 def ber_mst(images: NDArray[np.float64], labels: NDArray[np.int_], k: int = 1) -> tuple[float, float]:
-    """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree
+    """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.
 
     Parameters
     ----------
@@ -117,7 +119,8 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
 @set_metadata
 def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
     """
-    An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
+    An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` \
+    using FR or KNN test statistic basis.
 
     Parameters
     ----------
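A minimal usage sketch for the signature shown above; the random features and labels are illustrative, and passing flattened per-sample feature vectors is an assumption:

    import numpy as np
    from dataeval.metrics.estimators import ber

    rng = np.random.default_rng(0)
    images = rng.random((200, 16))        # flattened per-sample features (assumed acceptable)
    labels = rng.choice([0, 1], (200))

    result = ber(images, labels, k=1, method="KNN")
    print(result.ber, result.ber_lower)   # estimate and its lower bound, per BEROutput above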
dataeval/metrics/estimators/divergence.py
@@ -5,7 +5,7 @@ using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
 
 from __future__ import annotations
 
-__all__ = ["DivergenceOutput", "divergence"]
+__all__ = []
 
 from dataclasses import dataclass
 from typing import Literal
@@ -21,7 +21,7 @@ from dataeval.utils.shared import compute_neighbors, get_method, minimum_spannin
 @dataclass(frozen=True)
 class DivergenceOutput(Output):
     """
-    Output class for :func:`divergence` estimator metric
+    Output class for :func:`divergence` estimator metric.
 
     Attributes
     ----------
@@ -59,7 +59,7 @@ def divergence_mst(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
 
 def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     """
-    Calculates the estimated label errors based on their nearest neighbors
+    Calculates the estimated label errors based on their nearest neighbors.
 
     Parameters
     ----------
@@ -81,7 +81,7 @@ def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
 @set_metadata
 def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
     """
-    Calculates the :term`divergence` and any errors between the datasets
+    Calculates the :term:`divergence` and any errors between the datasets.
 
     Parameters
     ----------
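A minimal usage sketch against the signature above, with invented data:

    import numpy as np
    from dataeval.metrics.estimators import divergence

    rng = np.random.default_rng(0)
    data_a = rng.random((200, 8))
    data_b = rng.random((200, 8)) + 0.2   # shifted copy so the datasets differ

    print(divergence(data_a, data_b, method="FNN"))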
dataeval/metrics/estimators/uap.py
@@ -6,7 +6,7 @@ average precision<Upper-Bound Average Precision (UAP)>` using empirical mean pre
 
 from __future__ import annotations
 
-__all__ = ["UAPOutput", "uap"]
+__all__ = []
 
 from dataclasses import dataclass
 
@@ -20,7 +20,7 @@ from dataeval.output import Output, set_metadata
 @dataclass(frozen=True)
 class UAPOutput(Output):
     """
-    Output class for :func:`uap` estimator metric
+    Output class for :func:`uap` estimator metric.
 
     Attributes
     ----------
@@ -34,8 +34,8 @@ class UAPOutput(Output):
 @set_metadata
 def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     """
-    FR Test Statistic based estimate of the empirical mean precision for
-    the upperbound average precision
+    FR Test Statistic based estimate of the empirical mean precision for the \
+    upperbound average precision.
 
     Parameters
     ----------
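A minimal usage sketch for uap; treating scores as per-class prediction scores is an assumption, since only the signature appears in this diff:

    import numpy as np
    from dataeval.metrics.estimators import uap

    rng = np.random.default_rng(0)
    labels = rng.choice([0, 1, 2], (100))
    scores = rng.random((100, 3))
    scores /= scores.sum(axis=1, keepdims=True)  # normalize to probability-like scores

    print(uap(labels, scores))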
dataeval/metrics/stats/__init__.py
@@ -1,8 +1,26 @@
 """
-Statistics metrics calculate a variety of image properties and pixel statistics
+Statistics metrics calculate a variety of image properties and pixel statistics \
 and label statistics against the images and labels of a dataset.
 """
 
+__all__ = [
+    "ChannelStatsOutput",
+    "DatasetStatsOutput",
+    "DimensionStatsOutput",
+    "HashStatsOutput",
+    "LabelStatsOutput",
+    "PixelStatsOutput",
+    "VisualStatsOutput",
+    "boxratiostats",
+    "channelstats",
+    "datasetstats",
+    "dimensionstats",
+    "hashstats",
+    "labelstats",
+    "pixelstats",
+    "visualstats",
+]
+
 from dataeval.metrics.stats.boxratiostats import boxratiostats
 from dataeval.metrics.stats.datasetstats import (
     ChannelStatsOutput,
@@ -15,21 +33,3 @@ from dataeval.metrics.stats.hashstats import HashStatsOutput, hashstats
 from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
 from dataeval.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
 from dataeval.metrics.stats.visualstats import VisualStatsOutput, visualstats
-
-__all__ = [
-    "boxratiostats",
-    "channelstats",
-    "datasetstats",
-    "dimensionstats",
-    "hashstats",
-    "labelstats",
-    "pixelstats",
-    "visualstats",
-    "ChannelStatsOutput",
-    "DatasetStatsOutput",
-    "DimensionStatsOutput",
-    "HashStatsOutput",
-    "LabelStatsOutput",
-    "PixelStatsOutput",
-    "VisualStatsOutput",
-]
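With __all__ hoisted above the imports, the re-exported surface is unchanged. A usage sketch, where the NCHW layout and single-argument call signatures are assumptions rather than anything shown in this diff:

    import numpy as np
    from dataeval.metrics.stats import dimensionstats, pixelstats

    images = np.random.default_rng(0).random((8, 3, 32, 32))  # assumed NCHW batch

    print(dimensionstats(images))
    print(pixelstats(images))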