dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/config.py +68 -11
- dataeval/detectors/drift/__init__.py +2 -2
- dataeval/detectors/drift/_base.py +8 -64
- dataeval/detectors/drift/_mmd.py +12 -38
- dataeval/detectors/drift/_torch.py +7 -7
- dataeval/detectors/drift/_uncertainty.py +6 -5
- dataeval/detectors/drift/updates.py +20 -3
- dataeval/detectors/linters/__init__.py +3 -2
- dataeval/detectors/linters/duplicates.py +14 -46
- dataeval/detectors/linters/outliers.py +25 -159
- dataeval/detectors/ood/__init__.py +1 -1
- dataeval/detectors/ood/ae.py +6 -5
- dataeval/detectors/ood/base.py +2 -2
- dataeval/detectors/ood/metadata_ood_mi.py +4 -6
- dataeval/detectors/ood/mixin.py +3 -4
- dataeval/detectors/ood/vae.py +3 -2
- dataeval/metadata/__init__.py +2 -1
- dataeval/metadata/_distance.py +134 -0
- dataeval/metadata/_ood.py +30 -49
- dataeval/metadata/_utils.py +44 -0
- dataeval/metrics/bias/__init__.py +5 -4
- dataeval/metrics/bias/_balance.py +17 -149
- dataeval/metrics/bias/_coverage.py +4 -106
- dataeval/metrics/bias/_diversity.py +12 -107
- dataeval/metrics/bias/_parity.py +7 -71
- dataeval/metrics/estimators/__init__.py +5 -4
- dataeval/metrics/estimators/_ber.py +2 -20
- dataeval/metrics/estimators/_clusterer.py +1 -61
- dataeval/metrics/estimators/_divergence.py +2 -19
- dataeval/metrics/estimators/_uap.py +2 -16
- dataeval/metrics/stats/__init__.py +15 -12
- dataeval/metrics/stats/_base.py +41 -128
- dataeval/metrics/stats/_boxratiostats.py +13 -13
- dataeval/metrics/stats/_dimensionstats.py +17 -58
- dataeval/metrics/stats/_hashstats.py +19 -35
- dataeval/metrics/stats/_imagestats.py +94 -0
- dataeval/metrics/stats/_labelstats.py +42 -121
- dataeval/metrics/stats/_pixelstats.py +19 -51
- dataeval/metrics/stats/_visualstats.py +19 -51
- dataeval/outputs/__init__.py +57 -0
- dataeval/outputs/_base.py +182 -0
- dataeval/outputs/_bias.py +381 -0
- dataeval/outputs/_drift.py +83 -0
- dataeval/outputs/_estimators.py +114 -0
- dataeval/outputs/_linters.py +186 -0
- dataeval/outputs/_metadata.py +54 -0
- dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
- dataeval/outputs/_stats.py +393 -0
- dataeval/outputs/_utils.py +44 -0
- dataeval/outputs/_workflows.py +364 -0
- dataeval/typing.py +187 -7
- dataeval/utils/_method.py +1 -5
- dataeval/utils/_plot.py +2 -2
- dataeval/utils/data/__init__.py +5 -1
- dataeval/utils/data/_dataset.py +217 -0
- dataeval/utils/data/_embeddings.py +12 -14
- dataeval/utils/data/_images.py +30 -27
- dataeval/utils/data/_metadata.py +28 -11
- dataeval/utils/data/_selection.py +25 -22
- dataeval/utils/data/_split.py +5 -29
- dataeval/utils/data/_targets.py +14 -2
- dataeval/utils/data/datasets/_base.py +5 -5
- dataeval/utils/data/datasets/_cifar10.py +1 -1
- dataeval/utils/data/datasets/_milco.py +1 -1
- dataeval/utils/data/datasets/_mnist.py +1 -1
- dataeval/utils/data/datasets/_ships.py +1 -1
- dataeval/utils/data/{_types.py → datasets/_types.py} +10 -16
- dataeval/utils/data/datasets/_voc.py +1 -1
- dataeval/utils/data/selections/_classfilter.py +4 -5
- dataeval/utils/data/selections/_indices.py +2 -2
- dataeval/utils/data/selections/_limit.py +2 -2
- dataeval/utils/data/selections/_reverse.py +2 -2
- dataeval/utils/data/selections/_shuffle.py +2 -2
- dataeval/utils/torch/_internal.py +5 -5
- dataeval/utils/torch/trainer.py +8 -8
- dataeval/workflows/__init__.py +2 -1
- dataeval/workflows/sufficiency.py +6 -342
- {dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/METADATA +2 -2
- dataeval-0.82.1.dist-info/RECORD +105 -0
- dataeval/_output.py +0 -137
- dataeval/detectors/ood/metadata_ks_compare.py +0 -129
- dataeval/metrics/stats/_datasetstats.py +0 -198
- dataeval-0.81.0.dist-info/RECORD +0 -94
- {dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/WHEEL +0 -0
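The organizing change in this release is visible in the file list above: each metric's output dataclass moves out of its metric module and into the new dataeval.outputs package (_base.py, _bias.py, _estimators.py, _stats.py, and friends), while the metric functions keep their import paths. The hunks below also show the metric subpackages continuing to re-export the output names through their __all__ lists, so both import styles should resolve in 0.82.1. A minimal sketch (class names taken from the hunks below, not an exhaustive list):

```python
# New canonical home for result classes in 0.82.x:
from dataeval.outputs import DiversityOutput, ParityOutput

# The metric subpackages still re-export them, per the __all__ lists below:
from dataeval.metrics.estimators import BEROutput
```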
dataeval/metrics/bias/_diversity.py
CHANGED
@@ -2,116 +2,18 @@ from __future__ import annotations
 
 
 __all__ = []
 
-import contextlib
-from dataclasses import dataclass
-from typing import Any, Literal
+from typing import Literal
 
 import numpy as np
 import scipy as sp
 from numpy.typing import NDArray
 
-from dataeval.
-from dataeval.
+from dataeval.outputs import DiversityOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.utils._bin import get_counts
 from dataeval.utils._method import get_method
-from dataeval.utils._plot import heatmap
 from dataeval.utils.data import Metadata
 
-with contextlib.suppress(ImportError):
-    from matplotlib.figure import Figure
-
-
-def _plot(labels: NDArray[Any], bar_heights: NDArray[Any]) -> Figure:
-    """
-    Plots a formatted bar plot
-
-    Parameters
-    ----------
-    labels : NDArray
-        Array containing the labels for each bar
-    bar_heights : NDArray
-        Array containing the values for each bar
-
-    Returns
-    -------
-    matplotlib.figure.Figure
-        Bar plot figure
-    """
-    import matplotlib.pyplot as plt
-
-    fig, ax = plt.subplots(figsize=(8, 8))
-
-    ax.bar(labels, bar_heights)
-    ax.set_xlabel("Factors")
-
-    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
-
-    fig.tight_layout()
-    return fig
-
-
-@dataclass(frozen=True)
-class DiversityOutput(Output):
-    """
-    Output class for :func:`.diversity` :term:`bias<Bias>` metric.
-
-    Attributes
-    ----------
-    diversity_index : NDArray[np.double]
-        :term:`Diversity` index for classes and factors
-    classwise : NDArray[np.double]
-        Classwise diversity index [n_class x n_factor]
-    factor_names : list[str]
-        Names of each metadata factor
-    class_names : list[str]
-        Class labels for each value in the dataset
-    """
-
-    diversity_index: NDArray[np.double]
-    classwise: NDArray[np.double]
-    factor_names: list[str]
-    class_names: list[str]
-
-    def plot(
-        self,
-        row_labels: ArrayLike | None = None,
-        col_labels: ArrayLike | None = None,
-        plot_classwise: bool = False,
-    ) -> Figure:
-        """
-        Plot a heatmap of diversity information
-
-        Parameters
-        ----------
-        row_labels : ArrayLike or None, default None
-            List/Array containing the labels for rows in the histogram
-        col_labels : ArrayLike or None, default None
-            List/Array containing the labels for columns in the histogram
-        plot_classwise : bool, default False
-            Whether to plot per-class balance instead of global balance
-        """
-        if plot_classwise:
-            if row_labels is None:
-                row_labels = self.class_names
-            if col_labels is None:
-                col_labels = self.factor_names
-
-            fig = heatmap(
-                self.classwise,
-                row_labels,
-                col_labels,
-                xlabel="Factors",
-                ylabel="Class",
-                cbarlabel=f"Normalized {self.meta()['arguments']['method'].title()} Index",
-            )
-
-        else:
-            # Creating label array for heat map axes
-            heat_labels = np.concatenate((["class"], self.factor_names))
-            fig = _plot(heat_labels, self.diversity_index)
-
-        return fig
-
 
 def diversity_shannon(
     counts: NDArray[np.int_],
@@ -236,26 +138,29 @@ def diversity(
 
     >>> div_simp = diversity(metadata, method="simpson")
     >>> div_simp.diversity_index
-    array([0.6
+    array([0.6  , 0.809, 1.   , 0.8  ])
 
     >>> div_simp.classwise
-    array([[0.5
-           [0.
+    array([[0.5  , 0.8  , 0.8  ],
+           [0.63 , 0.976, 0.528]])
 
     Compute Shannon diversity index of metadata and class labels
 
     >>> div_shan = diversity(metadata, method="shannon")
     >>> div_shan.diversity_index
-    array([0.
+    array([0.811, 0.943, 1.   , 0.918])
 
     >>> div_shan.classwise
-    array([[0.
-           [0.
+    array([[0.683, 0.918, 0.918],
+           [0.814, 0.991, 0.764]])
 
     See Also
     --------
     scipy.stats.entropy
     """
+    if not metadata.discrete_factor_names and not metadata.continuous_factor_names:
+        raise ValueError("No factors found in provided metadata.")
+
     diversity_fn = get_method(_DIVERSITY_FN_MAP, method)
     discretized_data = np.hstack((metadata.class_labels[:, np.newaxis], metadata.discrete_data))
     cnts = get_counts(discretized_data)
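Besides relocating DiversityOutput, the second hunk adds an input guard: diversity now raises ValueError when the Metadata carries neither discrete nor continuous factors, instead of proceeding to a degenerate result. A hedged sketch of the new behavior; `metadata` is assumed to be a prepared dataeval.utils.data.Metadata instance like the one in the doctest:

```python
from dataeval.metrics.bias import diversity

try:
    result = diversity(metadata, method="simpson")
    # First entry covers the class label, then one entry per factor.
    print(result.diversity_index)
except ValueError as err:
    # Raised when discrete_factor_names and continuous_factor_names are both empty.
    print(f"diversity skipped: {err}")
```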
dataeval/metrics/bias/_parity.py
CHANGED
@@ -2,87 +2,20 @@ from __future__ import annotations
 
 
 __all__ = []
 
-import contextlib
 import warnings
-from dataclasses import dataclass
-from typing import Any, Generic, TypeVar
+from typing import Any
 
 import numpy as np
 from numpy.typing import NDArray
 from scipy.stats import chisquare
 from scipy.stats.contingency import chi2_contingency, crosstab
 
-from dataeval.
+from dataeval.outputs import LabelParityOutput, ParityOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import as_numpy
 from dataeval.utils.data import Metadata
 
-with contextlib.suppress(ImportError):
-    import pandas as pd
-
-TData = TypeVar("TData", np.float64, NDArray[np.float64])
-
-
-@dataclass(frozen=True)
-class BaseParityOutput(Generic[TData], Output):
-    score: TData
-    p_value: TData
-
-    def to_dataframe(self) -> pd.DataFrame:
-        """
-        Exports the parity output results to a pandas DataFrame.
-
-        Returns
-        -------
-        pd.DataFrame
-        """
-        import pandas as pd
-
-        return pd.DataFrame(
-            index=self.factor_names,  # type: ignore - list[str] is documented as acceptable index type
-            data={
-                "score": self.score.round(2),
-                "p-value": self.p_value.round(2),
-            },
-        )
-
-
-@dataclass(frozen=True)
-class LabelParityOutput(BaseParityOutput[np.float64]):
-    """
-    Output class for :func:`.label_parity` :term:`bias<Bias>` metrics.
-
-    Attributes
-    ----------
-    score : np.float64
-        chi-squared score(s) of the test
-    p_value : np.float64
-        p-value(s) of the test
-    """
-
-
-@dataclass(frozen=True)
-class ParityOutput(BaseParityOutput[NDArray[np.float64]]):
-    """
-    Output class for :func:`.parity` :term:`bias<Bias>` metrics.
-
-    Attributes
-    ----------
-    score : NDArray[np.float64]
-        chi-squared score(s) of the test
-    p_value : NDArray[np.float64]
-        p-value(s) of the test
-    factor_names : list[str]
-        Names of each metadata factor
-    insufficient_data: dict
-        Dictionary of metadata factors with less than 5 class occurrences per value
-    """
-
-    # score: NDArray[np.float64]
-    # p_value: NDArray[np.float64]
-    factor_names: list[str]
-    insufficient_data: dict[str, dict[int, dict[str, int]]]
-
 
 def normalize_expected_dist(expected_dist: NDArray[Any], observed_dist: NDArray[Any]) -> NDArray[Any]:
     """
@@ -306,8 +239,11 @@ def parity(metadata: Metadata) -> ParityOutput:
     ... random_seed=175)
     >>> metadata.continuous_factor_bins = {"age": 4, "income": 3}
    >>> parity(metadata)
-    ParityOutput(score=array([7.
+    ParityOutput(score=array([7.357, 5.467, 0.515]), p_value=array([0.289, 0.243, 0.773]), factor_names=['age', 'income', 'gender'], insufficient_data={'age': {3: {'artist': 4}, 4: {'artist': 4, 'teacher': 3}}, 'income': {1: {'artist': 3}}})
     """  # noqa: E501
+    if not metadata.discrete_factor_names and not metadata.continuous_factor_names:
+        raise ValueError("No factors found in provided metadata.")
+
     chi_scores = np.zeros(metadata.discrete_data.shape[1])
     p_values = np.zeros_like(chi_scores)
     insufficient_data = {}
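parity gains the same empty-metadata guard, and the updated doctest shows the relocated ParityOutput shape: parallel score/p_value arrays, factor_names, and an insufficient_data map flagging factor values with fewer than 5 occurrences of a class. A short inspection sketch, again assuming `metadata` is prepared as in the doctest:

```python
from dataeval.metrics.bias import parity

output = parity(metadata)
for name, score, p in zip(output.factor_names, output.score, output.p_value):
    print(f"{name}: chi2={score:.3f}, p={p:.3f}")
if output.insufficient_data:
    # Mapping of factor -> value -> {class: count} for under-populated cells.
    print("low-count cells:", output.insufficient_data)
```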
dataeval/metrics/estimators/__init__.py
CHANGED
@@ -13,7 +13,8 @@ __all__ = [
     "UAPOutput",
 ]
 
-from dataeval.metrics.estimators._ber import
-from dataeval.metrics.estimators._clusterer import
-from dataeval.metrics.estimators._divergence import
-from dataeval.metrics.estimators._uap import
+from dataeval.metrics.estimators._ber import ber
+from dataeval.metrics.estimators._clusterer import clusterer
+from dataeval.metrics.estimators._divergence import divergence
+from dataeval.metrics.estimators._uap import uap
+from dataeval.outputs._estimators import BEROutput, ClustererOutput, DivergenceOutput, UAPOutput
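The estimator functions keep their public import path; only the output class definitions move. Both of the following should resolve in 0.82.1, the second via the re-export added above:

```python
from dataeval.metrics.estimators import ber, clusterer, divergence, uap
from dataeval.outputs import BEROutput, ClustererOutput, DivergenceOutput, UAPOutput
```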
dataeval/metrics/estimators/_ber.py
CHANGED
@@ -12,7 +12,6 @@ from __future__ import annotations
 
 __all__ = []
 
-from dataclasses import dataclass
 from typing import Literal
 
 import numpy as np
@@ -20,31 +19,14 @@ from numpy.typing import NDArray
 from scipy.sparse import coo_matrix
 from scipy.stats import mode
 
-from dataeval.
+from dataeval.outputs import BEROutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import as_numpy, ensure_embeddings
 from dataeval.utils._method import get_method
 from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
 
 
-@dataclass(frozen=True)
-class BEROutput(Output):
-    """
-    Output class for :func:`.ber` estimator metric.
-
-    Attributes
-    ----------
-    ber : float
-        The upper bounds of the :term:`Bayes error rate<Bayes Error Rate (BER)>`
-    ber_lower : float
-        The lower bounds of the Bayes Error Rate
-    """
-
-    ber: float
-
-    ber_lower: float
-
-
 def ber_mst(images: NDArray[np.float64], labels: NDArray[np.int_], k: int = 1) -> tuple[float, float]:
     """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.
 
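ber itself is unchanged by this diff; only BEROutput's definition moved. A usage sketch with synthetic inputs (the embedding/label shapes and the two-positional-argument call are assumptions based on the surrounding code, not confirmed by this hunk):

```python
import numpy as np

from dataeval.metrics.estimators import ber

rng = np.random.default_rng(0)
embeddings = rng.normal(size=(100, 16))  # assumed: one embedding vector per sample
labels = np.arange(100) % 2              # assumed: integer class labels

result = ber(embeddings, labels)         # returns the relocated BEROutput
print(result.ber, result.ber_lower)      # upper and lower Bayes error rate bounds
```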
dataeval/metrics/estimators/_clusterer.py
CHANGED
@@ -2,72 +2,12 @@ from __future__ import annotations
 
 
 __all__ = []
 
-from dataclasses import dataclass
 
-
-from numpy.typing import NDArray
-
-from dataeval._output import Output
+from dataeval.outputs import ClustererOutput
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import as_numpy
 
 
-@dataclass(frozen=True)
-class ClustererOutput(Output):
-    """
-    Output class for :func:`.clusterer`.
-
-    Attributes
-    ----------
-    clusters : NDArray[int]
-        Assigned clusters
-    mst : NDArray[int]
-        The minimum spanning tree of the data
-    linkage_tree : NDArray[float]
-        The linkage array of the data
-    condensed_tree : NDArray[float]
-        The condensed tree of the data
-    membership_strengths : NDArray[float]
-        The strength of the data point belonging to the assigned cluster
-    """
-
-    clusters: NDArray[np.int_]
-    mst: NDArray[np.double]
-    linkage_tree: NDArray[np.double]
-    condensed_tree: NDArray[np.double]
-    membership_strengths: NDArray[np.double]
-
-    def find_outliers(self) -> NDArray[np.int_]:
-        """
-        Retrieves Outliers based on when the sample was added to the cluster
-        and how far it was from the cluster when it was added
-
-        Returns
-        -------
-        NDArray[int]
-            A numpy array of the outlier indices
-        """
-        return np.nonzero(self.clusters == -1)[0]
-
-    def find_duplicates(self) -> tuple[list[list[int]], list[list[int]]]:
-        """
-        Finds duplicate and near duplicate data based on cluster average distance
-
-        Returns
-        -------
-        Tuple[List[List[int]], List[List[int]]]
-            The exact :term:`duplicates<Duplicates>` and near duplicates as lists of related indices
-        """
-        # Delay load numba compiled functions
-        from dataeval.utils._clusterer import compare_links_to_cluster_std, sorted_union_find
-
-        exact_indices, near_indices = compare_links_to_cluster_std(self.mst, self.clusters)
-        exact_dupes = sorted_union_find(exact_indices)
-        near_dupes = sorted_union_find(near_indices)
-
-        return [[int(ii) for ii in il] for il in exact_dupes], [[int(ii) for ii in il] for il in near_dupes]
-
-
 def clusterer(data: ArrayLike) -> ClustererOutput:
     """
     Uses hierarchical clustering on the flattened data and returns clustering
dataeval/metrics/estimators/_divergence.py
CHANGED
@@ -7,36 +7,19 @@ from __future__ import annotations
 
 __all__ = []
 
-from dataclasses import dataclass
 from typing import Literal
 
 import numpy as np
 from numpy.typing import NDArray
 
-from dataeval.
+from dataeval.outputs import DivergenceOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import ensure_embeddings
 from dataeval.utils._method import get_method
 from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
 
 
-@dataclass(frozen=True)
-class DivergenceOutput(Output):
-    """
-    Output class for :func:`.divergence` estimator metric.
-
-    Attributes
-    ----------
-    divergence : float
-        :term:`Divergence` value calculated between 2 datasets ranging between 0.0 and 1.0
-    errors : int
-        The number of differing edges between the datasets
-    """
-
-    divergence: float
-    errors: int
-
-
 def divergence_mst(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     """
     Calculates the estimated label errors based on the minimum spanning tree
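The same pattern for divergence; the two-positional-dataset call below is an assumption drawn from the removed docstring ("value calculated between 2 datasets"):

```python
import numpy as np

from dataeval.metrics.estimators import divergence

rng = np.random.default_rng(0)
emb_a = rng.normal(0.0, 1.0, size=(100, 8))  # assumed embeddings for dataset A
emb_b = rng.normal(0.5, 1.0, size=(100, 8))  # dataset B, slightly shifted

result = divergence(emb_a, emb_b)
print(result.divergence)  # 0.0 (indistinguishable) to 1.0 (fully separable)
print(result.errors)      # number of differing MST edges between the datasets
```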
dataeval/metrics/estimators/_uap.py
CHANGED
@@ -8,29 +8,15 @@ from __future__ import annotations
 
 __all__ = []
 
-from dataclasses import dataclass
 
 from sklearn.metrics import average_precision_score
 
-from dataeval.
+from dataeval.outputs import UAPOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import as_numpy
 
 
-@dataclass(frozen=True)
-class UAPOutput(Output):
-    """
-    Output class for :func:`.uap` estimator metric.
-
-    Attributes
-    ----------
-    uap : float
-        The empirical mean precision estimate
-    """
-
-    uap: float
-
-
 @set_metadata
 def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     """
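uap's signature survives intact in the context lines (labels and scores, both ArrayLike). A minimal sketch with a binary example; the score layout is an assumption:

```python
import numpy as np

from dataeval.metrics.estimators import uap

labels = np.array([0, 0, 1, 1, 0, 1])               # ground-truth class labels
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9])  # assumed: model confidence per sample

result = uap(labels, scores)
print(result.uap)  # empirical mean precision estimate
```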
dataeval/metrics/stats/__init__.py
CHANGED
@@ -5,15 +5,14 @@ and label statistics against the images and labels of a dataset.
 
 __all__ = [
     "ChannelStatsOutput",
-    "
+    "ImageStatsOutput",
     "DimensionStatsOutput",
     "HashStatsOutput",
     "LabelStatsOutput",
     "PixelStatsOutput",
     "VisualStatsOutput",
     "boxratiostats",
-    "
-    "datasetstats",
+    "imagestats",
     "dimensionstats",
     "hashstats",
     "labelstats",
@@ -22,14 +21,18 @@ __all__ = [
 ]
 
 from dataeval.metrics.stats._boxratiostats import boxratiostats
-from dataeval.metrics.stats.
+from dataeval.metrics.stats._dimensionstats import dimensionstats
+from dataeval.metrics.stats._hashstats import hashstats
+from dataeval.metrics.stats._imagestats import imagestats
+from dataeval.metrics.stats._labelstats import labelstats
+from dataeval.metrics.stats._pixelstats import pixelstats
+from dataeval.metrics.stats._visualstats import visualstats
+from dataeval.outputs._stats import (
     ChannelStatsOutput,
-
-
-
+    DimensionStatsOutput,
+    HashStatsOutput,
+    ImageStatsOutput,
+    LabelStatsOutput,
+    PixelStatsOutput,
+    VisualStatsOutput,
 )
-from dataeval.metrics.stats._dimensionstats import DimensionStatsOutput, dimensionstats
-from dataeval.metrics.stats._hashstats import HashStatsOutput, hashstats
-from dataeval.metrics.stats._labelstats import LabelStatsOutput, labelstats
-from dataeval.metrics.stats._pixelstats import PixelStatsOutput, pixelstats
-from dataeval.metrics.stats._visualstats import VisualStatsOutput, visualstats