dataeval 0.61.0__py3-none-any.whl → 0.63.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. dataeval/__init__.py +1 -1
  2. dataeval/_internal/detectors/clusterer.py +44 -16
  3. dataeval/_internal/detectors/drift/base.py +14 -12
  4. dataeval/_internal/detectors/drift/cvm.py +11 -8
  5. dataeval/_internal/detectors/drift/ks.py +6 -3
  6. dataeval/_internal/detectors/drift/mmd.py +14 -12
  7. dataeval/_internal/detectors/drift/uncertainty.py +7 -5
  8. dataeval/_internal/detectors/duplicates.py +35 -12
  9. dataeval/_internal/detectors/linter.py +85 -16
  10. dataeval/_internal/detectors/ood/ae.py +6 -5
  11. dataeval/_internal/detectors/ood/aegmm.py +5 -5
  12. dataeval/_internal/detectors/ood/base.py +14 -13
  13. dataeval/_internal/detectors/ood/llr.py +6 -4
  14. dataeval/_internal/detectors/ood/vae.py +5 -4
  15. dataeval/_internal/detectors/ood/vaegmm.py +5 -4
  16. dataeval/_internal/functional/__init__.py +0 -0
  17. dataeval/_internal/functional/ber.py +63 -0
  18. dataeval/_internal/functional/coverage.py +75 -0
  19. dataeval/_internal/functional/divergence.py +16 -0
  20. dataeval/_internal/{metrics → functional}/hash.py +1 -1
  21. dataeval/_internal/functional/metadata.py +136 -0
  22. dataeval/_internal/functional/metadataparity.py +190 -0
  23. dataeval/_internal/functional/uap.py +6 -0
  24. dataeval/_internal/interop.py +52 -0
  25. dataeval/_internal/maite/__init__.py +0 -0
  26. dataeval/_internal/maite/utils.py +30 -0
  27. dataeval/_internal/metrics/base.py +2 -2
  28. dataeval/_internal/metrics/ber.py +16 -66
  29. dataeval/_internal/metrics/coverage.py +51 -35
  30. dataeval/_internal/metrics/divergence.py +50 -42
  31. dataeval/_internal/metrics/metadata.py +610 -0
  32. dataeval/_internal/metrics/metadataparity.py +67 -0
  33. dataeval/_internal/metrics/parity.py +40 -56
  34. dataeval/_internal/metrics/stats.py +46 -35
  35. dataeval/_internal/metrics/uap.py +14 -17
  36. dataeval/_internal/workflows/__init__.py +0 -0
  37. dataeval/metrics/__init__.py +2 -1
  38. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/METADATA +1 -2
  39. dataeval-0.63.0.dist-info/RECORD +68 -0
  40. dataeval-0.61.0.dist-info/RECORD +0 -55
  41. /dataeval/_internal/{metrics → functional}/utils.py +0 -0
  42. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/LICENSE.txt +0 -0
  43. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/WHEEL +0 -0
dataeval/_internal/functional/metadata.py (new file)
@@ -0,0 +1,136 @@
+from typing import Dict, List
+
+import numpy as np
+from scipy.stats import entropy
+
+
+def _get_counts(
+    data: np.ndarray, names: list[str], is_categorical: List, subset_mask: np.ndarray = np.empty(shape=0)
+) -> tuple[Dict, Dict]:
+    """
+    Initialize dictionary of histogram counts --- treat categorical values
+    as histogram bins.
+
+    Parameters
+    ----------
+    subset_mask: Optional[np.ndarray[bool]]
+        Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+
+    Returns
+    -------
+    counts: Dict
+        histogram counts per metadata factor in `factors`. Each
+        factor will have a different number of bins. Counts get reused
+        across metrics, so hist_counts are cached but only if computed
+        globally, i.e. without masked samples.
+    """
+
+    hist_counts, hist_bins = {}, {}
+    # np.where needed to satisfy linter
+    mask = np.where(subset_mask if len(subset_mask) > 0 else np.ones(data.shape[0], dtype=bool))
+
+    for cdx, fn in enumerate(names):
+        # linter doesn't like double indexing
+        col_data = data[mask, cdx].squeeze()
+        if is_categorical[cdx]:
+            # if discrete, use unique values as bins
+            bins, cnts = np.unique(col_data, return_counts=True)
+        else:
+            bins = hist_bins.get(fn, "auto")
+            cnts, bins = np.histogram(col_data, bins=bins, density=True)
+
+        hist_counts[fn] = cnts
+        hist_bins[fn] = bins
+
+    return hist_counts, hist_bins
+
+
+def _entropy(
+    data: np.ndarray,
+    names: list,
+    is_categorical: List,
+    normalized: bool = False,
+    subset_mask: np.ndarray = np.empty(shape=0),
+) -> np.ndarray:
+    """
+    Meant for use with Bias metrics, Balance, Diversity, ClasswiseBalance,
+    and Classwise Diversity.
+
+    Compute entropy for discrete/categorical variables and, through standard
+    histogram binning, for continuous variables.
+
+
+    Parameters
+    ----------
+    normalized: bool
+        Flag that determines whether or not to normalize entropy by log(num_bins)
+    subset_mask: Optional[np.ndarray[bool]]
+        Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+
+
+    Notes
+    -----
+    For continuous variables, histogram bins are chosen automatically. See
+    numpy.histogram for details.
+
+    Returns
+    -------
+    ent: np.ndarray[float]
+        Entropy estimate per column of X
+
+    See Also
+    --------
+    numpy.histogram
+    scipy.stats.entropy
+    """
+
+    num_factors = len(names)
+    hist_counts, _ = _get_counts(data, names, is_categorical, subset_mask=subset_mask)
+
+    ev_index = np.empty(num_factors)
+    for col, cnts in enumerate(hist_counts.values()):
+        # entropy in nats, normalizes counts
+        ev_index[col] = entropy(cnts)
+        if normalized:
+            if len(cnts) == 1:
+                # log(0)
+                ev_index[col] = 0
+            else:
+                ev_index[col] /= np.log(len(cnts))
+    return ev_index
+
+
+def _get_num_bins(
+    data: np.ndarray, names: list, is_categorical: List, subset_mask: np.ndarray = np.empty(shape=0)
+) -> np.ndarray:
+    """
+    Number of bins or unique values for each metadata factor, used to
+    normalize entropy/diversity.
+
+    Parameters
+    ----------
+    subset_mask: Optional[np.ndarray[bool]]
+        Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+    """
+    # likely cached
+    hist_counts, _ = _get_counts(data, names, is_categorical, subset_mask)
+    num_bins = np.empty(len(hist_counts))
+    for idx, cnts in enumerate(hist_counts.values()):
+        num_bins[idx] = len(cnts)
+
+    return num_bins
+
+
+def _infer_categorical(X: np.ndarray, threshold: float = 0.5) -> np.ndarray:
+    """
+    Compute fraction of feature values that are unique --- intended to be used
+    for inferring whether variables are categorical.
+    """
+    if X.ndim == 1:
+        X = np.expand_dims(X, axis=1)
+    num_samples = X.shape[0]
+    pct_unique = np.empty(X.shape[1])
+    for col in range(X.shape[1]):  # type: ignore
+        uvals = np.unique(X[:, col], axis=0)
+        pct_unique[col] = len(uvals) / num_samples
+    return pct_unique < threshold
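These helpers are private building blocks for the new bias and diversity metrics in dataeval/_internal/metrics/metadata.py (collapsed above). A minimal sketch of how they compose, assuming direct import of the private functions; the factor names and values below are made up for illustration:

    import numpy as np

    from dataeval._internal.functional.metadata import _entropy, _infer_categorical

    # Six samples with two metadata factors stacked column-wise.
    data = np.stack(
        [
            np.array([0, 0, 0, 1, 1, 1]),              # hypothetical "location" codes
            np.array([0.1, 0.5, 0.9, 1.3, 1.8, 2.2]),  # hypothetical "brightness" values
        ],
        axis=1,
    )
    names = ["location", "brightness"]

    # Columns with fewer than 50% unique values are treated as categorical.
    is_categorical = _infer_categorical(data).tolist()

    # Normalized entropy per factor; 1.0 means uniform across bins.
    print(_entropy(data, names, is_categorical, normalized=True))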
dataeval/_internal/functional/metadataparity.py (new file)
@@ -0,0 +1,190 @@
+import warnings
+from typing import Dict, Tuple
+
+import numpy as np
+import scipy
+
+
+def validate_dict(d: Dict) -> None:
+    """
+    Verify that dict-of-arrays (proxy for dataframe) contains arrays of equal
+    length. Future iterations could include type checking, conversion from
+    string to numeric types, etc.
+
+    Parameters
+    ----------
+    d: Dict
+        dictionary of {variable_name: values}
+    """
+    # assert that length of all arrays are equal -- could expand to other properties
+    lengths = []
+    for arr in d.values():
+        lengths.append(arr.shape)
+
+    if lengths[1:] != lengths[:-1]:
+        raise ValueError("The lengths of each entry in the dictionary are not equal." f" Found lengths {lengths}")
+
+
+def digitize_factor_bins(continuous_values: np.ndarray, bins: int, factor_name: str):
+    """
+    Digitizes a list of values into a given number of bins.
+
+    Parameters
+    ----------
+    continuous_values: np.ndarray
+        The values to be digitized.
+    bins: int
+        The number of bins for the discrete values that continuous_values will be digitized into.
+    factor_name: str
+        The name of the factor to be digitized.
+
+    Returns
+    -------
+    np.ndarray
+        The digitized values
+
+    """
+    if not np.all([np.issubdtype(type(n), np.number) for n in continuous_values]):
+        raise TypeError(
+            f"Encountered a non-numeric value for factor {factor_name}, but the factor"
+            " was specified to be continuous. Ensure all occurrences of this factor are numeric types,"
+            f" or do not specify {factor_name} as a continuous factor."
+        )
+
+    _, bin_edges = np.histogram(continuous_values, bins=bins)
+    bin_edges[-1] = np.inf
+    bin_edges[0] = -np.inf
+    return np.digitize(continuous_values, bin_edges)
+
+
+def format_discretize_factors(
+    data_factors: dict[str, np.ndarray], continuous_factor_names: np.ndarray, continuous_factor_bincounts: np.ndarray
+) -> Tuple[dict, np.ndarray]:
+    """
+    Sets up the internal list of metadata factors.
+
+    Parameters
+    ----------
+    data_factors: Dict[str, np.ndarray]
+        The dataset factors, which are per-image attributes including class label and metadata.
+        Each key of dataset_factors is a factor, whose value is the per-image factor values.
+    continuous_factor_names : np.ndarray
+        The factors in data_factors that have continuous values.
+        All factors are treated as having discrete values unless they
+        are specified in this array. Each element of this array must occur as a key in data_factors.
+    continuous_factor_bincounts : np.ndarray
+        Array of the bin counts to discretize values into for each factor in continuous_factor_names.
+
+    Returns
+    -------
+    Dict[str, np.ndarray]
+        Intrinsic per-image metadata information with the formatting that input data_factors uses.
+        Each key is a metadata factor, whose value is the discrete per-image factor values.
+    np.ndarray
+        Per-image labels, whose ith element is the label for the ith element of the dataset.
+    """
+
+    if len(continuous_factor_bincounts) != len(continuous_factor_names):
+        raise ValueError(
+            f"continuous_factor_bincounts has length {len(continuous_factor_bincounts)}, "
+            f"but continuous_factor_names has length {len(continuous_factor_names)}. "
+            "Each element of continuous_factor_names must have a corresponding element "
+            "in continuous_factor_bincounts. Alternatively, leave continuous_factor_bincounts empty "
+            "to use a default digitization of 10 bins."
+        )
+
+    # TODO: add unit test for this
+    for key in continuous_factor_names:
+        if key not in data_factors:
+            raise KeyError(
+                f"The continuous factor name {key} "
+                f"does not exist in data_factors. Delete {key} from "
+                f"continuous_factor_names or add an entry with key {key} to "
+                "data_factors."
+            )
+
+    metadata_factors = {}
+
+    # make sure each factor has the same number of entries
+    validate_dict(data_factors)
+
+    labels = data_factors["class"]
+
+    # Each continuous factor is discretized into some number of bins.
+    # This matches the number of bins for a factor with the factor
+    num_bins = dict(zip(continuous_factor_names, continuous_factor_bincounts))
+
+    metadata_factors = {
+        name: val if name not in continuous_factor_names else digitize_factor_bins(val, num_bins[name], name)
+        for name, val in data_factors.items()
+        if name != "class"
+    }
+
+    return metadata_factors, labels
+
+
+def compute_parity(factors: dict[str, np.ndarray], labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Evaluates the statistical independence of metadata factors from class labels.
+    This performs a chi-square test, which provides a score and a p-value for
+    statistical independence between each pair of a metadata factor and a class label.
+    A high score with a low p-value suggests that a metadata factor is strongly
+    correlated with a class label.
+
+    Parameters
+    ----------
+    factors: Dict[str, np.ndarray]
+        Intrinsic per-image metadata information.
+        factors['key'][i] is the value of the metadata factor 'key' at the ith element of the dataset.
+    labels: np.ndarray
+        Dataset labels.
+        Labels[i] is the label for the ith element of the dataset.
+
+    Returns
+    -------
+    np.ndarray
+        Array of length (num_factors) whose (i)th element corresponds to
+        the chi-square score for the relationship between factor i
+        and the class labels in the dataset.
+    np.ndarray
+        Array of length (num_factors) whose (i)th element corresponds to
+        the p-value value for the chi-square test for the relationship between
+        factor i and the class labels in the dataset.
+    """

+    chi_scores = np.zeros(len(factors))
+    p_values = np.zeros(len(factors))
+    n_cls = len(np.unique(labels))
+    for i, (current_factor_name, factor_values) in enumerate(factors.items()):
+        unique_factor_values = np.unique(factor_values)
+        contingency_matrix = np.zeros((len(unique_factor_values), n_cls))
+        # Builds a contingency matrix where entry at index (r,c) represents
+        # the frequency of current_factor_name achieving value unique_factor_values[r]
+        # at a data point with class c.
+
+        # TODO: Vectorize this nested for loop
+        for fi, factor_value in enumerate(unique_factor_values):
+            for label in range(n_cls):
+                with_both = np.bitwise_and((labels == label), factor_values == factor_value)
+                contingency_matrix[fi, label] = np.sum(with_both)
+                if 0 < contingency_matrix[fi, label] < 5:
+                    warnings.warn(
+                        f"Factor {current_factor_name} value {factor_value} co-occurs "
+                        f"only {contingency_matrix[fi, label]} times with label {label}. "
+                        "This can cause inaccurate chi_square calculation. Recommend"
+                        "ensuring each label occurs either 0 times or at least 5 times. "
+                        "Alternatively, digitize any continuous-valued factors "
+                        "into fewer bins."
+                    )
+
+        # This deletes rows containing only zeros,
+        # because scipy.stats.chi2_contingency fails when there are rows containing only zeros.
+        rowsums = np.sum(contingency_matrix, axis=1)
+        rowmask = np.where(rowsums)
+        contingency_matrix = contingency_matrix[rowmask]
+
+        chi2, p, _, _ = scipy.stats.chi2_contingency(contingency_matrix)
+
+        chi_scores[i] = chi2
+        p_values[i] = p
+    return chi_scores, p_values
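A sketch of the intended flow through these functions; the class-based wrapper added in dataeval/_internal/metrics/metadataparity.py is not expanded in this diff, and the factor names and sizes below are illustrative only:

    import numpy as np

    from dataeval._internal.functional.metadataparity import compute_parity, format_discretize_factors

    rng = np.random.default_rng(0)
    data_factors = {
        "class": rng.integers(0, 2, size=200),           # per-image class labels
        "weather": rng.integers(0, 3, size=200),         # already-discrete factor
        "altitude": rng.uniform(0.0, 1000.0, size=200),  # continuous factor to be binned
    }

    factors, labels = format_discretize_factors(
        data_factors,
        continuous_factor_names=np.array(["altitude"]),
        continuous_factor_bincounts=np.array([5]),
    )
    chi_scores, p_values = compute_parity(factors, labels)
    print(dict(zip(factors, p_values)))  # high p-values suggest independence from the class label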
dataeval/_internal/functional/uap.py (new file)
@@ -0,0 +1,6 @@
+import numpy as np
+from sklearn.metrics import average_precision_score
+
+
+def uap(labels: np.ndarray, scores: np.ndarray):
+    return float(average_precision_score(labels, scores, average="weighted"))
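A small usage sketch of this functional form; the labels here are one-hot indicators, which sklearn's average_precision_score accepts for the weighted average:

    import numpy as np

    from dataeval._internal.functional.uap import uap

    # One-hot labels and per-class scores for four samples over three classes.
    labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]])
    scores = np.array([[0.9, 0.05, 0.05], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7], [0.6, 0.3, 0.1]])

    print(uap(labels, scores))  # weighted mean average precision across classes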
dataeval/_internal/interop.py (new file)
@@ -0,0 +1,52 @@
+from importlib import import_module
+from typing import Any, Iterable, Optional, runtime_checkable
+
+import numpy as np
+
+module_cache = {}
+
+
+def try_import(module_name):
+    if module_name in module_cache:
+        return module_cache[module_name]
+
+    try:
+        module = import_module(module_name)
+    except ImportError:  # pragma: no cover - covered by test_mindeps.py
+        module = None
+
+    module_cache[module_name] = module
+    return module
+
+
+try:
+    from maite.protocols import ArrayLike  # type: ignore
+except ImportError:  # pragma: no cover - covered by test_mindeps.py
+    from typing import Protocol
+
+    @runtime_checkable
+    class ArrayLike(Protocol):
+        def __array__(self) -> Any: ...
+
+
+def to_numpy(array: Optional[ArrayLike]) -> np.ndarray:
+    if array is None:
+        return np.ndarray([])
+
+    if isinstance(array, np.ndarray):
+        return array
+
+    tf = try_import("tensorflow")
+    if tf and tf.is_tensor(array):
+        return array.numpy()  # type: ignore
+
+    torch = try_import("torch")
+    if torch and isinstance(array, torch.Tensor):
+        return array.detach().cpu().numpy()  # type: ignore
+
+    return np.asarray(array)
+
+
+def to_numpy_iter(iterable: Iterable[ArrayLike]):
+    for array in iterable:
+        yield to_numpy(array)
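A quick sketch of the conversion behavior; the torch call is left commented out because torch and tensorflow are optional dependencies resolved lazily through try_import:

    import numpy as np

    from dataeval._internal.interop import to_numpy, to_numpy_iter

    print(to_numpy([[1, 2], [3, 4]]))  # plain sequences fall through to np.asarray
    print(to_numpy(np.ones((2, 2))))   # ndarrays are returned as-is

    # If torch is installed, tensors are detached and moved to CPU first:
    # import torch
    # print(to_numpy(torch.ones(2, 2, requires_grad=True)))

    for batch in to_numpy_iter([[1, 2], [3, 4]]):  # converts each element lazily
        print(batch)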
dataeval/_internal/maite/__init__.py (new empty file, no content to show)
dataeval/_internal/maite/utils.py (new file)
@@ -0,0 +1,30 @@
+from typing import Tuple
+
+import numpy as np
+import torch
+
+import maite.protocols.image_classification as ic
+from maite.protocols import ArrayLike
+
+
+def arraylike_to_numpy(xp: ArrayLike) -> np.ndarray:
+    """Converts ArrayLike objects to numpy"""
+
+    # Must ensure Tensors are not on GPU
+    return xp.detach().cpu().numpy() if isinstance(xp, torch.Tensor) else np.asarray(xp)
+
+
+# TODO: Overload with od.Dataset
+# TODO: Check if batching aggregation is faster (e.g. DataLoader)
+# TODO: Add verbosity flags (tqdm?)
+def extract_to_numpy(dataset: ic.Dataset) -> Tuple[np.ndarray, np.ndarray]:
+    """Iterate over dataset and separate images from labels"""
+    images = []
+    labels = []
+
+    # (image, label, metadata)
+    for image, label, _ in dataset:
+        images.append(image)
+        labels.append(label)
+
+    return np.asarray(images), np.asarray(labels)
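A sketch of extract_to_numpy using a stand-in dataset rather than a real maite dataset; the helper only iterates, so any object yielding (image, label, metadata) triples works for illustration:

    import numpy as np

    from dataeval._internal.maite.utils import extract_to_numpy


    class TinyDataset:
        """Stand-in that yields (image, label, metadata) like a maite image-classification dataset."""

        def __init__(self, n: int = 4):
            self._items = [(np.zeros((3, 8, 8)), i % 2, {"id": i}) for i in range(n)]

        def __iter__(self):
            return iter(self._items)


    images, labels = extract_to_numpy(TinyDataset())  # type: ignore[arg-type]
    print(images.shape, labels.shape)  # (4, 3, 8, 8) (4,)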
dataeval/_internal/metrics/base.py
@@ -8,7 +8,7 @@ TCallable = TypeVar("TCallable", bound=Callable)
 
 class MetricMixin(ABC, Generic[TOutput]):
     @abstractmethod
-    def update(self, preds, targets): ...
+    def update(self, *args, **kwargs): ...
 
     @abstractmethod
     def compute(self) -> TOutput: ...
@@ -19,7 +19,7 @@ class MetricMixin(ABC, Generic[TOutput]):
 
 class EvaluateMixin(ABC, Generic[TOutput]):
     @abstractmethod
-    def evaluate(self) -> TOutput:
+    def evaluate(self, *args, **kwargs) -> TOutput:
         """Abstract method to calculate metric based off of constructor parameters"""
 
 
dataeval/_internal/metrics/ber.py
@@ -10,59 +10,11 @@ https://arxiv.org/abs/1811.06419
 from typing import Callable, Dict, Literal, Tuple
 
 import numpy as np
-from maite.protocols import ArrayLike
-from scipy.sparse import coo_matrix
-from scipy.stats import mode
 
+from dataeval._internal.functional.ber import ber_knn, ber_mst
+from dataeval._internal.interop import ArrayLike, to_numpy
 from dataeval._internal.metrics.base import EvaluateMixin, MethodsMixin
 
-from .utils import compute_neighbors, get_classes_counts, minimum_spanning_tree
-
-
-def _mst(X: np.ndarray, y: np.ndarray, _: int) -> Tuple[float, float]:
-    M, N = get_classes_counts(y)
-
-    tree = coo_matrix(minimum_spanning_tree(X))
-    matches = np.sum([y[tree.row[i]] != y[tree.col[i]] for i in range(N - 1)])
-    deltas = matches / (2 * N)
-    upper = 2 * deltas
-    lower = ((M - 1) / (M)) * (1 - max(1 - 2 * ((M) / (M - 1)) * deltas, 0) ** 0.5)
-    return upper, lower
-
-
-def _knn(X: np.ndarray, y: np.ndarray, k: int) -> Tuple[float, float]:
-    M, N = get_classes_counts(y)
-
-    # All features belong on second dimension
-    X = X.reshape((X.shape[0], -1))
-    nn_indices = compute_neighbors(X, X, k=k)
-    nn_indices = np.expand_dims(nn_indices, axis=1) if nn_indices.ndim == 1 else nn_indices
-    modal_class = mode(y[nn_indices], axis=1, keepdims=True).mode.squeeze()
-    upper = float(np.count_nonzero(modal_class - y) / N)
-    lower = _knn_lowerbound(upper, M, k)
-    return upper, lower
-
-
-def _knn_lowerbound(value: float, classes: int, k: int) -> float:
-    "Several cases for computing the BER lower bound"
-    if value <= 1e-10:
-        return 0.0
-
-    if classes == 2 and k != 1:
-        if k > 5:
-            # Property 2 (Devroye, 1981) cited in Snoopy paper, not in snoopy repo
-            alpha = 0.3399
-            beta = 0.9749
-            a_k = alpha * np.sqrt(k) / (k - 3.25) * (1 + beta / (np.sqrt(k - 3)))
-            return value / (1 + a_k)
-        if k > 2:
-            return value / (1 + (1 / np.sqrt(k)))
-        # k == 2:
-        return value / 2
-
-    return ((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value))))
-
-
 _METHODS = Literal["MST", "KNN"]
 _FUNCTION = Callable[[np.ndarray, np.ndarray, int], Tuple[float, float]]
 
@@ -73,10 +25,6 @@ class BER(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
 
     Parameters
     ----------
-    data : np.ndarray
-        Array of images or image embeddings
-    labels : np.ndarray
-        Array of labels for each image or image embedding
     method : Literal["MST", "KNN"], default "KNN"
         Method to use when estimating the Bayes error rate
     k : int, default 1
@@ -89,22 +37,25 @@ class BER(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
 
     """
 
-    def __init__(self, data: ArrayLike, labels: ArrayLike, method: _METHODS = "KNN", k: int = 1) -> None:
-        self.data = data
-        self.labels = labels
-        self.k = k
+    def __init__(self, method: _METHODS = "KNN", k: int = 1) -> None:
+        self.k: int = k
         self._set_method(method)
 
     @classmethod
-    def _methods(
-        cls,
-    ) -> Dict[str, _FUNCTION]:
-        return {"MST": _mst, "KNN": _knn}
+    def _methods(cls) -> Dict[str, _FUNCTION]:
+        return {"KNN": ber_knn, "MST": ber_mst}
 
-    def evaluate(self) -> Dict[str, float]:
+    def evaluate(self, images: ArrayLike, labels: ArrayLike) -> Dict[str, float]:
         """
         Calculates the Bayes Error Rate estimate using the provided method
 
+        Parameters
+        ----------
+        images : ArrayLike (N, : )
+            Array of images or image embeddings
+        labels : ArrayLike (N, 1)
+            Array of labels for each image or image embedding
+
         Returns
         -------
         Dict[str, float]
@@ -118,7 +69,6 @@ class BER(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
         ValueError
             If unique classes M < 2
         """
-        data = np.asarray(self.data)
-        labels = np.asarray(self.labels)
-        upper, lower = self._method(data, labels, self.k)
+
+        upper, lower = self._method(to_numpy(images), to_numpy(labels), self.k)
         return {"ber": upper, "ber_lower": lower}
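The net effect is that data now flows through evaluate() rather than the constructor. A hedged usage sketch, assuming BER remains exported from dataeval.metrics as in prior releases and using random placeholder embeddings:

    import numpy as np

    from dataeval.metrics import BER

    rng = np.random.default_rng(0)
    embeddings = rng.normal(size=(200, 16)).astype(np.float32)  # image embeddings
    labels = rng.integers(0, 2, size=200)                       # binary class labels

    # 0.61.0 style: BER(embeddings, labels, method="KNN", k=1).evaluate()
    # 0.63.0 style: construct once, pass the data at evaluation time
    ber = BER(method="KNN", k=1)
    print(ber.evaluate(embeddings, labels))  # {"ber": ..., "ber_lower": ...}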
dataeval/_internal/metrics/coverage.py
@@ -1,55 +1,71 @@
-import math
 from typing import Literal, Tuple
 
 import numpy as np
-from scipy.spatial.distance import pdist, squareform
 
+from dataeval._internal.functional.coverage import coverage
+from dataeval._internal.interop import ArrayLike, to_numpy
+from dataeval._internal.metrics.base import EvaluateMixin
 
-class Coverage:
+
+class Coverage(EvaluateMixin):
     """
     Class for evaluating coverage and identifying images/samples that are in undercovered regions.
 
-    This implementation is based on https://dl.acm.org/doi/abs/10.1145/3448016.3457315.
-
     Parameters
     ----------
-    embeddings : np.ndarray
-        n x p array of image embeddings from the dataset.
     radius_type : Literal["adaptive", "naive"], default "adaptive"
         The function used to determine radius.
     k: int, default 20
         Number of observations required in order to be covered.
+        [1] suggests that a minimum of 20-50 samples is necessary.
     percent: np.float64, default np.float(0.01)
         Percent of observations to be considered uncovered. Only applies to adaptive radius.
 
-    Note
-    ----
-    Embeddings should be on the unit interval.
+    Reference
+    ---------
+    This implementation is based on https://dl.acm.org/doi/abs/10.1145/3448016.3457315.
+    [1] Seymour Sudman. 1976. Applied sampling. Academic Press New York (1976).
+
+    Examples
+    --------
+    Initialize the Coverage class:
+
+    >>> cover = Coverage()
+
+    Adjusting parameters:
+
+    >>> cover = Coverage(k=5, percent=0.1)
     """
 
     def __init__(
         self,
-        embeddings: np.ndarray,
         radius_type: Literal["adaptive", "naive"] = "adaptive",
         k: int = 20,
         percent: np.float64 = np.float64(0.01),
     ):
-        self.embeddings = embeddings
-        self.radius_type = radius_type
-        self.k = k
-        self.percent = percent
+        self.radius_type: Literal["adaptive", "naive"] = radius_type
+        self.k: int = k
+        self.percent: np.float64 = percent
 
-    def evaluate(self) -> Tuple[np.ndarray, np.ndarray]:
+    def evaluate(self, embeddings: ArrayLike) -> Tuple[np.ndarray, np.ndarray, float]:
         """
         Perform a one-way chi-squared test between observation frequencies and expected frequencies that
         tests the null hypothesis that the observed data has the expected frequencies.
 
+        Parameters
+        ----------
+        embeddings : ArrayLike, shape - (N, P)
+            A dataset in an ArrayLike format.
+            Function expects the data to have 2 dimensions, N number of observations in a P-dimesionial space.
+
         Returns
         -------
         np.ndarray
             Array of uncovered indices
         np.ndarray
             Array of critical value radii
+        float
+            Radius for coverage
 
         Raises
         ------
@@ -57,24 +73,24 @@ class Coverage:
            If length of embeddings is less than or equal to k
        ValueError
            If radius_type is unknown
+
+        Note
+        ----
+        Embeddings should be on the unit interval.
+
+        Example
+        -------
+        >>> cover.evaluate(embeddings)
+        (array([31, 7, 22, 37, 11]), array([0.35938604, 0.26462789, 0.20319609, 0.34140912, 0.31069921,
+               0.2308378 , 0.33300179, 0.69881025, 0.53587532, 0.35689803,
+               0.39333634, 0.67497874, 0.21788128, 0.43510162, 0.38601861,
+               0.34171868, 0.16941337, 0.66438044, 0.20319609, 0.19732733,
+               0.48660288, 0.5135814 , 0.69352653, 0.26946943, 0.31120605,
+               0.33067705, 0.30508271, 0.32802489, 0.51805702, 0.31120605,
+               0.40843265, 0.74996768, 0.31069921, 0.52263763, 0.26654013,
+               0.33113507, 0.40814838, 0.67723008, 0.48124375, 0.37243185,
+               0.29760001, 0.30907904, 0.59023236, 0.57778087, 0.21839853,
+               0.46067782, 0.31078966, 0.65199049, 0.26410603, 0.19542706]))
        """
 
-        # Calculate distance matrix, look at the (k+1)th farthest neighbor for each image.
-        n = len(self.embeddings)
-        if n <= self.k:
-            raise ValueError("Number of observations less than or equal to the specified number of neighbors.")
-        mat = squareform(pdist(self.embeddings))
-        sorted_dists = np.sort(mat, axis=1)
-        crit = sorted_dists[:, self.k + 1]
-
-        d = np.shape(self.embeddings)[1]
-        if self.radius_type == "naive":
-            self.rho = (1 / math.sqrt(math.pi)) * ((2 * self.k * math.gamma(d / 2 + 1)) / (n)) ** (1 / d)
-            pvals = np.where(crit > self.rho)[0]
-        elif self.radius_type == "adaptive":
-            # Use data adaptive cutoff
-            cutoff = int(n * self.percent)
-            pvals = np.argsort(crit)[::-1][:cutoff]
-        else:
-            raise ValueError("Invalid radius type.")
-        return pvals, crit
+        return coverage(to_numpy(embeddings), self.radius_type, self.k, self.percent)
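A runnable sketch of the docstring example, keeping embeddings on the unit interval as the Note requires; the result is printed without unpacking because the doctest output and the annotated return type differ on whether the coverage radius is included:

    import numpy as np

    from dataeval._internal.metrics.coverage import Coverage

    rng = np.random.default_rng(0)
    embeddings = rng.uniform(0.0, 1.0, size=(50, 2))  # N=50 observations in a P=2 space

    cover = Coverage(radius_type="adaptive", k=20, percent=np.float64(0.1))
    result = cover.evaluate(embeddings)  # uncovered indices, critical radii (and possibly the coverage radius)
    print(result)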