dataeval 0.63.0__py3-none-any.whl → 0.64.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. dataeval/__init__.py +3 -3
  2. dataeval/_internal/detectors/clusterer.py +2 -1
  3. dataeval/_internal/detectors/drift/base.py +2 -1
  4. dataeval/_internal/detectors/drift/cvm.py +2 -1
  5. dataeval/_internal/detectors/drift/ks.py +2 -1
  6. dataeval/_internal/detectors/drift/mmd.py +4 -3
  7. dataeval/_internal/detectors/drift/uncertainty.py +1 -2
  8. dataeval/_internal/detectors/duplicates.py +2 -1
  9. dataeval/_internal/detectors/linter.py +1 -1
  10. dataeval/_internal/detectors/ood/ae.py +2 -1
  11. dataeval/_internal/detectors/ood/aegmm.py +2 -1
  12. dataeval/_internal/detectors/ood/base.py +2 -1
  13. dataeval/_internal/detectors/ood/llr.py +3 -2
  14. dataeval/_internal/detectors/ood/vae.py +2 -1
  15. dataeval/_internal/detectors/ood/vaegmm.py +2 -1
  16. dataeval/_internal/interop.py +2 -11
  17. dataeval/_internal/metrics/balance.py +180 -0
  18. dataeval/_internal/metrics/base.py +1 -83
  19. dataeval/_internal/metrics/ber.py +122 -48
  20. dataeval/_internal/metrics/coverage.py +83 -74
  21. dataeval/_internal/metrics/divergence.py +67 -67
  22. dataeval/_internal/metrics/diversity.py +206 -0
  23. dataeval/_internal/metrics/parity.py +300 -155
  24. dataeval/_internal/metrics/stats.py +7 -5
  25. dataeval/_internal/metrics/uap.py +37 -29
  26. dataeval/_internal/metrics/utils.py +393 -0
  27. dataeval/_internal/utils.py +64 -0
  28. dataeval/metrics/__init__.py +25 -6
  29. dataeval/utils/__init__.py +9 -0
  30. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/METADATA +1 -1
  31. dataeval-0.64.0.dist-info/RECORD +60 -0
  32. dataeval/_internal/functional/__init__.py +0 -0
  33. dataeval/_internal/functional/ber.py +0 -63
  34. dataeval/_internal/functional/coverage.py +0 -75
  35. dataeval/_internal/functional/divergence.py +0 -16
  36. dataeval/_internal/functional/hash.py +0 -79
  37. dataeval/_internal/functional/metadata.py +0 -136
  38. dataeval/_internal/functional/metadataparity.py +0 -190
  39. dataeval/_internal/functional/uap.py +0 -6
  40. dataeval/_internal/functional/utils.py +0 -158
  41. dataeval/_internal/maite/__init__.py +0 -0
  42. dataeval/_internal/maite/utils.py +0 -30
  43. dataeval/_internal/metrics/metadata.py +0 -610
  44. dataeval/_internal/metrics/metadataparity.py +0 -67
  45. dataeval-0.63.0.dist-info/RECORD +0 -68
  46. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/LICENSE.txt +0 -0
  47. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/WHEEL +0 -0
@@ -7,68 +7,142 @@ Learning to Bound the Multi-class Bayes Error (Th. 3 and Th. 4)
7
7
  https://arxiv.org/abs/1811.06419
8
8
  """
9
9
 
10
- from typing import Callable, Dict, Literal, Tuple
10
+ from typing import Literal, NamedTuple, Tuple
11
11
 
12
12
  import numpy as np
13
+ from numpy.typing import ArrayLike, NDArray
14
+ from scipy.sparse import coo_matrix
15
+ from scipy.stats import mode
13
16
 
14
- from dataeval._internal.functional.ber import ber_knn, ber_mst
15
- from dataeval._internal.interop import ArrayLike, to_numpy
16
- from dataeval._internal.metrics.base import EvaluateMixin, MethodsMixin
17
+ from dataeval._internal.interop import to_numpy
18
+ from dataeval._internal.metrics.utils import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
17
19
 
18
- _METHODS = Literal["MST", "KNN"]
19
- _FUNCTION = Callable[[np.ndarray, np.ndarray, int], Tuple[float, float]]
20
+
21
class BEROutput(NamedTuple):
    """
    Result of a Bayes Error Rate estimate.

    Attributes
    ----------
    ber : float
        The upper bound of the Bayes Error Rate
    ber_lower : float
        The lower bound of the Bayes Error Rate
    """

    ber: float
    ber_lower: float
33
+
34
+
35
def ber_mst(X: NDArray, y: NDArray) -> Tuple[float, float]:
    """Calculates the Bayes Error Rate using a minimum spanning tree

    Parameters
    ----------
    X : NDArray, shape - (N, ... )
        n_samples containing n_features
    y : NDArray, shape - (N, 1)
        Labels corresponding to each sample

    Returns
    -------
    Tuple[float, float]
        The upper and lower bounds of the bayes error rate
    """
    # NOTE(review): M and N are used below as the number of classes and the
    # number of samples respectively -- confirm against get_classes_counts.
    M, N = get_classes_counts(y)

    tree = coo_matrix(minimum_spanning_tree(X))
    # Count the stored tree edges whose endpoints carry different labels.
    # Comparing the whole row/col index arrays at once avoids assuming the
    # tree stores exactly N - 1 edges (indexing past the end raised IndexError).
    mismatches = np.count_nonzero(y[tree.row] != y[tree.col])
    deltas = mismatches / (2 * N)
    upper = 2 * deltas
    # The radicand is clamped at 0 so the square root always stays real.
    lower = ((M - 1) / M) * (1 - max(1 - 2 * (M / (M - 1)) * deltas, 0) ** 0.5)
    return float(upper), float(lower)
20
58
 
21
59
 
22
- class BER(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
60
def ber_knn(X: NDArray, y: NDArray, k: int) -> Tuple[float, float]:
    """Calculates the Bayes Error Rate using K-nearest neighbors

    Parameters
    ----------
    X : NDArray, shape - (N, ... )
        n_samples containing n_features
    y : NDArray, shape - (N, 1)
        Labels corresponding to each sample
    k : int
        Number of nearest neighbors requested from ``compute_neighbors``; also
        selects which lower-bound formula ``knn_lowerbound`` applies

    Returns
    -------
    Tuple[float, float]
        The upper and lower bounds of the bayes error rate
    """
    # NOTE(review): M and N are used below as the number of classes and the
    # number of samples respectively -- confirm against get_classes_counts.
    M, N = get_classes_counts(y)

    # All features belong on second dimension
    X = X.reshape((X.shape[0], -1))
    nn_indices = compute_neighbors(X, X, k=k)
    # A 1-D result is given a neighbor axis so the mode is always taken over axis 1.
    nn_indices = np.expand_dims(nn_indices, axis=1) if nn_indices.ndim == 1 else nn_indices
    modal_class = mode(y[nn_indices], axis=1, keepdims=True).mode.squeeze()
    # Upper bound: fraction of samples whose neighbors' modal label differs from
    # their own. `!=` is used instead of subtraction so that boolean or other
    # non-arithmetic label dtypes are handled as well.
    upper = float(np.count_nonzero(modal_class != y) / N)
    lower = knn_lowerbound(upper, M, k)
    return upper, lower
85
+
86
+
87
def knn_lowerbound(value: float, classes: int, k: int) -> float:
    """Several cases for computing the BER lower bound

    Parameters
    ----------
    value : float
        The upper bound (KNN error estimate) to derive the lower bound from
    classes : int
        Number of classes
    k : int
        Number of nearest neighbors used for the estimate

    Returns
    -------
    float
        The lower bound; 0.0 when ``value`` is at most 1e-10
    """
    if value <= 1e-10:
        return 0.0

    # Binary problems with k > 1 use k-dependent divisors of the upper bound.
    if classes == 2 and k != 1:
        if k > 5:
            # Property 2 (Devroye, 1981) cited in Snoopy paper, not in snoopy repo
            alpha = 0.3399
            beta = 0.9749
            a_k = alpha * np.sqrt(k) / (k - 3.25) * (1 + beta / (np.sqrt(k - 3)))
            return float(value / (1 + a_k))
        if k > 2:
            return float(value / (1 + (1 / np.sqrt(k))))
        # k == 2:
        return float(value / 2)

    # Every other case (k == 1 or more than two classes): closed-form bound,
    # with the radicand clamped at 0 so the square root stays real.
    return float(((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value)))))
105
+
106
+
107
# Maps the public `method` name accepted by `ber` to its estimator function.
BER_FN_MAP = {"KNN": ber_knn, "MST": ber_mst}
108
+
109
+
110
def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
    """
    An estimator for Multi-class Bayes Error Rate using FR or KNN test statistic basis

    Parameters
    ----------
    images : ArrayLike (N, ... )
        Array of images or image embeddings
    labels : ArrayLike (N, 1)
        Array of labels for each image or image embedding
    k : int, default 1
        Number of nearest neighbors for KNN estimator -- ignored by MST estimator
    method : Literal["KNN", "MST"], default "KNN"
        Method to use when estimating the Bayes error rate

    Returns
    -------
    BEROutput
        The upper and lower bounds of the Bayes Error Rate

    References
    ----------
    [1] `Learning to Bound the Multi-class Bayes Error (Th. 3 and Th. 4) <https://arxiv.org/abs/1811.06419>`_

    Examples
    --------
    >>> import sklearn.datasets as dsets
    >>> from dataeval.metrics import ber

    >>> images, labels = dsets.make_blobs(n_samples=50, centers=2, n_features=2, random_state=0)

    >>> ber(images, labels)
    BEROutput(ber=0.04, ber_lower=0.020416847668728033)
    """
    ber_fn = get_method(BER_FN_MAP, method)
    X = to_numpy(images)
    y = to_numpy(labels)
    # Only the KNN estimator takes `k`; the MST estimator is called without it.
    upper, lower = ber_fn(X, y, k) if method == "KNN" else ber_fn(X, y)
    return BEROutput(upper, lower)
@@ -1,18 +1,44 @@
1
- from typing import Literal, Tuple
1
+ import math
2
+ from typing import Literal, NamedTuple
2
3
 
3
4
  import numpy as np
5
+ from numpy.typing import ArrayLike, NDArray
6
+ from scipy.spatial.distance import pdist, squareform
4
7
 
5
- from dataeval._internal.functional.coverage import coverage
6
- from dataeval._internal.interop import ArrayLike, to_numpy
7
- from dataeval._internal.metrics.base import EvaluateMixin
8
+ from dataeval._internal.interop import to_numpy
8
9
 
9
10
 
10
- class Coverage(EvaluateMixin):
11
class CoverageOutput(NamedTuple):
    """
    Result of a coverage evaluation.

    Attributes
    ----------
    indices : np.ndarray
        Array of uncovered indices
    radii : np.ndarray
        Array of critical value radii
    critical_value : float
        Radius for coverage. Note that ``coverage`` stores the computed radius
        here for the "naive" radius type, but the number of selected
        observations (``int(n * percent)``) for the "adaptive" radius type.
    """

    indices: NDArray[np.intp]
    radii: NDArray[np.float64]
    critical_value: float
26
+
27
+
28
def coverage(
    embeddings: ArrayLike,
    radius_type: Literal["adaptive", "naive"] = "adaptive",
    k: int = 20,
    percent: np.float64 = np.float64(0.01),
) -> CoverageOutput:
    """
    Evaluate coverage and identify images/samples that are in undercovered regions.

    Parameters
    ----------
    embeddings : ArrayLike, shape - (N, P)
        A dataset in an ArrayLike format.
        Function expects the data to have 2 dimensions, N number of observations in a P-dimensional space.
    radius_type : Literal["adaptive", "naive"], default "adaptive"
        The function used to determine radius.
    k: int, default 20
        Neighbor rank used for each observation's critical distance: the entry at
        index ``k + 1`` of its ascending-sorted row of pairwise distances.
    percent: np.float64, default np.float64(0.01)
        Percent of observations to be considered uncovered. Only applies to adaptive radius.

    Returns
    -------
    CoverageOutput
        Array of uncovered indices, critical value radii, and the radius for coverage.
        For the "adaptive" radius type the third field holds the number of
        observations selected (``int(n * percent)``) rather than a distance.

    Raises
    ------
    ValueError
        If length of embeddings is less than or equal to k
    ValueError
        If length of embeddings equals k + 1, which leaves no distance at index k + 1
    ValueError
        If radius_type is unknown

    Note
    ----
    Embeddings should be on the unit interval.

    Example
    -------
    >>> coverage(embeddings)
    CoverageOutput(indices=array([], dtype=int64), radii=array([0.59307666, 0.56956307, 0.56328616, 0.70660265, 0.57778087,
           0.53738624, 0.58968217, 1.27721334, 0.84378694, 0.67767021,
           0.69680335, 1.35532621, 0.59764166, 0.8691945 , 0.83627602,
           0.84187303, 0.62212358, 1.09039732, 0.67956797, 0.60134383,
           0.83713908, 0.91784263, 1.12901193, 0.73907618, 0.63943983,
           0.61188447, 0.47872713, 0.57207771, 0.92885883, 0.54750511,
           0.83015726, 1.20721778, 0.50421928, 0.98312246, 0.59764166,
           0.61009202, 0.73864073, 1.0381061 , 0.77598609, 0.72984036,
           0.67573006, 0.48056064, 1.00050879, 0.89532971, 0.58395529,
           0.95954793, 0.60134383, 1.10096454, 0.51955314, 0.73038702]), critical_value=0)

    Reference
    ---------
    This implementation is based on https://dl.acm.org/doi/abs/10.1145/3448016.3457315.
    [1] Seymour Sudman. 1976. Applied sampling. Academic Press New York (1976).
    """  # noqa: E501

    embeddings = to_numpy(embeddings)
    n = len(embeddings)
    if n <= k:
        raise ValueError("Number of observations less than or equal to the specified number of neighbors.")
    if n < k + 2:
        # Column k + 1 is read below, so each row needs at least k + 2 entries;
        # without this check n == k + 1 failed later with an IndexError.
        raise ValueError("Number of observations must be at least k + 2 to read the (k + 1)th sorted distance.")

    # Full pairwise distance matrix; each row sorted ascending starts with the
    # zero self-distance, and column k + 1 is taken as the critical distance.
    mat = squareform(pdist(embeddings)).astype(np.float64)
    sorted_dists = np.sort(mat, axis=1)
    crit = sorted_dists[:, k + 1]

    d = np.shape(embeddings)[1]
    if radius_type == "naive":
        # Fixed radius derived from k, n and the embedding dimension d.
        rho = (1 / math.sqrt(math.pi)) * ((2 * k * math.gamma(d / 2 + 1)) / (n)) ** (1 / d)
        pvals = np.where(crit > rho)[0]
    elif radius_type == "adaptive":
        # Use data adaptive cutoff as rho: flag the int(n * percent)
        # observations with the largest critical distances.
        rho = int(n * percent)
        pvals = np.argsort(crit)[::-1][:rho]
    else:
        raise ValueError("Invalid radius type.")
    return CoverageOutput(pvals, crit, rho)
@@ -3,27 +3,70 @@ This module contains the implementation of HP Divergence
3
3
  using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
4
4
  """
5
5
 
6
- from typing import Any, Callable, Dict, Literal
6
+ from typing import Literal, NamedTuple
7
7
 
8
8
  import numpy as np
9
+ from numpy.typing import ArrayLike
9
10
 
10
- from dataeval._internal.functional.divergence import divergence_fnn, divergence_mst
11
- from dataeval._internal.interop import ArrayLike, to_numpy
12
- from dataeval._internal.metrics.base import EvaluateMixin, MethodsMixin
11
+ from dataeval._internal.interop import to_numpy
12
+ from dataeval._internal.metrics.utils import compute_neighbors, get_method, minimum_spanning_tree
13
13
 
14
- _METHODS = Literal["MST", "FNN"]
15
- _FUNCTION = Callable[[np.ndarray, np.ndarray], int]
14
+
15
class DivergenceOutput(NamedTuple):
    """
    Result of a divergence estimate between two datasets.

    Attributes
    ----------
    divergence : float
        Divergence value calculated between 2 datasets ranging between 0.0 and 1.0
    errors : int
        The number of differing edges between the datasets
    """

    divergence: float
    errors: int
27
+
28
+
29
def divergence_mst(data: np.ndarray, labels: np.ndarray) -> int:
    """Count minimum-spanning-tree edges that join differently-labelled samples.

    Parameters
    ----------
    data : np.ndarray
        Samples passed to ``minimum_spanning_tree``
    labels : np.ndarray
        Label for each sample, indexed by the tree's row/column indices

    Returns
    -------
    int
        Number of tree edges whose two endpoints have different labels
    """
    # Read the non-zero edge indices straight from the sparse tree instead of
    # first expanding it into a dense N x N array.
    rows, cols = minimum_spanning_tree(data).nonzero()
    errors = np.sum(labels[rows] != labels[cols])
    return errors
34
+
35
+
36
def divergence_fnn(data: np.ndarray, labels: np.ndarray) -> int:
    """Sum the absolute label differences between each sample and its nearest neighbor.

    Parameters
    ----------
    data : np.ndarray
        Samples; neighbors are searched within this same array
    labels : np.ndarray
        Numeric label for each sample

    Returns
    -------
    int
        Sum of ``|labels[neighbor] - labels|`` over all samples. With the 0/1
        labels that ``divergence`` builds, this equals the number of samples
        whose neighbor comes from the other dataset.
    """
    # NOTE(review): assumes compute_neighbors returns, for each sample, the index
    # of its nearest neighbor other than itself -- confirm against its definition.
    nn_indices = compute_neighbors(data, data)
    errors = np.sum(np.abs(labels[nn_indices] - labels))
    return errors
16
40
 
17
41
 
18
- class Divergence(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
42
# Maps the public `method` name accepted by `divergence` to its error-counting function.
DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
43
+
44
+
45
def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
    """
    Calculates the divergence and any errors between the datasets

    Parameters
    ----------
    data_a : ArrayLike, shape - (N, P)
        A dataset in an ArrayLike format to compare.
        Function expects the data to have 2 dimensions, N number of observations in a P-dimensional space.
    data_b : ArrayLike, shape - (M, P)
        A dataset in an ArrayLike format to compare.
        Function expects the data to have 2 dimensions, M number of observations in a P-dimensional space.
    method : Literal["MST", "FNN"], default "FNN"
        Method used to estimate dataset divergence

    Returns
    -------
    DivergenceOutput
        The divergence value (0.0..1.0) and the number of differing edges between the datasets

    Notes
    -----
    The divergence value indicates how similar the 2 datasets are
    with 0 indicating approximately identical data distributions.

    Warning
    -------
    MST is very slow in this implementation.

    Examples
    --------
    Evaluate the datasets:

    >>> divergence(datasetA, datasetB)
    DivergenceOutput(divergence=0.28, errors=36.0)
    """
    div_fn = get_method(DIVERGENCE_FN_MAP, method)
    a = to_numpy(data_a)
    b = to_numpy(data_b)
    N = a.shape[0]
    M = b.shape[0]

    # Pool both datasets and tag each row with its origin: 0 for data_a, 1 for data_b.
    stacked_data = np.vstack((a, b))
    labels = np.vstack([np.zeros([N, 1]), np.ones([M, 1])])

    errors = div_fn(stacked_data, labels)
    # Clamp at 0.0 so a large error count cannot yield a negative divergence.
    dp = max(0.0, 1 - ((M + N) / (2 * M * N)) * errors)
    return DivergenceOutput(dp, errors)