mlquantify 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +10 -29
  2. mlquantify/adjust_counting/__init__.py +24 -0
  3. mlquantify/adjust_counting/_adjustment.py +648 -0
  4. mlquantify/adjust_counting/_base.py +245 -0
  5. mlquantify/adjust_counting/_counting.py +153 -0
  6. mlquantify/adjust_counting/_utils.py +109 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +329 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +147 -0
  13. mlquantify/likelihood/_classes.py +430 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +785 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +51 -36
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +147 -0
  22. mlquantify/mixture/_classes.py +458 -0
  23. mlquantify/mixture/_utils.py +163 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +168 -0
  31. mlquantify/neighbors/_classes.py +150 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +37 -62
  33. mlquantify/neighbors/_kde.py +268 -0
  34. mlquantify/neighbors/_utils.py +131 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +64 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.10.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/top_level.txt +0 -0
mlquantify/mixture/_classes.py
@@ -0,0 +1,458 @@
+ import numpy as np
+ from abc import abstractmethod
+
+ from mlquantify.base import BaseQuantifier
+ from mlquantify.base_aggregative import AggregationMixin, SoftLearnerQMixin, _get_learner_function
+ from mlquantify.mixture._base import BaseMixture
+ from mlquantify.multiclass import define_binary
+ from mlquantify.utils._constraints import Interval, Options
+ from mlquantify.utils._decorators import _fit_context
+ from mlquantify.utils._get_scores import apply_cross_validation
+ from mlquantify.utils._validation import check_classes_attribute, validate_predictions, validate_prevalences, validate_y
+ from mlquantify.mixture._utils import (
+     getHist,
+     ternary_search,
+ )
+
+
+ # =====================================================
+ # Base class
+ # =====================================================
+ @define_binary
+ class AggregativeMixture(SoftLearnerQMixin, AggregationMixin, BaseMixture):
+     r"""Base class for Mixture-based Quantification Methods.
+
+     These methods assume that the test score distribution is a mixture
+     of the positive and negative score distributions from the training data.
+     """
+
+     _parameter_constraints = {
+         "strategy": [Options(["ovr", "ovo"])]
+     }
+
+     def __init__(self, learner=None, strategy="ovr"):
+         super().__init__()
+         self.learner = learner
+         self.pos_scores = None
+         self.neg_scores = None
+         self.distances = None
+         self.strategy = strategy
+
+     def _fit(self, X, y, learner_fitted=False, *args, **kwargs):
+         learner_function = _get_learner_function(self)
+
+         if learner_fitted:
+             train_predictions = getattr(self.learner, learner_function)(X)
+             train_y_values = y
+         else:
+             train_predictions, train_y_values = apply_cross_validation(
+                 self.learner,
+                 X,
+                 y,
+                 function=learner_function,
+                 cv=5,
+                 stratified=True,
+                 random_state=None,
+                 shuffle=True
+             )
+
+         self.train_predictions = train_predictions
+         self.train_y_values = train_y_values
+
+         self._precompute_training(train_predictions, train_y_values)
+         return self
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Fit learner and store score distributions for positive and negative classes.
+         """
+         # Store scores for positive and negative classes
+         self.pos_scores = train_predictions[train_y_values == self.classes_[1], 1]
+         self.neg_scores = train_predictions[train_y_values == self.classes_[0], 1]
+         self._precomputed = True
+         return self
+
+     def _predict(self, X):
+         """Predict class prevalences for the given data."""
+         predictions = getattr(self.learner, _get_learner_function(self))(X)
+         prevalences = self.aggregate(predictions, self.train_predictions, self.train_y_values)
+
+         return prevalences
+
+     def aggregate(self, predictions, train_predictions, train_y_values):
+         predictions = validate_predictions(self, predictions)
+         self.classes_ = check_classes_attribute(self, np.unique(train_y_values))
+
+         if not self._precomputed:
+             self._precompute_training(train_predictions, train_y_values)
+             self._precomputed = True
+
+         pos_test_scores = predictions[:, 1]
+
+         best_alpha, _ = self.best_mixture(pos_test_scores, self.pos_scores, self.neg_scores)
+         prevalence = np.array([1 - best_alpha, best_alpha])
+         prevalence = validate_prevalences(self, prevalence, self.classes_)
+         return prevalence
+
+     @abstractmethod
+     def best_mixture(self, predictions, pos_scores, neg_scores):
+         ...
+
+ # =====================================================
+ # DyS
+ # =====================================================
+
+ class DyS(AggregativeMixture):
+     r"""Distribution y-Similarity (DyS) quantification method.
+
+     Uses mixture modeling with a dissimilarity measure between distributions
+     computed on histograms of classifier scores. This method optimizes mixture
+     weights by minimizing a chosen distance measure: Hellinger, Topsoe, or ProbSymm.
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         Base probabilistic classifier.
+     measure : {'hellinger', 'topsoe', 'probsymm'}, default='topsoe'
+         Distance function to minimize.
+     bins_size : array-like or None
+         Histogram bin sizes to try for score representation. Defaults to a set of
+         bin sizes between 2 and 30.
+
+     References
+     ----------
+     [1] Maletzke et al. (2019). DyS: A Framework for Mixture Models in Quantification. AAAI 2019.
+     [2] Esuli et al. (2023). Learning to Quantify. Springer.
+
+     Examples
+     --------
+     >>> from mlquantify.mixture import DyS
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> q = DyS(learner=LogisticRegression(), measure="hellinger")
+     >>> q.fit(X_train, y_train)
+     >>> prevalences = q.predict(X_test)
+     """
+
+     _parameter_constraints = {
+         "measure": [Options(["hellinger", "topsoe", "probsymm"])],
+         "bins_size": ["array-like", None]
+     }
+
+     def __init__(self, learner=None, measure="topsoe", bins_size=None):
+         super().__init__(learner)
+         if bins_size is None:
+             bins_size = np.append(np.linspace(2, 20, 10), 30)
+
+         self.measure = measure
+         self.bins_size = np.asarray(bins_size, dtype=int)
+
+     def best_mixture(self, predictions, pos_scores, neg_scores):
+         r"""Determine the best mixture parameters for the given data.
+
+         Applies ternary search to find the mixture weight minimizing the distance
+         between the test score histogram and the mixture of the positive and negative
+         class score histograms.
+
+         The mixture weight :math:`\alpha` is estimated as:
+
+         .. math::
+             \alpha = \arg \min_{\alpha \in [0, 1]} D \left( H_{test}, \alpha H_{pos} + (1 - \alpha) H_{neg} \right)
+
+         where :math:`D` is the selected distance measure and :math:`H` denotes histograms.
+
+         Parameters
+         ----------
+         predictions : ndarray
+             Classifier scores for the test data.
+         pos_scores : ndarray
+             Classifier scores for the positive class from training data.
+         neg_scores : ndarray
+             Classifier scores for the negative class from training data.
+
+         Returns
+         -------
+         alpha : float
+             Estimated mixture weight.
+         best_distance : float
+             Distance corresponding to the best mixture weight.
+         """
+
+         prevs = []
+         self.distances = []
+         for bins in self.bins_size:
+             pos = getHist(pos_scores, bins)
+             neg = getHist(neg_scores, bins)
+             test = getHist(predictions, bins)
+
+             def f(alpha):
+                 mix = self._mix(pos, neg, alpha)
+                 return BaseMixture.get_distance(mix, test, measure=self.measure)
+
+             alpha = ternary_search(0, 1, f)
+             prevs.append(alpha)
+             self.distances.append(f(alpha))
+         alpha = np.median(prevs)
+         best_distance = np.median(self.distances)
+         return alpha, best_distance
+
+     def _mix(self, pos_hist, neg_hist, alpha):
+         return alpha * pos_hist + (1 - alpha) * neg_hist
+
+
+ # =====================================================
+ # HDy
+ # =====================================================
+
+ class HDy(AggregativeMixture):
+     r"""Hellinger Distance Minimization (HDy) quantification method.
+
+     Estimates class prevalences by finding mixture weights that minimize
+     the Hellinger distance between the histogram of test scores and the mixture
+     of positive and negative class score histograms, evaluated over multiple bin sizes.
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         Base probabilistic classifier.
+
+     References
+     ----------
+     [2] Esuli et al. (2023). Learning to Quantify. Springer.
+
+     """
+
+     def best_mixture(self, predictions, pos_scores, neg_scores):
+         r"""Determine the best mixture parameters for the given data.
+
+         Compute the mixture weight :math:`\alpha` that minimizes the Hellinger distance
+         between the test score histogram and the mixture of positive and negative class
+         score histograms.
+
+         The mixture weight :math:`\alpha` is estimated as:
+
+         .. math::
+             \alpha = \arg \min_{\alpha \in [0, 1]} Hellinger \left( H_{test}, \alpha H_{pos} + (1 - \alpha) H_{neg} \right)
+
+         where :math:`H` denotes histograms.
+
+         Parameters
+         ----------
+         predictions : ndarray
+             Classifier scores for the test data.
+         pos_scores : ndarray
+             Classifier scores for the positive class from training data.
+         neg_scores : ndarray
+             Classifier scores for the negative class from training data.
+
+         Returns
+         -------
+         alpha : float
+             Estimated mixture weight.
+         best_distance : float
+             Distance corresponding to the best mixture weight.
+         """
+
+         bins_size = np.arange(10, 110, 11)
+         alpha_values = np.round(np.linspace(0, 1, 101), 2)
+
+         alphas, self.distances = [], []
+         for bins in bins_size:
+             pos = getHist(pos_scores, bins)
+             neg = getHist(neg_scores, bins)
+             test = getHist(predictions, bins)
+             dists = []
+             for a in alpha_values:
+                 mix = self._mix(pos, neg, a)
+                 dists.append(BaseMixture.get_distance(mix, test, measure="hellinger"))
+             a = alpha_values[np.argmin(dists)]
+             alphas.append(a)
+             self.distances.append(np.min(dists))
+
+         best_alpha = np.median(alphas)
+         best_distance = np.median(self.distances)
+
+         return best_alpha, best_distance
+
+     def _mix(self, pos_hist, neg_hist, alpha):
+         return alpha * pos_hist + (1 - alpha) * neg_hist
+
+
+ # =====================================================
+ # SMM
+ # =====================================================
+
+ class SMM(AggregativeMixture):
+     r"""Sample Mean Matching (SMM) quantification method.
+
+     Estimates class prevalence by matching the mean score of the test samples
+     to a convex combination of positive and negative training scores. The mixture
+     weight :math:`\alpha` is computed as:
+
+     .. math::
+         \alpha = \frac{\bar{s}_{test} - \bar{s}_{neg}}{\bar{s}_{pos} - \bar{s}_{neg}}
+
+     where :math:`\bar{s}` denotes the sample mean.
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         Base probabilistic classifier.
+
+     References
+     ----------
+     [2] Esuli et al. (2023). Learning to Quantify. Springer.
+     """
+
+     def best_mixture(self, predictions, pos_scores, neg_scores):
+         mean_pos = np.mean(pos_scores)
+         mean_neg = np.mean(neg_scores)
+         mean_test = np.mean(predictions)
+
+         alpha = (mean_test - mean_neg) / (mean_pos - mean_neg)
+         return alpha, None
+
+
+ # =====================================================
+ # SORD
+ # =====================================================
+
+ class SORD(AggregativeMixture):
+     """Sample Ordinal Distance (SORD) quantification method.
+
+     Estimates prevalence by minimizing the weighted sum of absolute score differences
+     between test data and training classes. The method creates weighted score
+     vectors for positive, negative, and test samples, sorts them, and computes
+     a cumulative absolute difference as the distance measure.
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         Base probabilistic classifier.
+
+     References
+     ----------
+     [2] Esuli et al. (2023). Learning to Quantify. Springer.
+     """
+
+     def best_mixture(self, predictions, pos_scores, neg_scores):
+         alphas = np.linspace(0, 1, 101)
+         self.distances = []
+
+         pos, neg, test = pos_scores, neg_scores, predictions
+         n_pos, n_neg, n_test = len(pos), len(neg), len(test)
+         for a in alphas:
+             pos_w = np.full(n_pos, a / n_pos)
+             neg_w = np.full(n_neg, (1 - a) / n_neg)
+             test_w = np.full(n_test, -1 / n_test)
+             scores = np.concatenate([pos, neg, test])
+             weights = np.concatenate([pos_w, neg_w, test_w])
+             idx = np.argsort(scores)
+             sorted_scores = scores[idx]
+             sorted_weights = weights[idx]
+             cum_w = sorted_weights[0]
+             total = 0
+             for i in range(1, len(sorted_scores)):
+                 seg = sorted_scores[i] - sorted_scores[i - 1]
+                 total += abs(seg * cum_w)
+                 cum_w += sorted_weights[i]
+             self.distances.append(total)
+
+         best_distance_index = np.argmin(self.distances)
+         best_alpha = alphas[best_distance_index]
+         best_distance = self.distances[best_distance_index]
+         return best_alpha, best_distance
+
+
+ # =====================================================
+ # Non aggregative Mixture-based Quantifiers
+ # =====================================================
+
+ class HDx(BaseMixture):
+     """
+     Hellinger Distance-based Quantifier (HDx).
+
+     A non-aggregative mixture quantifier that estimates class prevalences by
+     minimizing the average Hellinger distance between class-wise feature histograms
+     of the training data and histograms of the test data. It iterates over mixture
+     weights and histogram bin sizes, evaluating the distance per feature and
+     aggregating the results.
+
+     Parameters
+     ----------
+     bins_size : array-like, optional
+         Histogram bin sizes to consider for discretizing features.
+     strategy : {'ovr', 'ovo'}, default='ovr'
+         Multiclass quantification strategy.
+
+     Attributes
+     ----------
+     pos_features : ndarray
+         Training samples of the positive class.
+     neg_features : ndarray
+         Training samples of the negative class.
+
+     References
+     ----------
+     [2] Esuli et al. (2023). Learning to Quantify. Springer.
+     """
+
+     _parameter_constraints = {
+         "bins_size": ["array-like", None],
+         "strategy": [Options(["ovr", "ovo"])]
+     }
+
+     def __init__(self, bins_size=None, strategy="ovr"):
+         super().__init__()
+         if bins_size is None:
+             bins_size = np.append(np.linspace(2, 20, 10), 30)
+
+         self.bins_size = bins_size
+         self.neg_features = None
+         self.pos_features = None
+         self.strategy = strategy
+
+     def _fit(self, X, y, *args, **kwargs):
+         self.pos_features = X[y == self.classes_[1]]
+         self.neg_features = X[y == self.classes_[0]]
+         return self
+
+     def _predict(self, X) -> np.ndarray:
+         alpha, _ = self.best_mixture(X, self.pos_features, self.neg_features)
+         prevalence = np.array([1 - alpha, alpha])
+         prevalence = validate_prevalences(self, prevalence, self.classes_)
+         return prevalence
+
+     def best_mixture(self, X, pos, neg):
+         alpha_values = np.round(np.linspace(0, 1, 101), 2)
+         self.distances = []
+
+         # Iterate over alpha values to compute the prevalence
+         for alpha in alpha_values:
+             distances = []
+
+             # For each feature, compute the Hellinger distance
+             for feature_idx in range(X.shape[1]):
+
+                 for bins in self.bins_size:
+
+                     pos_feature = pos[:, feature_idx]
+                     neg_feature = neg[:, feature_idx]
+                     test_feature = X[:, feature_idx]
+
+                     pos_hist = getHist(pos_feature, bins)
+                     neg_hist = getHist(neg_feature, bins)
+                     test_hist = getHist(test_feature, bins)
+
+                     mix_hist = alpha * pos_hist + (1 - alpha) * neg_hist
+                     distance = BaseMixture.get_distance(mix_hist, test_hist, measure="hellinger")
+                     distances.append(distance)
+
+             avg_distance = np.mean(distances)
+             self.distances.append(avg_distance)
+         best_alpha = alpha_values[np.argmin(self.distances)]
+         best_distance = np.min(self.distances)
+         return best_alpha, best_distance
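
For orientation, a minimal usage sketch of the mixture quantifiers added in this file (not part of the package diff). It assumes scikit-learn is available and that HDy and SMM are exported from mlquantify.mixture alongside DyS, as the DyS docstring example suggests; the dataset and variable names are illustrative only.

# Illustrative sketch, not a change in this diff: fit three binary mixture
# quantifiers and print their estimated class prevalences for a test sample.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from mlquantify.mixture import DyS, HDy, SMM  # assumed re-exports from mixture/__init__.py

X, y = make_classification(n_samples=1000, random_state=0)
X_train, y_train, X_test = X[:700], y[:700], X[700:]

for quantifier in (DyS(learner=LogisticRegression(), measure="hellinger"),
                   HDy(learner=LogisticRegression()),
                   SMM(learner=LogisticRegression())):
    quantifier.fit(X_train, y_train)   # cross-validated scores -> per-class score distributions
    print(type(quantifier).__name__, quantifier.predict(X_test))  # estimated prevalences

Each quantifier fits its base learner via cross-validation to collect positive and negative training scores, then calls its best_mixture implementation to pick the mixture weight that best reproduces the test score distribution.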
mlquantify/mixture/_utils.py
@@ -0,0 +1,163 @@
+ import numpy as np
+
+
+ # =====================================================
+ # Utility functions
+ # =====================================================
+
+ def getHist(scores, nbins):
+     r"""
+     Calculate histogram-like bin probabilities for a given set of scores.
+
+     This function divides the score range into equal bins and computes the proportion
+     of scores in each bin, normalized by the total count.
+
+     Parameters
+     ----------
+     scores : np.ndarray
+         A 1-dimensional array of scores.
+     nbins : int
+         Number of bins for dividing the score range.
+
+     Returns
+     -------
+     np.ndarray
+         An array containing the normalized bin probabilities.
+
+     Notes
+     -----
+     - The bins are equally spaced between 0 and 1, with an additional upper boundary
+       to include the maximum score.
+     - The returned probabilities are normalized to account for the total number of scores.
+     """
+     breaks = np.linspace(0, 1, int(nbins) + 1)
+     breaks = np.delete(breaks, -1)
+     breaks = np.append(breaks, 1.1)
+
+     re = np.repeat(1 / (len(breaks) - 1), (len(breaks) - 1))
+     for i in range(1, len(breaks)):
+         re[i - 1] = (re[i - 1] + len(np.where((scores >= breaks[i - 1]) & (scores < breaks[i]))[0])) / (len(scores) + 1)
+
+     return re
+
+
+ def ternary_search(left: float, right: float, func, tol: float = 1e-4) -> float:
+     r"""
+     Ternary search to find the minimum of a unimodal function in [left, right].
+
+     Parameters
+     ----------
+     left : float
+         Left bound.
+     right : float
+         Right bound.
+     func : callable
+         Function to minimize.
+     tol : float, optional
+         Tolerance for termination. Default is 1e-4.
+
+     Returns
+     -------
+     float
+         Approximate position of the minimum.
+     """
+     while right - left > tol:
+         m1 = left + (right - left) / 3
+         m2 = right - (right - left) / 3
+         f1, f2 = func(m1), func(m2)
+         if f1 < f2:
+             right = m2
+         else:
+             left = m1
+     return (left + right) / 2
+
+
+ def topsoe(p: np.ndarray, q: np.ndarray) -> float:
+     r"""
+     Topsoe distance between two probability distributions.
+
+     .. math::
+         D_T(p, q) = \sum \left( p \log \frac{2p}{p + q} + q \log \frac{2q}{p + q} \right)
+
+     Parameters
+     ----------
+     p : np.ndarray
+         First probability distribution.
+     q : np.ndarray
+         Second probability distribution.
+
+     Returns
+     -------
+     float
+         The Topsoe distance.
+     """
+     p = np.maximum(p, 1e-20)
+     q = np.maximum(q, 1e-20)
+     return np.sum(p * np.log(2 * p / (p + q)) + q * np.log(2 * q / (p + q)))
+
+
+ def probsymm(p: np.ndarray, q: np.ndarray) -> float:
+     r"""
+     Probabilistic Symmetric distance.
+
+     .. math::
+         D_{PS}(p, q) = \sum (p - q) \log \frac{p}{q}
+
+     Parameters
+     ----------
+     p : np.ndarray
+         First probability distribution.
+     q : np.ndarray
+         Second probability distribution.
+
+     Returns
+     -------
+     float
+         The Probabilistic Symmetric distance.
+     """
+     p = np.maximum(p, 1e-20)
+     q = np.maximum(q, 1e-20)
+     return np.sum((p - q) * np.log(p / q))
+
+
+ def hellinger(p: np.ndarray, q: np.ndarray) -> float:
+     r"""
+     Hellinger distance between two probability distributions.
+
+     .. math::
+         H(p, q) = \frac{1}{\sqrt{2}} \sqrt{\sum \left( \sqrt{p} - \sqrt{q} \right)^2}
+
+     Parameters
+     ----------
+     p : np.ndarray
+         First probability distribution.
+     q : np.ndarray
+         Second probability distribution.
+
+     Returns
+     -------
+     float
+         The Hellinger distance.
+     """
+     p = np.maximum(p, 1e-20)
+     q = np.maximum(q, 1e-20)
+     return np.sqrt(0.5 * np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))
+
+
+ def sqEuclidean(p: np.ndarray, q: np.ndarray) -> float:
+     r"""
+     Squared Euclidean distance between two vectors.
+
+     Parameters
+     ----------
+     p : np.ndarray
+         First vector.
+     q : np.ndarray
+         Second vector.
+
+     Returns
+     -------
+     float
+         The squared Euclidean distance.
+     """
+     return np.sum((p - q) ** 2)
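
As a rough illustration of how these helpers compose in the DyS/HDy loops above (again, not part of the diff): build per-class score histograms with getHist, then minimize the Hellinger distance over the mixture weight with ternary_search. The beta-distributed toy scores below are an assumption for demonstration only.

# Toy sketch: the test scores are a ~30/70 mixture of the positive and negative
# score distributions, so the recovered weight should land near 0.3.
import numpy as np
from mlquantify.mixture._utils import getHist, ternary_search, hellinger

rng = np.random.default_rng(0)
pos_scores = rng.beta(5, 2, 500)   # scores concentrated near 1
neg_scores = rng.beta(2, 5, 500)   # scores concentrated near 0
test_scores = np.concatenate([rng.beta(5, 2, 300), rng.beta(2, 5, 700)])

pos_hist, neg_hist, test_hist = (getHist(s, 10) for s in (pos_scores, neg_scores, test_scores))
alpha = ternary_search(0, 1, lambda a: hellinger(a * pos_hist + (1 - a) * neg_hist, test_hist))
print(round(alpha, 2))  # estimated positive prevalence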
mlquantify/model_selection/__init__.py
@@ -0,0 +1,9 @@
+ from ._protocol import (
+     BaseProtocol,
+     APP,
+     NPP,
+     UPP,
+     PPP
+ )
+
+ from ._search import GridSearchQ