distclassipy 0.1.4__py3-none-any.whl → 0.1.6a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -1,18 +1,19 @@
1
- """
2
- A module providing a variety of distance metrics to calculate the distance between two points.
1
+ """A module providing a variety of distance metrics.
3
2
 
4
- This module includes implementations of various distance metrics, including both common and less
5
- common measures. It allows for the calculation of distances between data points in a vectorized
6
- manner using numpy arrays.
7
- A part of this code is based on the work of Andrzej Zielezinski, originally retrieved on 20 November 2022 from
8
- https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py, which was released via the GNU General Public License v3.0.
3
+ This module includes implementations of various distance metrics, including both
4
+ common and less common measures. It allows for the calculation of distances between
5
+ data points in a vectorized manner using numpy arrays.
6
+ A part of this code is based on the work of Andrzej Zielezinski, originally retrieved
7
+ on 20 November 2022 from
8
+ https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py, # noqa
9
+ which was released via the GNU General Public License v3.0.
9
10
 
10
11
  It was originally modified by Siddharth Chaini on 27 November 2022.
11
12
 
12
13
  Notes
13
14
  -----
14
-
15
- Modifications by Siddharth Chaini include the addition of the following distance measures:
15
+ Modifications by Siddharth Chaini include the addition of the following distance
16
+ measures:
16
17
  1. Meehl distance
17
18
  2. Sorensen distance
18
19
  3. Ruzicka distance
@@ -24,17 +25,64 @@ Notes
24
25
 
25
26
  In addition, the following code was added to all functions for array conversion:
26
27
  u,v = np.asarray(u), np.asarray(v)
28
+
29
+ Copyright (C) 2024 Siddharth Chaini
27
30
  -----
31
+ This program is free software: you can redistribute it and/or modify
32
+ it under the terms of the GNU General Public License as published by
33
+ the Free Software Foundation, either version 3 of the License, or
34
+ (at your option) any later version.
35
+
36
+ This program is distributed in the hope that it will be useful,
37
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
38
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39
+ GNU General Public License for more details.
40
+
41
+ You should have received a copy of the GNU General Public License
42
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
28
43
  """
29
44
 
45
+ import warnings
46
+
30
47
  import numpy as np
31
48
 
49
+ import scipy
50
+
32
51
 
33
52
  class Distance:
53
+ """A class to calculate various distance metrics between vectors.
54
+
55
+ This class provides methods to compute different types of distances between
56
+ two vectors, such as Euclidean, Manhattan, Canberra, and other statistical
57
+ distances. Each method takes two vectors as input and returns the calculated
58
+ distance. The class can handle both numpy arrays and lists, converting them
59
+ internally to numpy arrays for computation.
60
+
61
+ Attributes
62
+ ----------
63
+ epsilon : float, optional
64
+ A small value to avoid division by zero errors in certain distance
65
+ calculations. Default is the machine precision for float data type.
66
+
67
+ Methods
68
+ -------
69
+ acc(u, v)
70
+ Returns the average of Cityblock/Manhattan and Chebyshev distances.
71
+ add_chisq(u, v)
72
+ Returns the Additive Symmetric Chi-square distance.
73
+ (Other methods are not listed here for brevity)
74
+
75
+ Examples
76
+ --------
77
+ >>> dist = Distance()
78
+ >>> u = [1, 2, 3]
79
+ >>> v = [4, 5, 6]
80
+ >>> print(dist.acc(u, v))
81
+ 6.0
82
+ """
34
83
 
35
84
  def __init__(self, epsilon=None):
36
- """
37
- Initialize the Distance class with an optional epsilon value.
85
+ """Initialize the Distance class with an optional epsilon value.
38
86
 
39
87
  Parameters
40
88
  ----------
@@ -42,34 +90,11 @@ class Distance:
42
90
  """
43
91
  self.epsilon = np.finfo(float).eps if not epsilon else epsilon
44
92
 
45
- def acc(self, u, v):
46
- """
47
- Calculate the average of Cityblock/Manhattan and Chebyshev distances.
48
- This function computes the ACC distance, also known as the Average distance, between two
49
- vectors u and v. It is the average of the Cityblock (or Manhattan) and Chebyshev distances.
50
-
51
- Parameters
52
- ----------
53
- - u, v: Input vectors between which the distance is to be calculated.
54
-
55
- Returns
56
- -------
57
- - The ACC distance between the two vectors.
58
-
59
- References
60
- ----------
61
- 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean Geometry. Dover Publications.
62
- 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
63
- Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
64
- vol. 1(4), pp. 300-307.
65
- """
66
- return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
93
+ def euclidean(self, u, v, w=None):
94
+ """Calculate the Euclidean distance between two vectors.
67
95
 
68
- def add_chisq(self, u, v):
69
- """
70
- Compute the Additive Symmetric Chi-square distance between two vectors.
71
- The Additive Symmetric Chi-square distance is a measure that can be used to compare two vectors.
72
- This function calculates it based on the input vectors u and v.
96
+ The Euclidean distance is the "ordinary" straight-line distance between two
97
+ points in Euclidean space.
73
98
 
74
99
  Parameters
75
100
  ----------
@@ -77,55 +102,26 @@ class Distance:
77
102
 
78
103
  Returns
79
104
  -------
80
- - The Additive Symmetric Chi-square distance between the two vectors.
105
+ - The Euclidean distance between the two vectors.
81
106
 
82
107
  References
83
108
  ----------
84
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
85
- Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
86
- vol. 1(4), pp. 300-307.
109
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
110
+ Measures between Probability Density Functions. International
111
+ Journal of Mathematical Models and Methods in Applied Sciences.
112
+ 1(4), 300-307.
87
113
  """
88
114
  u, v = np.asarray(u), np.asarray(v)
89
- uvmult = u * v
90
- with np.errstate(divide="ignore", invalid="ignore"):
91
- return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
115
+ return scipy.spatial.distance.euclidean(u, v, w)
92
116
 
93
- # def bhattacharyya(self, u, v):
94
- # """
95
- # Calculate the Bhattacharyya distance between two vectors.
96
-
97
- # Returns a distance value between 0 and 1.
98
-
99
- # Parameters
100
- # ----------
101
- # - u, v: Input vectors between which the distance is to be calculated.
102
-
103
- # Returns
104
- # -------
105
- # - The Bhattacharyya distance between the two vectors.
106
-
107
- # References
108
- # ----------
109
- # 1. Bhattacharyya A (1947) On a measure of divergence between two
110
- # statistical populations defined by probability distributions,
111
- # Bull. Calcutta Math. Soc., 35, 99–109.
112
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
113
- # Measures between Probability Density Functions. International
114
- # Journal of Mathematical Models and Methods in Applied Sciences.
115
- # 1(4), 300-307.
116
- # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
117
- # """
118
- # u, v = np.asarray(u), np.asarray(v)
119
- # return -np.log(np.sum(np.sqrt(u * v)))
120
-
121
- def braycurtis(self, u, v):
122
- """
123
- Calculate the Bray-Curtis distance between two vectors.
117
+ def braycurtis(self, u, v, w=None):
118
+ """Calculate the Bray-Curtis distance between two vectors.
124
119
 
125
- The Bray-Curtis distance is a measure of dissimilarity between two non-negative vectors,
126
- often used in ecology to measure the compositional dissimilarity between two sites based on counts
127
- of species at both sites. It is closely related to the Sørensen distance and is also known as
128
- Bray-Curtis dissimilarity.
120
+ The Bray-Curtis distance is a measure of dissimilarity between two non-negative
121
+ vectors, often used in ecology to measure the compositional dissimilarity
122
+ between two sites based on counts of species at both sites. It is closely
123
+ related to the Sørensen distance and is also known as Bray-Curtis
124
+ dissimilarity.
129
125
 
130
126
  Notes
131
127
  -----
@@ -151,13 +147,13 @@ class Distance:
151
147
  3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
152
148
  """
153
149
  u, v = np.asarray(u), np.asarray(v)
154
- return np.sum(np.abs(u - v)) / np.sum(np.abs(u + v))
150
+ return scipy.spatial.distance.braycurtis(u, v, w)
155
151
 
156
- def canberra(self, u, v):
157
- """
158
- Calculate the Canberra distance between two vectors.
152
+ def canberra(self, u, v, w=None):
153
+ """Calculate the Canberra distance between two vectors.
159
154
 
160
- The Canberra distance is a weighted version of the Manhattan distance, used in numerical analysis.
155
+ The Canberra distance is a weighted version of the Manhattan distance, used
156
+ in numerical analysis.
161
157
 
162
158
  Notes
163
159
  -----
@@ -180,14 +176,49 @@ class Distance:
180
176
  1(4), 300-307.
181
177
  """
182
178
  u, v = np.asarray(u), np.asarray(v)
183
- with np.errstate(invalid="ignore"):
184
- return np.nansum(np.abs(u - v) / (np.abs(u) + np.abs(v)))
179
+ return scipy.spatial.distance.canberra(u, v, w)
180
+
181
+ def cityblock(self, u, v, w=None):
182
+ """Calculate the Cityblock (Manhattan) distance between two vectors.
183
+
184
+ Parameters
185
+ ----------
186
+ - u, v: Input vectors between which the distance is to be calculated.
187
+
188
+ Returns
189
+ -------
190
+ - The Cityblock distance between the two vectors.
191
+
192
+ References
193
+ ----------
194
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
195
+ Measures between Probability Density Functions. International
196
+ Journal of Mathematical Models and Methods in Applied Sciences.
197
+ 1(4):300-307.
198
+
199
+ Synonyms:
200
+ City block distance
201
+ Manhattan distance
202
+ Rectilinear distance
203
+ Taxicab norm
185
204
 
186
- def chebyshev(self, u, v):
205
+ Notes
206
+ -----
207
+ Cityblock distance between two probability density functions
208
+ (pdfs) equals:
209
+ 1. Non-intersection distance multiplied by 2.
210
+ 2. Gower distance multiplied by vector length.
211
+ 3. Bray-Curtis distance multiplied by 2.
212
+ 4. Google distance multiplied by 2.
187
213
  """
188
- Calculate the Chebyshev distance between two vectors.
214
+ u, v = np.asarray(u), np.asarray(v)
215
+ return scipy.spatial.distance.cityblock(u, v, w)
189
216
 
190
- The Chebyshev distance is a metric defined on a vector space where the distance between two vectors
217
+ def chebyshev(self, u, v, w=None):
218
+ """Calculate the Chebyshev distance between two vectors.
219
+
220
+ The Chebyshev distance is a metric defined on a vector space where the distance
221
+ between two vectors
191
222
  is the greatest of their differences along any coordinate dimension.
192
223
 
193
224
  Synonyms:
@@ -212,13 +243,12 @@ class Distance:
212
243
  1(4), 300-307.
213
244
  """
214
245
  u, v = np.asarray(u), np.asarray(v)
215
- return np.amax(np.abs(u - v))
246
+ return scipy.spatial.distance.chebyshev(u, v, w)
216
247
 
217
- def chebyshev_min(self, u, v):
218
- """
219
- Calculate the minimum value distance between two vectors.
248
+ def correlation(self, u, v, w=None, centered=True):
249
+ """Calculate the Pearson correlation distance between two vectors.
220
250
 
221
- This measure represents a custom approach by Zielezinski to distance measurement, focusing on the minimum absolute difference.
251
+ Returns a distance value between 0 and 2.
222
252
 
223
253
  Parameters
224
254
  ----------
@@ -226,14 +256,47 @@ class Distance:
226
256
 
227
257
  Returns
228
258
  -------
229
- - The minimum value distance between the two vectors.
259
+ - The Pearson correlation distance between the two vectors.
230
260
  """
231
261
  u, v = np.asarray(u), np.asarray(v)
232
- return np.amin(np.abs(u - v))
262
+ if len(u) < 2 or len(v) < 2:
263
+ warnings.warn(
264
+ "Pearson correlation requires vectors of length at least 2.",
265
+ RuntimeWarning,
266
+ )
267
+ d = 0
268
+ else:
269
+ d = scipy.spatial.distance.correlation(u, v, w, centered)
270
+ if np.isnan(d) and (
271
+ np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
272
+ ):
273
+ warnings.warn(
274
+ "One of the vectors is constant; correlation is set to 0",
275
+ RuntimeWarning,
276
+ )
277
+ d = 0
278
+ return d
279
+
280
+ def cosine(self, u, v, w=None):
281
+ """Calculate the cosine distance between two vectors.
233
282
 
234
- def clark(self, u, v):
283
+ Parameters
284
+ ----------
285
+ - u, v: Input vectors between which the distance is to be calculated.
286
+
287
+ Returns
288
+ -------
289
+ - The cosine distance between the two vectors.
290
+
291
+ References
292
+ ----------
293
+ 1. SciPy.
235
294
  """
236
- Calculate the Clark distance between two vectors.
295
+ u, v = np.asarray(u), np.asarray(v)
296
+ return scipy.spatial.distance.cosine(u, v, w)
297
+
298
+ def clark(self, u, v):
299
+ """Calculate the Clark distance between two vectors.
237
300
 
238
301
  The Clark distance equals the square root of half of the divergence.
239
302
 
@@ -261,9 +324,11 @@ class Distance:
261
324
  with np.errstate(divide="ignore", invalid="ignore"):
262
325
  return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
263
326
 
264
- def cosine(self, u, v):
265
- """
266
- Calculate the cosine distance between two vectors.
327
+ def hellinger(self, u, v):
328
+ """Calculate the Hellinger distance between two vectors.
329
+
330
+ The Hellinger distance is a measure of similarity between two probability
331
+ distributions.
267
332
 
268
333
  Parameters
269
334
  ----------
@@ -271,20 +336,28 @@ class Distance:
271
336
 
272
337
  Returns
273
338
  -------
274
- - The cosine distance between the two vectors.
339
+ - The Hellinger distance between the two vectors.
340
+
341
+ Notes
342
+ -----
343
+ This implementation produces values two times larger than values
344
+ obtained by Hellinger distance described in Wikipedia and also
345
+ in https://gist.github.com/larsmans/3116927.
275
346
 
276
347
  References
277
348
  ----------
278
- 1. SciPy.
349
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
350
+ Measures between Probability Density Functions. International
351
+ Journal of Mathematical Models and Methods in Applied Sciences.
352
+ 1(4), 300-307.
279
353
  """
280
354
  u, v = np.asarray(u), np.asarray(v)
281
- return 1 - np.dot(u, v) / (np.sqrt(np.dot(u, u)) * np.sqrt(np.dot(v, v)))
355
+ return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
282
356
 
283
- def correlation_pearson(self, u, v):
284
- """
285
- Calculate the Pearson correlation distance between two vectors.
357
+ def jaccard(self, u, v):
358
+ """Calculate the Jaccard distance between two vectors.
286
359
 
287
- Returns a distance value between 0 and 2.
360
+ The Jaccard distance measures dissimilarity between sample sets.
288
361
 
289
362
  Parameters
290
363
  ----------
@@ -292,16 +365,21 @@ class Distance:
292
365
 
293
366
  Returns
294
367
  -------
295
- - The Pearson correlation distance between the two vectors.
296
- """
368
+ - The Jaccard distance between the two vectors.
297
369
 
370
+ References
371
+ ----------
372
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
373
+ Measures between Probability Density Functions. International
374
+ Journal of Mathematical Models and Methods in Applied Sciences.
375
+ 1(4), 300-307.
376
+ """
298
377
  u, v = np.asarray(u), np.asarray(v)
299
- r = np.ma.corrcoef(u, v)[0, 1]
300
- return 1.0 - r
378
+ uv = np.dot(u, v)
379
+ return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
301
380
 
302
- def czekanowski(self, u, v):
303
- """
304
- Calculate the Czekanowski distance between two vectors.
381
+ def lorentzian(self, u, v):
382
+ """Calculate the Lorentzian distance between two vectors.
305
383
 
306
384
  Parameters
307
385
  ----------
@@ -309,24 +387,25 @@ class Distance:
309
387
 
310
388
  Returns
311
389
  -------
312
- - The Czekanowski distance between the two vectors.
390
+ - The Lorentzian distance between the two vectors.
313
391
 
314
392
  References
315
393
  ----------
316
394
  1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
317
395
  Measures between Probability Density Functions. International
318
396
  Journal of Mathematical Models and Methods in Applied Sciences.
319
- 1(4), 300-307.
320
- """
321
- u, v = np.asarray(u), np.asarray(v)
322
- return np.sum(np.abs(u - v)) / np.sum(u + v)
397
+ 1(4):300-307.
323
398
 
324
- def dice(self, u, v):
399
+ Notes
400
+ -----
401
+ One (1) is added to guarantee the non-negativity property and to
402
+ eschew the log of zero.
325
403
  """
326
- Calculate the Dice dissimilarity between two vectors.
404
+ u, v = np.asarray(u), np.asarray(v)
405
+ return np.sum(np.log(np.abs(u - v) + 1))
327
406
 
328
- Synonyms:
329
- Sorensen distance
407
+ def marylandbridge(self, u, v):
408
+ """Calculate the Maryland Bridge distance between two vectors.
330
409
 
331
410
  Parameters
332
411
  ----------
@@ -334,26 +413,19 @@ class Distance:
334
413
 
335
414
  Returns
336
415
  -------
337
- - The Dice dissimilarity between the two vectors.
416
+ - The Maryland Bridge distance between the two vectors.
338
417
 
339
418
  References
340
419
  ----------
341
- 1. Dice LR (1945) Measures of the amount of ecologic association
342
- between species. Ecology. 26, 297-302.
343
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
344
- Measures between Probability Density Functions. International
345
- Journal of Mathematical Models and Methods in Applied Sciences.
346
- 1(4), 300-307.
420
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
421
+ Springer-Verlag Berlin Heidelberg. 1-590.
347
422
  """
348
423
  u, v = np.asarray(u), np.asarray(v)
349
- u_v = u - v
350
- return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
351
-
352
- def divergence(self, u, v):
353
- """
354
- Calculate the divergence between two vectors.
424
+ uvdot = np.dot(u, v)
425
+ return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
355
426
 
356
- Divergence equals squared Clark distance multiplied by 2.
427
+ def meehl(self, u, v):
428
+ """Calculate the Meehl distance between two vectors.
357
429
 
358
430
  Parameters
359
431
  ----------
@@ -361,24 +433,30 @@ class Distance:
361
433
 
362
434
  Returns
363
435
  -------
364
- - The divergence between the two vectors.
436
+ - The Meehl distance between the two vectors.
437
+
438
+ Notes
439
+ -----
440
+ Added by SC.
365
441
 
366
442
  References
367
443
  ----------
368
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
369
- Measures between Probability Density Functions. International
370
- Journal of Mathematical Models and Methods in Applied Sciences.
371
- 1(4), 300-307.
444
+ 1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
445
+ Berlin, Heidelberg: Springer Berlin Heidelberg.
446
+ https://doi.org/10.1007/978-3-642-30958-8.
372
447
  """
373
448
  u, v = np.asarray(u), np.asarray(v)
374
- with np.errstate(invalid="ignore"):
375
- return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
376
449
 
377
- def euclidean(self, u, v):
378
- """
379
- Calculate the Euclidean distance between two vectors.
450
+ xi = u[:-1]
451
+ yi = v[:-1]
452
+ xiplus1 = np.roll(u, 1)[:-1]
453
+ yiplus1 = np.roll(v, 1)[:-1]
380
454
 
381
- The Euclidean distance is the "ordinary" straight-line distance between two points in Euclidean space.
455
+ with np.errstate(divide="ignore", invalid="ignore"):
456
+ return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
457
+
458
+ def motyka(self, u, v):
459
+ """Calculate the Motyka distance between two vectors.
382
460
 
383
461
  Parameters
384
462
  ----------
@@ -386,7 +464,11 @@ class Distance:
386
464
 
387
465
  Returns
388
466
  -------
389
- - The Euclidean distance between the two vectors.
467
+ - The Motyka distance between the two vectors.
468
+
469
+ Notes
470
+ -----
471
+ The distance between identical vectors is not equal to 0 but 0.5.
390
472
 
391
473
  References
392
474
  ----------
@@ -396,34 +478,10 @@ class Distance:
396
478
  1(4), 300-307.
397
479
  """
398
480
  u, v = np.asarray(u), np.asarray(v)
399
- return np.linalg.norm(u - v)
400
-
401
- # def fidelity(self, u, v):
402
- # """
403
- # Calculate the fidelity distance between two vectors.
404
-
405
- # The fidelity distance measures the similarity between two probability distributions.
406
-
407
- # Parameters
408
- # ----------
409
- # - u, v: Input vectors between which the distance is to be calculated.
410
-
411
- # Returns
412
- # -------
413
- # - The fidelity distance between the two vectors.
414
-
415
- # Notes
416
- # -----
417
- # Added by SC.
418
- # """
419
- # u, v = np.asarray(u), np.asarray(v)
420
- # return 1 - (np.sum(np.sqrt(u * v)))
421
-
422
- def google(self, u, v):
423
- """
424
- Calculate the Normalized Google Distance (NGD) between two vectors.
481
+ return np.sum(np.maximum(u, v)) / np.sum(u + v)
425
482
 
426
- NGD is a measure of similarity derived from the number of hits returned by the Google search engine for a given set of keywords.
483
+ def soergel(self, u, v):
484
+ """Calculate the Soergel distance between two vectors.
427
485
 
428
486
  Parameters
429
487
  ----------
@@ -431,29 +489,24 @@ class Distance:
431
489
 
432
490
  Returns
433
491
  -------
434
- - The Normalized Google Distance between the two vectors.
492
+ - The Soergel distance between the two vectors.
435
493
 
436
494
  Notes
437
495
  -----
438
- When used for comparing two probability density functions (pdfs),
439
- Google distance equals half of Cityblock distance.
496
+ Equals Tanimoto distance.
440
497
 
441
498
  References
442
499
  ----------
443
- 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
444
- doi:10.1109/ITSIM.2008.4631601.
500
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
501
+ Measures between Probability Density Functions. International
502
+ Journal of Mathematical Models and Methods in Applied Sciences.
503
+ 1(4), 300-307.
445
504
  """
446
505
  u, v = np.asarray(u), np.asarray(v)
447
- x = float(np.sum(u))
448
- y = float(np.sum(v))
449
- summin = float(np.sum(np.minimum(u, v)))
450
- return (max([x, y]) - summin) / ((x + y) - min([x, y]))
451
-
452
- def gower(self, u, v):
453
- """
454
- Calculate the Gower distance between two vectors.
506
+ return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
455
507
 
456
- The Gower distance equals the Cityblock distance divided by the vector length.
508
+ def wave_hedges(self, u, v):
509
+ """Calculate the Wave Hedges distance between two vectors.
457
510
 
458
511
  Parameters
459
512
  ----------
@@ -461,36 +514,23 @@ class Distance:
461
514
 
462
515
  Returns
463
516
  -------
464
- - The Gower distance between the two vectors.
517
+ - The Wave Hedges distance between the two vectors.
465
518
 
466
519
  References
467
520
  ----------
468
- 1. Gower JC. (1971) General Coefficient of Similarity
469
- and Some of Its Properties, Biometrics 27, 857-874.
470
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
521
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
471
522
  Measures between Probability Density Functions. International
472
523
  Journal of Mathematical Models and Methods in Applied Sciences.
473
- 1(4), 300-307.
524
+ 1(4), 300-307
474
525
  """
475
526
  u, v = np.asarray(u), np.asarray(v)
476
- return np.sum(np.abs(u - v)) / u.size
477
-
478
- #### NEEDS CHECKING ####
479
- # def harmonicmean(self, u, v):
480
- # """
481
- # Harmonic mean distance.
482
- # Notes:
483
- # Added by SC.
484
- # """
485
- # u,v = np.asarray(u), np.asarray(v)
486
- # return 1 - 2.*np.sum(u*v/(u+v))
487
- #########
488
-
489
- def hellinger(self, u, v):
490
- """
491
- Calculate the Hellinger distance between two vectors.
527
+ with np.errstate(divide="ignore", invalid="ignore"):
528
+ u_v = abs(u - v)
529
+ uvmax = np.maximum(u, v)
530
+ return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
492
531
 
493
- The Hellinger distance is a measure of similarity between two probability distributions.
532
+ def kulczynski(self, u, v):
533
+ """Calculate the Kulczynski distance between two vectors.
494
534
 
495
535
  Parameters
496
536
  ----------
@@ -498,50 +538,24 @@ class Distance:
498
538
 
499
539
  Returns
500
540
  -------
501
- - The Hellinger distance between the two vectors.
502
-
503
- Notes
504
- -----
505
- This implementation produces values two times larger than values
506
- obtained by Hellinger distance described in Wikipedia and also
507
- in https://gist.github.com/larsmans/3116927.
541
+ - The Kulczynski distance between the two vectors.
508
542
 
509
543
  References
510
544
  ----------
511
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
512
- Measures between Probability Density Functions. International
513
- Journal of Mathematical Models and Methods in Applied Sciences.
514
- 1(4), 300-307.
545
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
546
+ Measures between Probability Density Functions. International
547
+ Journal of Mathematical Models and Methods in Applied Sciences.
548
+ 1(4):300-307.
515
549
  """
516
550
  u, v = np.asarray(u), np.asarray(v)
517
- return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
518
-
519
- # def inner(self, u, v):
520
- # """
521
- # Calculate the inner product distance between two vectors.
522
-
523
- # The inner product distance is a measure of similarity between two vectors, based on their inner product.
524
-
525
- # Parameters
526
- # ----------
527
- # - u, v: Input vectors between which the distance is to be calculated.
528
-
529
- # Returns
530
- # -------
531
- # - The inner product distance between the two vectors.
532
-
533
- # Notes
534
- # -----
535
- # Added by SC.
536
- # """
537
- # u, v = np.asarray(u), np.asarray(v)
538
- # return 1 - np.dot(u, v)
551
+ return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
539
552
 
540
- def jaccard(self, u, v):
541
- """
542
- Calculate the Jaccard distance between two vectors.
553
+ def add_chisq(self, u, v):
554
+ """Compute the Additive Symmetric Chi-square distance between two vectors.
543
555
 
544
- The Jaccard distance measures dissimilarity between sample sets.
556
+ The Additive Symmetric Chi-square distance is a measure that
557
+ can be used to compare two vectors. This function calculates it based
558
+ on the input vectors u and v.
545
559
 
546
560
  Parameters
547
561
  ----------
@@ -549,900 +563,922 @@ class Distance:
549
563
 
550
564
  Returns
551
565
  -------
552
- - The Jaccard distance between the two vectors.
566
+ - The Additive Symmetric Chi-square distance between the two vectors.
553
567
 
554
568
  References
555
569
  ----------
556
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
557
- Measures between Probability Density Functions. International
558
- Journal of Mathematical Models and Methods in Applied Sciences.
559
- 1(4), 300-307.
570
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
571
+ Measures between Probability Density Functions.
572
+ International Journal of Mathematical Models and Methods in
573
+ Applied Sciences.
574
+ vol. 1(4), pp. 300-307.
560
575
  """
561
576
  u, v = np.asarray(u), np.asarray(v)
562
- uv = np.dot(u, v)
563
- return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
564
-
565
- def jeffreys(self, u, v):
566
- """
567
- Calculate the Jeffreys divergence between two vectors.
568
-
569
- The Jeffreys divergence is a symmetric version of the Kullback-Leibler divergence.
570
-
571
- Parameters
572
- ----------
573
- - u, v: Input vectors between which the divergence is to be calculated.
577
+ uvmult = u * v
578
+ with np.errstate(divide="ignore", invalid="ignore"):
579
+ return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
574
580
 
575
- Returns
576
- -------
577
- - The Jeffreys divergence between the two vectors.
578
581
 
579
- References
580
- ----------
581
- 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
582
- in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
583
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
584
- Measures between Probability Density Functions. International
585
- Journal of Mathematical Models and Methods in Applied Sciences.
586
- 1(4), 300-307.
587
- """
588
- u, v = np.asarray(u), np.asarray(v)
589
- # Add epsilon to zeros in vectors to avoid division
590
- # by 0 and/or log of 0. Alternatively, zeros in the
591
- # vectors could be ignored or masked (see below).
592
- # u = ma.masked_where(u == 0, u)
593
- # v = ma.masked_where(v == 0, u)
594
- u = np.where(u == 0, self.epsilon, u)
595
- v = np.where(v == 0, self.epsilon, v)
596
- return np.sum((u - v) * np.log(u / v))
597
-
598
- def jensenshannon_divergence(self, u, v):
599
- """
600
- Calculate the Jensen-Shannon divergence between two vectors.
601
-
602
- The Jensen-Shannon divergence is a symmetric and finite measure of similarity between two probability distributions.
603
-
604
- Parameters
605
- ----------
606
- - u, v: Input vectors between which the divergence is to be calculated.
607
-
608
- Returns
609
- -------
610
- - The Jensen-Shannon divergence between the two vectors.
611
-
612
- References
613
- ----------
614
- 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
615
- IEEE Transactions on Information Theory, 37(1):145–151.
616
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
617
- Measures between Probability Density Functions. International
618
- Journal of Mathematical Models and Methods in Applied Sciences.
619
- 1(4), 300-307.
620
- Comments:
621
- Equals Jensen difference in Sung-Hyuk (2007):
622
- u = np.where(u==0, self.epsilon, u)
623
- v = np.where(v==0, self.epsilon, v)
624
- el1 = (u * np.log(u) + v * np.log(v)) / 2
625
- el2 = (u + v)/2
626
- el3 = np.log(el2)
627
- return np.sum(el1 - el2 * el3)
628
- """
629
- u, v = np.asarray(u), np.asarray(v)
630
- u = np.where(u == 0, self.epsilon, u)
631
- v = np.where(v == 0, self.epsilon, v)
632
- dl = u * np.log(2 * u / (u + v))
633
- dr = v * np.log(2 * v / (u + v))
634
- return (np.sum(dl) + np.sum(dr)) / 2
635
-
636
- def jensen_difference(self, u, v):
637
- """
638
- Calculate the Jensen difference between two vectors.
639
-
640
- The Jensen difference is considered similar to the Jensen-Shannon divergence.
641
-
642
- Parameters
643
- ----------
644
- - u, v: Input vectors between which the distance is to be calculated.
645
-
646
- Returns
647
- -------
648
- - The Jensen difference between the two vectors.
649
-
650
- Notes
651
- -----
652
- 1. Equals half of Topsøe distance
653
- 2. Equals squared jensenshannon_distance.
654
-
655
-
656
- References
657
- ----------
658
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
659
- Measures between Probability Density Functions. International
660
- Journal of Mathematical Models and Methods in Applied Sciences.
661
- 1(4), 300-307.
662
- """
663
- u, v = np.asarray(u), np.asarray(v)
664
- u = np.where(u == 0, self.epsilon, u)
665
- v = np.where(v == 0, self.epsilon, v)
666
- el1 = (u * np.log(u) + v * np.log(v)) / 2
667
- el2 = (u + v) / 2
668
- return np.sum(el1 - el2 * np.log(el2))
669
-
670
- def k_divergence(self, u, v):
671
- """
672
- Calculate the K divergence between two vectors.
673
-
674
- Parameters
675
- ----------
676
- - u, v: Input vectors between which the divergence is to be calculated.
677
-
678
- Returns
679
- -------
680
- - The K divergence between the two vectors.
681
-
682
- References
683
- ----------
684
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
685
- Measures between Probability Density Functions. International
686
- Journal of Mathematical Models and Methods in Applied Sciences.
687
- 1(4), 300-307.
688
- """
689
- u, v = np.asarray(u), np.asarray(v)
690
- u = np.where(u == 0, self.epsilon, u)
691
- v = np.where(v == 0, self.epsilon, v)
692
- return np.sum(u * np.log(2 * u / (u + v)))
693
-
694
- def kl_divergence(self, u, v):
695
- """
696
- Calculate the Kullback-Leibler divergence between two vectors.
697
-
698
- The Kullback-Leibler divergence measures the difference between two probability distributions.
699
-
700
- Parameters
701
- ----------
702
- - u, v: Input vectors between which the divergence is to be calculated.
703
-
704
- Returns
705
- -------
706
- - The Kullback-Leibler divergence between the two vectors.
707
-
708
- References
709
- ----------
710
- 1. Kullback S, Leibler RA (1951) On information and sufficiency.
711
- Ann. Math. Statist. 22:79–86
712
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
713
- Measures between Probability Density Functions. International
714
- Journal of Mathematical Models and Methods in Applied Sciences.
715
- 1(4):300-307.
716
- """
717
- u, v = np.asarray(u), np.asarray(v)
718
- u = np.where(u == 0, self.epsilon, u)
719
- v = np.where(v == 0, self.epsilon, v)
720
- return np.sum(u * np.log(u / v))
721
-
722
- def kulczynski(self, u, v):
723
- """
724
- Calculate the Kulczynski distance between two vectors.
725
-
726
- Parameters
727
- ----------
728
- - u, v: Input vectors between which the distance is to be calculated.
729
-
730
- Returns
731
- -------
732
- - The Kulczynski distance between the two vectors.
733
-
734
- References
735
- ----------
736
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
737
- Measures between Probability Density Functions. International
738
- Journal of Mathematical Models and Methods in Applied Sciences.
739
- 1(4):300-307.
740
- """
741
- u, v = np.asarray(u), np.asarray(v)
742
- return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
743
-
744
- def kumarjohnson(self, u, v):
745
- """
746
- Calculate the Kumar-Johnson distance between two vectors.
747
-
748
- Parameters
749
- ----------
750
- - u, v: Input vectors between which the distance is to be calculated.
751
-
752
- Returns
753
- -------
754
- - The Kumar-Johnson distance between the two vectors.
755
-
756
- References
757
- ----------
758
- 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
759
- and information inequalities, Journal of Inequalities in pure
760
- and applied Mathematics. 6(3).
761
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
762
- Measures between Probability Density Functions. International
763
- Journal of Mathematical Models and Methods in Applied Sciences.
764
- 1(4):300-307.
765
- """
766
- u, v = np.asarray(u), np.asarray(v)
767
- uvmult = u * v
768
- with np.errstate(divide="ignore", invalid="ignore"):
769
- numer = np.power(u**2 - v**2, 2)
770
- denom = 2 * np.power(uvmult, 3 / 2)
771
- return np.sum(np.where(uvmult != 0, numer / denom, 0))
772
-
773
- def lorentzian(self, u, v):
774
- """
775
- Calculate the Lorentzian distance between two vectors.
776
-
777
- Parameters
778
- ----------
779
- - u, v: Input vectors between which the distance is to be calculated.
780
-
781
- Returns
782
- -------
783
- - The Lorentzian distance between the two vectors.
784
-
785
- References
786
- ----------
787
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
788
- Measures between Probability Density Functions. International
789
- Journal of Mathematical Models and Methods in Applied Sciences.
790
- 1(4):300-307.
791
-
792
- Notes
793
- -----
794
- One (1) is added to guarantee the non-negativity property and to
795
- eschew the log of zero.
796
- """
797
- u, v = np.asarray(u), np.asarray(v)
798
- return np.sum(np.log(np.abs(u - v) + 1))
799
-
800
- def cityblock(self, u, v):
801
- """
802
- Calculate the Cityblock (Manhattan) distance between two vectors.
803
-
804
- Parameters
805
- ----------
806
- - u, v: Input vectors between which the distance is to be calculated.
807
-
808
- Returns
809
- -------
810
- - The Cityblock distance between the two vectors.
811
-
812
- References
813
- ----------
814
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
815
- Measures between Probability Density Functions. International
816
- Journal of Mathematical Models and Methods in Applied Sciences.
817
- 1(4):300-307.
818
-
819
- Synonyms:
820
- City block distance
821
- Manhattan distance
822
- Rectilinear distance
823
- Taxicab norm
824
-
825
- Notes
826
- -----
827
- Cityblock distance between two probability density functions
828
- (pdfs) equals:
829
- 1. Non-intersection distance multiplied by 2.
830
- 2. Gower distance multiplied by vector length.
831
- 3. Bray-Curtis distance multiplied by 2.
832
- 4. Google distance multiplied by 2.
833
- """
834
- u, v = np.asarray(u), np.asarray(v)
835
- return np.sum(np.abs(u - v))
836
-
837
- def marylandbridge(self, u, v):
838
- """
839
- Calculate the Maryland Bridge distance between two vectors.
840
-
841
- Parameters
842
- ----------
843
- - u, v: Input vectors between which the distance is to be calculated.
844
-
845
- Returns
846
- -------
847
- - The Maryland Bridge distance between the two vectors.
848
-
849
- References
850
- ----------
851
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
852
- Springer-Verlag Berlin Heidelberg. 1-590.
853
- """
854
- u, v = np.asarray(u), np.asarray(v)
855
- uvdot = np.dot(u, v)
856
- return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
857
-
858
- def matusita(self, u, v):
859
- """
860
- Calculate the Matusita distance between two vectors.
861
-
862
- Parameters
863
- ----------
864
- - u, v: Input vectors between which the distance is to be calculated.
865
-
866
- Returns
867
- -------
868
- - The Matusita distance between the two vectors.
869
-
870
- References
871
- ----------
872
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
873
- Measures between Probability Density Functions. International
874
- Journal of Mathematical Models and Methods in Applied Sciences.
875
- 1(4):300-307.
876
-
877
- Notes
878
- -----
879
- Equals square root of Squared-chord distance.
880
- """
881
- u, v = np.asarray(u), np.asarray(v)
882
- return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
883
-
884
- def max_symmetric_chisq(self, u, v):
885
- """
886
- Calculate the maximum symmetric chi-square distance between two vectors.
887
-
888
- Parameters
889
- ----------
890
- - u, v: Input vectors between which the distance is to be calculated.
891
-
892
- Returns
893
- -------
894
- - The maximum symmetric chi-square distance between the two vectors.
895
-
896
- References
897
- ----------
898
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
899
- Measures between Probability Density Functions. International
900
- Journal of Mathematical Models and Methods in Applied Sciences.
901
- 1(4):300-307.
902
- """
903
- u, v = np.asarray(u), np.asarray(v)
904
- return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
905
-
906
- def min_symmetric_chisq(self, u, v):
907
- """
908
- Calculate the minimum symmetric chi-square distance between two vectors.
909
-
910
- Parameters
911
- ----------
912
- - u, v: Input vectors between which the distance is to be calculated.
913
-
914
- Returns
915
- -------
916
- - The minimum symmetric chi-square distance between the two vectors.
917
-
918
- Notes
919
- -----
920
- Added by SC.
921
- """
922
- u, v = np.asarray(u), np.asarray(v)
923
- return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
924
-
925
- def meehl(self, u, v):
926
- """
927
- Calculate the Meehl distance between two vectors.
928
-
929
- Parameters
930
- ----------
931
- - u, v: Input vectors between which the distance is to be calculated.
932
-
933
- Returns
934
- -------
935
- - The Meehl distance between the two vectors.
936
-
937
- Notes
938
- -----
939
- Added by SC.
940
-
941
- References
942
- ----------
943
- 1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
944
- Berlin, Heidelberg: Springer Berlin Heidelberg.
945
- https://doi.org/10.1007/978-3-642-30958-8.
946
- """
947
- u, v = np.asarray(u), np.asarray(v)
948
-
949
- xi = u[:-1]
950
- yi = v[:-1]
951
- xiplus1 = np.roll(u, 1)[:-1]
952
- yiplus1 = np.roll(v, 1)[:-1]
953
-
954
- with np.errstate(divide="ignore", invalid="ignore"):
955
- return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
956
-
957
- def minkowski(self, u, v, p=2):
958
- """
959
- Calculate the Minkowski distance between two vectors.
960
-
961
- Parameters
962
- ----------
963
- - u, v: Input vectors between which the distance is to be calculated.
964
- - p: The order of the norm of the difference.
965
-
966
- Returns
967
- -------
968
- - The Minkowski distance between the two vectors.
969
-
970
- Notes
971
- -----
972
- When p goes to infinite, the Chebyshev distance is derived.
973
-
974
- References
975
- ----------
976
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
977
- Measures between Probability Density Functions. International
978
- Journal of Mathematical Models and Methods in Applied Sciences.
979
- 1(4):300-307.
980
- """
981
- u, v = np.asarray(u), np.asarray(v)
982
- return np.linalg.norm(u - v, ord=p)
983
-
984
- def motyka(self, u, v):
985
- """
986
- Calculate the Motyka distance between two vectors.
987
-
988
- Parameters
989
- ----------
990
- - u, v: Input vectors between which the distance is to be calculated.
991
-
992
- Returns
993
- -------
994
- - The Motyka distance between the two vectors.
995
-
996
- Notes
997
- -----
998
- The distance between identical vectors is not equal to 0 but 0.5.
999
-
1000
- References
1001
- ----------
1002
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1003
- Measures between Probability Density Functions. International
1004
- Journal of Mathematical Models and Methods in Applied Sciences.
1005
- 1(4), 300-307.
1006
- """
1007
- u, v = np.asarray(u), np.asarray(v)
1008
- return np.sum(np.maximum(u, v)) / np.sum(u + v)
1009
-
1010
- def neyman_chisq(self, u, v):
1011
- """
1012
- Calculate the Neyman chi-square distance between two vectors.
1013
-
1014
- Parameters
1015
- ----------
1016
- - u, v: Input vectors between which the distance is to be calculated.
1017
-
1018
- Returns
1019
- -------
1020
- - The Neyman chi-square distance between the two vectors.
1021
-
1022
- References
1023
- ----------
1024
- 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1025
- In Proceedings of the First Berkley Symposium on Mathematical
1026
- Statistics and Probability.
1027
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1028
- Measures between Probability Density Functions. International
1029
- Journal of Mathematical Models and Methods in Applied Sciences.
1030
- 1(4), 300-307.
1031
- """
1032
- u, v = np.asarray(u), np.asarray(v)
1033
- with np.errstate(divide="ignore", invalid="ignore"):
1034
- return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1035
-
1036
- # def nonintersection(self, u, v):
1037
- # """
1038
- # Calculate the Nonintersection distance between two vectors.
1039
-
1040
- # Parameters
1041
- # ----------
1042
- # - u, v: Input vectors between which the distance is to be calculated.
1043
-
1044
- # Returns
1045
- # -------
1046
- # - The Nonintersection distance between the two vectors.
1047
-
1048
- # References
1049
- # ----------
1050
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1051
- # Measures between Probability Density Functions. International
1052
- # Journal of Mathematical Models and Methods in Applied Sciences.
1053
- # 1(4), 300-307.
1054
-
1055
- # Notes
1056
- # -----
1057
- # When used for comparing two probability density functions (pdfs),
1058
- # Nonintersection distance equals half of Cityblock distance.
1059
- # """
1060
- # u, v = np.asarray(u), np.asarray(v)
1061
- # return 1 - np.sum(np.minimum(u, v))
1062
-
1063
- def pearson_chisq(self, u, v):
1064
- """
1065
- Calculate the Pearson chi-square divergence between two vectors.
1066
-
1067
- Parameters
1068
- ----------
1069
- - u, v: Input vectors between which the divergence is to be calculated.
1070
-
1071
- Returns
1072
- -------
1073
- - The Pearson chi-square divergence between the two vectors.
1074
-
1075
- References
1076
- ----------
1077
- 1. Pearson K. (1900) On the Criterion that a given system of
1078
- deviations from the probable in the case of correlated system
1079
- of variables is such that it can be reasonable supposed to have
1080
- arisen from random sampling, Phil. Mag. 50, 157-172.
1081
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1082
- Measures between Probability Density Functions. International
1083
- Journal of Mathematical Models and Methods in Applied Sciences.
1084
- 1(4), 300-307.
1085
-
1086
- Notes
1087
- -----
1088
- Pearson chi-square divergence is asymmetric.
1089
- """
1090
- u, v = np.asarray(u), np.asarray(v)
1091
- with np.errstate(divide="ignore", invalid="ignore"):
1092
- return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1093
-
1094
- def penroseshape(self, u, v):
1095
- """
1096
- Calculate the Penrose shape distance between two vectors.
1097
-
1098
- Parameters
1099
- ----------
1100
- - u, v: Input vectors between which the distance is to be calculated.
1101
-
1102
- Returns
1103
- -------
1104
- - The Penrose shape distance between the two vectors.
1105
-
1106
- References
1107
- ----------
1108
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1109
- Springer-Verlag Berlin Heidelberg. 1-590.
1110
- """
1111
- u, v = np.asarray(u), np.asarray(v)
1112
- umu = np.mean(u)
1113
- vmu = np.mean(v)
1114
- return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1115
-
1116
- def prob_chisq(self, u, v):
1117
- """
1118
- Calculate the Probabilistic chi-square distance between two vectors.
1119
-
1120
- Parameters
1121
- ----------
1122
- - u, v: Input vectors between which the distance is to be calculated.
1123
-
1124
- Returns
1125
- -------
1126
- - The Probabilistic chi-square distance between the two vectors.
1127
-
1128
- Notes
1129
- -----
1130
- Added by SC.
1131
- """
1132
- u, v = np.asarray(u), np.asarray(v)
1133
- uvsum = u + v
1134
- with np.errstate(divide="ignore", invalid="ignore"):
1135
- return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1136
-
1137
- def ruzicka(self, u, v):
1138
- """
1139
- Calculate the Ruzicka distance between two vectors.
1140
-
1141
- Parameters
1142
- ----------
1143
- - u, v: Input vectors between which the distance is to be calculated.
1144
-
1145
- Returns
1146
- -------
1147
- - The Ruzicka distance between the two vectors.
1148
-
1149
- Notes
1150
- -----
1151
- Added by SC.
1152
- """
1153
- u, v = np.asarray(u), np.asarray(v)
1154
- den = np.sum(np.maximum(u, v))
1155
-
1156
- return 1 - np.sum(np.minimum(u, v)) / den
1157
-
1158
- def sorensen(self, u, v):
1159
- """
1160
- Calculate the Sorensen distance between two vectors.
1161
-
1162
- Parameters
1163
- ----------
1164
- - u, v: Input vectors between which the distance is to be calculated.
1165
-
1166
- Returns
1167
- -------
1168
- - The Sorensen distance between the two vectors.
1169
-
1170
- Notes
1171
- -----
1172
- The Sorensen distance equals the Manhattan distance divided by the sum of the two vectors.
1173
-
1174
- Added by SC.
1175
- """
1176
- u, v = np.asarray(u), np.asarray(v)
1177
- return np.sum(np.abs(u - v)) / np.sum(u + v)
1178
-
1179
- def soergel(self, u, v):
1180
- """
1181
- Calculate the Soergel distance between two vectors.
1182
-
1183
- Parameters
1184
- ----------
1185
- - u, v: Input vectors between which the distance is to be calculated.
1186
-
1187
- Returns
1188
- -------
1189
- - The Soergel distance between the two vectors.
1190
-
1191
- Notes
1192
- -----
1193
- Equals Tanimoto distance.
1194
-
1195
- References
1196
- ----------
1197
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1198
- Measures between Probability Density Functions. International
1199
- Journal of Mathematical Models and Methods in Applied Sciences.
1200
- 1(4), 300-307.
1201
- """
1202
- u, v = np.asarray(u), np.asarray(v)
1203
- return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
1204
-
1205
- def squared_chisq(self, u, v):
1206
- """
1207
- Calculate the Squared chi-square distance between two vectors.
1208
-
1209
- Parameters
1210
- ----------
1211
- - u, v: Input vectors between which the distance is to be calculated.
1212
-
1213
- Returns
1214
- -------
1215
- - The Squared chi-square distance between the two vectors.
1216
-
1217
- References
1218
- ----------
1219
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1220
- Measures between Probability Density Functions. International
1221
- Journal of Mathematical Models and Methods in Applied Sciences.
1222
- 1(4), 300-307.
1223
- """
1224
- u, v = np.asarray(u), np.asarray(v)
1225
- uvsum = u + v
1226
- with np.errstate(divide="ignore", invalid="ignore"):
1227
- return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1228
-
1229
- def squaredchord(self, u, v):
1230
- """
1231
- Calculate the Squared-chord distance between two vectors.
1232
-
1233
- Parameters
1234
- ----------
1235
- - u, v: Input vectors between which the distance is to be calculated.
1236
-
1237
- Returns
1238
- -------
1239
- - The Squared-chord distance between the two vectors.
1240
-
1241
- References
1242
- ----------
1243
- 1. Gavin DG et al. (2003) A statistical approach to evaluating
1244
- distance metrics and analog assignments for pollen records.
1245
- Quaternary Research 60:356–367.
1246
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1247
- Measures between Probability Density Functions. International
1248
- Journal of Mathematical Models and Methods in Applied Sciences.
1249
- 1(4), 300-307.
1250
-
1251
- Notes
1252
- -----
1253
- Equals to squared Matusita distance.
1254
- """
1255
- u, v = np.asarray(u), np.asarray(v)
1256
- return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
1257
-
1258
- def squared_euclidean(self, u, v):
1259
- """
1260
- Calculate the Squared Euclidean distance between two vectors.
1261
-
1262
- Parameters
1263
- ----------
1264
- - u, v: Input vectors between which the distance is to be calculated.
1265
-
1266
- Returns
1267
- -------
1268
- - The Squared Euclidean distance between the two vectors.
1269
-
1270
- References
1271
- ----------
1272
- 1. Gavin DG et al. (2003) A statistical approach to evaluating
1273
- distance metrics and analog assignments for pollen records.
1274
- Quaternary Research 60:356–367.
1275
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1276
- Measures between Probability Density Functions. International
1277
- Journal of Mathematical Models and Methods in Applied Sciences.
1278
- 1(4), 300-307.
1279
-
1280
- Notes
1281
- -----
1282
- Equals to squared Euclidean distance.
1283
- """
1284
- u, v = np.asarray(u), np.asarray(v)
1285
- return np.dot((u - v), (u - v))
1286
-
1287
- def taneja(self, u, v):
1288
- """
1289
- Calculate the Taneja distance between two vectors.
1290
-
1291
- Parameters
1292
- ----------
1293
- - u, v: Input vectors between which the distance is to be calculated.
1294
-
1295
- Returns
1296
- -------
1297
- - The Taneja distance between the two vectors.
1298
-
1299
- References
1300
- ----------
1301
- 1. Taneja IJ. (1995), New Developments in Generalized Information
1302
- Measures, Chapter in: Advances in Imaging and Electron Physics,
1303
- Ed. P.W. Hawkes, 91, 37-135.
1304
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1305
- Measures between Probability Density Functions. International
1306
- Journal of Mathematical Models and Methods in Applied Sciences.
1307
- 1(4), 300-307.
1308
- """
1309
- u, v = np.asarray(u), np.asarray(v)
1310
- u = np.where(u == 0, self.epsilon, u)
1311
- v = np.where(v == 0, self.epsilon, v)
1312
- uvsum = u + v
1313
- return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
1314
-
1315
- def tanimoto(self, u, v):
1316
- """
1317
- Calculate the Tanimoto distance between two vectors.
1318
-
1319
- Parameters
1320
- ----------
1321
- - u, v: Input vectors between which the distance is to be calculated.
1322
-
1323
- Returns
1324
- -------
1325
- - The Tanimoto distance between the two vectors.
1326
-
1327
- References
1328
- ----------
1329
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1330
- Measures between Probability Density Functions. International
1331
- Journal of Mathematical Models and Methods in Applied Sciences.
1332
- 1(4), 300-307.
1333
-
1334
- Notes
1335
- -----
1336
- Equals Soergel distance.
1337
- """
1338
- u, v = np.asarray(u), np.asarray(v)
1339
- # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1340
- usum = np.sum(u)
1341
- vsum = np.sum(v)
1342
- minsum = np.sum(np.minimum(u, v))
1343
- return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
1344
-
1345
- def topsoe(self, u, v):
1346
- """
1347
- Calculate the Topsøe distance between two vectors.
1348
-
1349
- Parameters
1350
- ----------
1351
- - u, v: Input vectors between which the distance is to be calculated.
1352
-
1353
- Returns
1354
- -------
1355
- - The Topsøe distance between the two vectors.
1356
-
1357
- References
1358
- ----------
1359
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1360
- Measures between Probability Density Functions. International
1361
- Journal of Mathematical Models and Methods in Applied Sciences.
1362
- 1(4), 300-307.
1363
-
1364
- Notes
1365
- -----
1366
- Equals two times Jensen-Shannon divergence.
1367
- """
1368
- u, v = np.asarray(u), np.asarray(v)
1369
- u = np.where(u == 0, self.epsilon, u)
1370
- v = np.where(v == 0, self.epsilon, v)
1371
- dl = u * np.log(2 * u / (u + v))
1372
- dr = v * np.log(2 * v / (u + v))
1373
- return np.sum(dl + dr)
1374
-
1375
- def vicis_symmetric_chisq(self, u, v):
1376
- """
1377
- Calculate the Vicis Symmetric chi-square distance between two vectors.
1378
-
1379
- Parameters
1380
- ----------
1381
- - u, v: Input vectors between which the distance is to be calculated.
1382
-
1383
- Returns
1384
- -------
1385
- - The Vicis Symmetric chi-square distance between the two vectors.
1386
-
1387
- References
1388
- ----------
1389
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1390
- Measures between Probability Density Functions. International
1391
- Journal of Mathematical Models and Methods in Applied Sciences.
1392
- 1(4), 300-307
1393
- """
1394
- u, v = np.asarray(u), np.asarray(v)
1395
- with np.errstate(divide="ignore", invalid="ignore"):
1396
- u_v = (u - v) ** 2
1397
- uvmin = np.minimum(u, v) ** 2
1398
- return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1399
-
1400
- def vicis_wave_hedges(self, u, v):
1401
- """
1402
- Calculate the Vicis-Wave Hedges distance between two vectors.
1403
-
1404
- Parameters
1405
- ----------
1406
- - u, v: Input vectors between which the distance is to be calculated.
1407
-
1408
- Returns
1409
- -------
1410
- - The Vicis-Wave Hedges distance between the two vectors.
1411
-
1412
- References
1413
- ----------
1414
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1415
- Measures between Probability Density Functions. International
1416
- Journal of Mathematical Models and Methods in Applied Sciences.
1417
- 1(4), 300-307.
1418
- """
1419
- u, v = np.asarray(u), np.asarray(v)
1420
- with np.errstate(divide="ignore", invalid="ignore"):
1421
- u_v = abs(u - v)
1422
- uvmin = np.minimum(u, v)
1423
- return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1424
-
1425
- def wave_hedges(self, u, v):
1426
- """
1427
- Calculate the Wave Hedges distance between two vectors.
1428
-
1429
- Parameters
1430
- ----------
1431
- - u, v: Input vectors between which the distance is to be calculated.
1432
-
1433
- Returns
1434
- -------
1435
- - The Wave Hedges distance between the two vectors.
1436
-
1437
- References
1438
- ----------
1439
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1440
- Measures between Probability Density Functions. International
1441
- Journal of Mathematical Models and Methods in Applied Sciences.
1442
- 1(4), 300-307
1443
- """
1444
- u, v = np.asarray(u), np.asarray(v)
1445
- with np.errstate(divide="ignore", invalid="ignore"):
1446
- u_v = abs(u - v)
1447
- uvmax = np.maximum(u, v)
1448
- return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
582
+ # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
583
+ # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED OUT.
584
+
585
+ # def acc(self, u, v):
586
+ # """Calculate the average of Cityblock and Chebyshev distance.
587
+
588
+ # This function computes the ACC distance, also known as the
589
+ # Average distance, between two vectors u and v. It is the average of the
590
+ # Cityblock (or Manhattan) and Chebyshev distances.
591
+
592
+ # Parameters
593
+ # ----------
594
+ # - u, v: Input vectors between which the distance is to be calculated.
595
+
596
+ # Returns
597
+ # -------
598
+ # - The ACC distance between the two vectors.
599
+
600
+ # References
601
+ # ----------
602
+ # 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
603
+ # Geometry. Dover Publications.
604
+ # 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
605
+ # Measures between Probability Density Functions. International
606
+ # Journal of Mathematical Models and Methods in Applied Sciences.
607
+ # vol. 1(4), pp. 300-307.
608
+ # """
609
+ # return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
610
+
611
+ # # def bhattacharyya(self, u, v):
612
+ # # """
613
+ # # Calculate the Bhattacharyya distance between two vectors.
614
+
615
+ # # Returns a distance value between 0 and 1.
616
+
617
+ # # Parameters
618
+ # # ----------
619
+ # # - u, v: Input vectors between which the distance is to be calculated.
620
+
621
+ # # Returns
622
+ # # -------
623
+ # # - The Bhattacharyya distance between the two vectors.
624
+
625
+ # # References
626
+ # # ----------
627
+ # # 1. Bhattacharyya A (1947) On a measure of divergence between two
628
+ # # statistical populations defined by probability distributions,
629
+ # # Bull. Calcutta Math. Soc., 35, 99–109.
630
+ # # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
631
+ # # Measures between Probability Density Functions. International
632
+ # # Journal of Mathematical Models and Methods in Applied Sciences.
633
+ # # 1(4), 300-307.
634
+ # # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
635
+ # # """
636
+ # # u, v = np.asarray(u), np.asarray(v)
637
+ # # return -np.log(np.sum(np.sqrt(u * v)))
638
+
639
+ # def chebyshev_min(self, u, v):
640
+ # """Calculate the minimum value distance between two vectors.
641
+
642
+ # This measure represents a custom approach by Zielezinski to distance
643
+ # measurement, focusing on the minimum absolute difference.
644
+
645
+ # Parameters
646
+ # ----------
647
+ # - u, v: Input vectors between which the distance is to be calculated.
648
+
649
+ # Returns
650
+ # -------
651
+ # - The minimum value distance between the two vectors.
652
+ # """
653
+ # u, v = np.asarray(u), np.asarray(v)
654
+ # return np.amin(np.abs(u - v))
655
+
656
+ # def czekanowski(self, u, v):
657
+ # """Calculate the Czekanowski distance between two vectors.
658
+
659
+ # Parameters
660
+ # ----------
661
+ # - u, v: Input vectors between which the distance is to be calculated.
662
+
663
+ # Returns
664
+ # -------
665
+ # - The Czekanowski distance between the two vectors.
666
+
667
+ # References
668
+ # ----------
669
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
670
+ # Measures between Probability Density Functions. International
671
+ # Journal of Mathematical Models and Methods in Applied Sciences.
672
+ # 1(4), 300-307.
673
+ # """
674
+ # u, v = np.asarray(u), np.asarray(v)
675
+ # return np.sum(np.abs(u - v)) / np.sum(u + v)
676
+
677
+ # def dice(self, u, v):
678
+ # """Calculate the Dice dissimilarity between two vectors.
679
+
680
+ # Synonyms:
681
+ # Sorensen distance
682
+
683
+ # Parameters
684
+ # ----------
685
+ # - u, v: Input vectors between which the distance is to be calculated.
686
+
687
+ # Returns
688
+ # -------
689
+ # - The Dice dissimilarity between the two vectors.
690
+
691
+ # References
692
+ # ----------
693
+ # 1. Dice LR (1945) Measures of the amount of ecologic association
694
+ # between species. Ecology. 26, 297-302.
695
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
696
+ # Measures between Probability Density Functions. International
697
+ # Journal of Mathematical Models and Methods in Applied Sciences.
698
+ # 1(4), 300-307.
699
+ # """
700
+ # u, v = np.asarray(u), np.asarray(v)
701
+ # u_v = u - v
702
+ # return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
703
+
704
+ # def divergence(self, u, v):
705
+ # """Calculate the divergence between two vectors.
706
+
707
+ # Divergence equals squared Clark distance multiplied by 2.
708
+
709
+ # Parameters
710
+ # ----------
711
+ # - u, v: Input vectors between which the distance is to be calculated.
712
+
713
+ # Returns
714
+ # -------
715
+ # - The divergence between the two vectors.
716
+
717
+ # References
718
+ # ----------
719
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
720
+ # Measures between Probability Density Functions. International
721
+ # Journal of Mathematical Models and Methods in Applied Sciences.
722
+ # 1(4), 300-307.
723
+ # """
724
+ # u, v = np.asarray(u), np.asarray(v)
725
+ # with np.errstate(invalid="ignore"):
726
+ # return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
727
+
728
+ # # def fidelity(self, u, v):
729
+ # # """
730
+ # # Calculate the fidelity distance between two vectors.
731
+
732
+ # # The fidelity distance measures the similarity between two probability
733
+ # # distributions.
734
+
735
+ # # Parameters
736
+ # # ----------
737
+ # # - u, v: Input vectors between which the distance is to be calculated.
738
+
739
+ # # Returns
740
+ # # -------
741
+ # # - The fidelity distance between the two vectors.
742
+
743
+ # # Notes
744
+ # # -----
745
+ # # Added by SC.
746
+ # # """
747
+ # # u, v = np.asarray(u), np.asarray(v)
748
+ # # return 1 - (np.sum(np.sqrt(u * v)))
749
+
750
+ # def google(self, u, v):
751
+ # """Calculate the Normalized Google Distance (NGD) between two vectors.
752
+
753
+ # NGD is a measure of similarity derived from the number of hits returned by the
754
+ # Google search engine for a given set of keywords.
755
+
756
+ # Parameters
757
+ # ----------
758
+ # - u, v: Input vectors between which the distance is to be calculated.
759
+
760
+ # Returns
761
+ # -------
762
+ # - The Normalized Google Distance between the two vectors.
763
+
764
+ # Notes
765
+ # -----
766
+ # When used for comparing two probability density functions (pdfs),
767
+ # Google distance equals half of Cityblock distance.
768
+
769
+ # References
770
+ # ----------
771
+ # 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
772
+ # doi:10.1109/ITSIM.2008.4631601.
773
+ # """
774
+ # u, v = np.asarray(u), np.asarray(v)
775
+ # x = float(np.sum(u))
776
+ # y = float(np.sum(v))
777
+ # summin = float(np.sum(np.minimum(u, v)))
778
+ # return (max([x, y]) - summin) / ((x + y) - min([x, y]))
779
+
780
+ # def gower(self, u, v):
781
+ # """Calculate the Gower distance between two vectors.
782
+
783
+ # The Gower distance equals the Cityblock distance divided by the vector length.
784
+
785
+ # Parameters
786
+ # ----------
787
+ # - u, v: Input vectors between which the distance is to be calculated.
788
+
789
+ # Returns
790
+ # -------
791
+ # - The Gower distance between the two vectors.
792
+
793
+ # References
794
+ # ----------
795
+ # 1. Gower JC. (1971) General Coefficient of Similarity
796
+ # and Some of Its Properties, Biometrics 27, 857-874.
797
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
798
+ # Measures between Probability Density Functions. International
799
+ # Journal of Mathematical Models and Methods in Applied Sciences.
800
+ # 1(4), 300-307.
801
+ # """
802
+ # u, v = np.asarray(u), np.asarray(v)
803
+ # return np.sum(np.abs(u - v)) / u.size
804
+
805
+ # # NEEDS CHECKING
806
+ # # def harmonicmean(self, u, v):
807
+ # # """
808
+ # # Harmonic mean distance.
809
+ # # Notes:
810
+ # # Added by SC.
811
+ # # """
812
+ # # u,v = np.asarray(u), np.asarray(v)
813
+ # # return 1 - 2.*np.sum(u*v/(u+v))
814
+
815
+ # # def inner(self, u, v):
816
+ # # """
817
+ # # Calculate the inner product distance between two vectors.
818
+
819
+ # # The inner product distance is a measure of similarity between two vectors,
820
+ # # based on their inner product.
821
+
822
+ # # Parameters
823
+ # # ----------
824
+ # # - u, v: Input vectors between which the distance is to be calculated.
825
+
826
+ # # Returns
827
+ # # -------
828
+ # # - The inner product distance between the two vectors.
829
+
830
+ # # Notes
831
+ # # -----
832
+ # # Added by SC.
833
+ # # """
834
+ # # u, v = np.asarray(u), np.asarray(v)
835
+ # # return 1 - np.dot(u, v)
836
+
837
+ # def jeffreys(self, u, v):
838
+ # """Calculate the Jeffreys divergence between two vectors.
839
+
840
+ # The Jeffreys divergence is a symmetric version of the Kullback-Leibler
841
+ # divergence.
842
+
843
+ # Parameters
844
+ # ----------
845
+ # - u, v: Input vectors between which the divergence is to be calculated.
846
+
847
+ # Returns
848
+ # -------
849
+ # - The Jeffreys divergence between the two vectors.
850
+
851
+ # References
852
+ # ----------
853
+ # 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
854
+ # in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
855
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
856
+ # Measures between Probability Density Functions. International
857
+ # Journal of Mathematical Models and Methods in Applied Sciences.
858
+ # 1(4), 300-307.
859
+ # """
860
+ # u, v = np.asarray(u), np.asarray(v)
861
+ # # Add epsilon to zeros in vectors to avoid division
862
+ # # by 0 and/or log of 0. Alternatively, zeros in the
863
+ # # vectors could be ignored or masked (see below).
864
+ # # u = ma.masked_where(u == 0, u)
865
+ # # v = ma.masked_where(v == 0, v)
866
+ # u = np.where(u == 0, self.epsilon, u)
867
+ # v = np.where(v == 0, self.epsilon, v)
868
+ # return np.sum((u - v) * np.log(u / v))
869
+
870
+ # def jensenshannon_divergence(self, u, v):
871
+ # """Calculate the Jensen-Shannon divergence between two vectors.
872
+
873
+ # The Jensen-Shannon divergence is a symmetric and finite measure of similarity
874
+ # between two probability distributions.
875
+
876
+ # Parameters
877
+ # ----------
878
+ # - u, v: Input vectors between which the divergence is to be calculated.
879
+
880
+ # Returns
881
+ # -------
882
+ # - The Jensen-Shannon divergence between the two vectors.
883
+
884
+ # References
885
+ # ----------
886
+ # 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
887
+ # IEEE Transactions on Information Theory, 37(1):145–151.
888
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
889
+ # Measures between Probability Density Functions. International
890
+ # Journal of Mathematical Models and Methods in Applied Sciences.
891
+ # 1(4), 300-307.
892
+ # Comments:
893
+ # Equals Jensen difference in Sung-Hyuk (2007):
894
+ # u = np.where(u==0, self.epsilon, u)
895
+ # v = np.where(v==0, self.epsilon, v)
896
+ # el1 = (u * np.log(u) + v * np.log(v)) / 2
897
+ # el2 = (u + v)/2
898
+ # el3 = np.log(el2)
899
+ # return np.sum(el1 - el2 * el3)
900
+ # """
901
+ # u, v = np.asarray(u), np.asarray(v)
902
+ # u = np.where(u == 0, self.epsilon, u)
903
+ # v = np.where(v == 0, self.epsilon, v)
904
+ # dl = u * np.log(2 * u / (u + v))
905
+ # dr = v * np.log(2 * v / (u + v))
906
+ # return (np.sum(dl) + np.sum(dr)) / 2
907
+
908
+ # def jensen_difference(self, u, v):
909
+ # """Calculate the Jensen difference between two vectors.
910
+
911
+ # The Jensen difference is considered similar to the Jensen-Shannon divergence.
912
+
913
+ # Parameters
914
+ # ----------
915
+ # - u, v: Input vectors between which the distance is to be calculated.
916
+
917
+ # Returns
918
+ # -------
919
+ # - The Jensen difference between the two vectors.
920
+
921
+ # Notes
922
+ # -----
923
+ # 1. Equals half of Topsøe distance
924
+ # 2. Equals squared jensenshannon_distance.
925
+
926
+
927
+ # References
928
+ # ----------
929
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
930
+ # Measures between Probability Density Functions. International
931
+ # Journal of Mathematical Models and Methods in Applied Sciences.
932
+ # 1(4), 300-307.
933
+ # """
934
+ # u, v = np.asarray(u), np.asarray(v)
935
+ # u = np.where(u == 0, self.epsilon, u)
936
+ # v = np.where(v == 0, self.epsilon, v)
937
+ # el1 = (u * np.log(u) + v * np.log(v)) / 2
938
+ # el2 = (u + v) / 2
939
+ # return np.sum(el1 - el2 * np.log(el2))
940
+
941
+ # def k_divergence(self, u, v):
942
+ # """Calculate the K divergence between two vectors.
943
+
944
+ # Parameters
945
+ # ----------
946
+ # - u, v: Input vectors between which the divergence is to be calculated.
947
+
948
+ # Returns
949
+ # -------
950
+ # - The K divergence between the two vectors.
951
+
952
+ # References
953
+ # ----------
954
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
955
+ # Measures between Probability Density Functions. International
956
+ # Journal of Mathematical Models and Methods in Applied Sciences.
957
+ # 1(4), 300-307.
958
+ # """
959
+ # u, v = np.asarray(u), np.asarray(v)
960
+ # u = np.where(u == 0, self.epsilon, u)
961
+ # v = np.where(v == 0, self.epsilon, v)
962
+ # return np.sum(u * np.log(2 * u / (u + v)))
963
+
964
+ # def kl_divergence(self, u, v):
965
+ # """Calculate the Kullback-Leibler divergence between two vectors.
966
+
967
+ # The Kullback-Leibler divergence measures the difference between two
968
+ # probability distributions.
969
+
970
+ # Parameters
971
+ # ----------
972
+ # - u, v: Input vectors between which the divergence is to be calculated.
973
+
974
+ # Returns
975
+ # -------
976
+ # - The Kullback-Leibler divergence between the two vectors.
977
+
978
+ # References
979
+ # ----------
980
+ # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
981
+ # Ann. Math. Statist. 22:79–86
982
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
983
+ # Measures between Probability Density Functions. International
984
+ # Journal of Mathematical Models and Methods in Applied Sciences.
985
+ # 1(4):300-307.
986
+ # """
987
+ # u, v = np.asarray(u), np.asarray(v)
988
+ # u = np.where(u == 0, self.epsilon, u)
989
+ # v = np.where(v == 0, self.epsilon, v)
990
+ # return np.sum(u * np.log(u / v))
991
+
992
+ # def kumarjohnson(self, u, v):
993
+ # """Calculate the Kumar-Johnson distance between two vectors.
994
+
995
+ # Parameters
996
+ # ----------
997
+ # - u, v: Input vectors between which the distance is to be calculated.
998
+
999
+ # Returns
1000
+ # -------
1001
+ # - The Kumar-Johnson distance between the two vectors.
1002
+
1003
+ # References
1004
+ # ----------
1005
+ # 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
1006
+ # and information inequalities, Journal of Inequalities in pure
1007
+ # and applied Mathematics. 6(3).
1008
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1009
+ # Measures between Probability Density Functions. International
1010
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1011
+ # 1(4):300-307.
1012
+ # """
1013
+ # u, v = np.asarray(u), np.asarray(v)
1014
+ # uvmult = u * v
1015
+ # with np.errstate(divide="ignore", invalid="ignore"):
1016
+ # numer = np.power(u**2 - v**2, 2)
1017
+ # denom = 2 * np.power(uvmult, 3 / 2)
1018
+ # return np.sum(np.where(uvmult != 0, numer / denom, 0))
1019
+
1020
+ # def matusita(self, u, v):
1021
+ # """Calculate the Matusita distance between two vectors.
1022
+
1023
+ # Parameters
1024
+ # ----------
1025
+ # - u, v: Input vectors between which the distance is to be calculated.
1026
+
1027
+ # Returns
1028
+ # -------
1029
+ # - The Matusita distance between the two vectors.
1030
+
1031
+ # References
1032
+ # ----------
1033
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1034
+ # Measures between Probability Density Functions. International
1035
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1036
+ # 1(4):300-307.
1037
+
1038
+ # Notes
1039
+ # -----
1040
+ # Equals square root of Squared-chord distance.
1041
+ # """
1042
+ # u, v = np.asarray(u), np.asarray(v)
1043
+ # return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
1044
+
1045
+ # def max_symmetric_chisq(self, u, v):
1046
+ # """Calculate the maximum symmetric chi-square distance.
1047
+
1048
+ # Parameters
1049
+ # ----------
1050
+ # - u, v: Input vectors between which the distance is to be calculated.
1051
+
1052
+ # Returns
1053
+ # -------
1054
+ # - The maximum symmetric chi-square distance between the two vectors.
1055
+
1056
+ # References
1057
+ # ----------
1058
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1059
+ # Measures between Probability Density Functions. International
1060
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1061
+ # 1(4):300-307.
1062
+ # """
1063
+ # u, v = np.asarray(u), np.asarray(v)
1064
+ # return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1065
+
1066
+ # def min_symmetric_chisq(self, u, v):
1067
+ # """Calculate the minimum symmetric chi-square distance.
1068
+
1069
+ # Parameters
1070
+ # ----------
1071
+ # - u, v: Input vectors between which the distance is to be calculated.
1072
+
1073
+ # Returns
1074
+ # -------
1075
+ # - The minimum symmetric chi-square distance between the two vectors.
1076
+
1077
+ # Notes
1078
+ # -----
1079
+ # Added by SC.
1080
+ # """
1081
+ # u, v = np.asarray(u), np.asarray(v)
1082
+ # return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1083
+
1084
+ # def minkowski(self, u, v, p=2):
1085
+ # """Calculate the Minkowski distance between two vectors.
1086
+
1087
+ # Parameters
1088
+ # ----------
1089
+ # - u, v: Input vectors between which the distance is to be calculated.
1090
+ # - p: The order of the norm of the difference.
1091
+
1092
+ # Returns
1093
+ # -------
1094
+ # - The Minkowski distance between the two vectors.
1095
+
1096
+ # Notes
1097
+ # -----
1098
+ # When p approaches infinity, the Chebyshev distance is obtained.
1099
+
1100
+ # References
1101
+ # ----------
1102
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1103
+ # Measures between Probability Density Functions. International
1104
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1105
+ # 1(4):300-307.
1106
+ # """
1107
+ # u, v = np.asarray(u), np.asarray(v)
1108
+ # return np.linalg.norm(u - v, ord=p)
1109
+
1110
+ # def neyman_chisq(self, u, v):
1111
+ # """Calculate the Neyman chi-square distance between two vectors.
1112
+
1113
+ # Parameters
1114
+ # ----------
1115
+ # - u, v: Input vectors between which the distance is to be calculated.
1116
+
1117
+ # Returns
1118
+ # -------
1119
+ # - The Neyman chi-square distance between the two vectors.
1120
+
1121
+ # References
1122
+ # ----------
1123
+ # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1124
+ # In Proceedings of the First Berkeley Symposium on Mathematical
1125
+ # Statistics and Probability.
1126
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1127
+ # Measures between Probability Density Functions. International
1128
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1129
+ # 1(4), 300-307.
1130
+ # """
1131
+ # u, v = np.asarray(u), np.asarray(v)
1132
+ # with np.errstate(divide="ignore", invalid="ignore"):
1133
+ # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1134
+
1135
+ # # def nonintersection(self, u, v):
1136
+ # # """
1137
+ # # Calculate the Nonintersection distance between two vectors.
1138
+
1139
+ # # Parameters
1140
+ # # ----------
1141
+ # # - u, v: Input vectors between which the distance is to be calculated.
1142
+
1143
+ # # Returns
1144
+ # # -------
1145
+ # # - The Nonintersection distance between the two vectors.
1146
+
1147
+ # # References
1148
+ # # ----------
1149
+ # # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1150
+ # # Measures between Probability Density Functions. International
1151
+ # # Journal of Mathematical Models and Methods in Applied Sciences.
1152
+ # # 1(4), 300-307.
1153
+
1154
+ # # Notes
1155
+ # # -----
1156
+ # # When used for comparing two probability density functions (pdfs),
1157
+ # # Nonintersection distance equals half of Cityblock distance.
1158
+ # # """
1159
+ # # u, v = np.asarray(u), np.asarray(v)
1160
+ # # return 1 - np.sum(np.minimum(u, v))
1161
+
1162
+ # def pearson_chisq(self, u, v):
1163
+ # """Calculate the Pearson chi-square divergence between two vectors.
1164
+
1165
+ # Parameters
1166
+ # ----------
1167
+ # - u, v: Input vectors between which the divergence is to be calculated.
1168
+
1169
+ # Returns
1170
+ # -------
1171
+ # - The Pearson chi-square divergence between the two vectors.
1172
+
1173
+ # References
1174
+ # ----------
1175
+ # 1. Pearson K. (1900) On the Criterion that a given system of
1176
+ # deviations from the probable in the case of a correlated system
1177
+ # of variables is such that it can be reasonably supposed to have
1178
+ # arisen from random sampling, Phil. Mag. 50, 157-172.
1179
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1180
+ # Measures between Probability Density Functions. International
1181
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1182
+ # 1(4), 300-307.
1183
+
1184
+ # Notes
1185
+ # -----
1186
+ # Pearson chi-square divergence is asymmetric.
1187
+ # """
1188
+ # u, v = np.asarray(u), np.asarray(v)
1189
+ # with np.errstate(divide="ignore", invalid="ignore"):
1190
+ # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1191
+
1192
+ # def penroseshape(self, u, v):
1193
+ # """Calculate the Penrose shape distance between two vectors.
1194
+
1195
+ # Parameters
1196
+ # ----------
1197
+ # - u, v: Input vectors between which the distance is to be calculated.
1198
+
1199
+ # Returns
1200
+ # -------
1201
+ # - The Penrose shape distance between the two vectors.
1202
+
1203
+ # References
1204
+ # ----------
1205
+ # 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1206
+ # Springer-Verlag Berlin Heidelberg. 1-590.
1207
+ # """
1208
+ # u, v = np.asarray(u), np.asarray(v)
1209
+ # umu = np.mean(u)
1210
+ # vmu = np.mean(v)
1211
+ # return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1212
+
1213
+ # def prob_chisq(self, u, v):
1214
+ # """Calculate the Probabilistic chi-square distance between two vectors.
1215
+
1216
+ # Parameters
1217
+ # ----------
1218
+ # - u, v: Input vectors between which the distance is to be calculated.
1219
+
1220
+ # Returns
1221
+ # -------
1222
+ # - The Probabilistic chi-square distance between the two vectors.
1223
+
1224
+ # Notes
1225
+ # -----
1226
+ # Added by SC.
1227
+ # """
1228
+ # u, v = np.asarray(u), np.asarray(v)
1229
+ # uvsum = u + v
1230
+ # with np.errstate(divide="ignore", invalid="ignore"):
1231
+ # return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1232
+
1233
+ # def ruzicka(self, u, v):
1234
+ # """Calculate the Ruzicka distance between two vectors.
1235
+
1236
+ # Parameters
1237
+ # ----------
1238
+ # - u, v: Input vectors between which the distance is to be calculated.
1239
+
1240
+ # Returns
1241
+ # -------
1242
+ # - The Ruzicka distance between the two vectors.
1243
+
1244
+ # Notes
1245
+ # -----
1246
+ # Added by SC.
1247
+ # """
1248
+ # u, v = np.asarray(u), np.asarray(v)
1249
+ # den = np.sum(np.maximum(u, v))
1250
+
1251
+ # return 1 - np.sum(np.minimum(u, v)) / den
1252
+
1253
+ # def sorensen(self, u, v):
1254
+ # """Calculate the Sorensen distance between two vectors.
1255
+
1256
+ # Parameters
1257
+ # ----------
1258
+ # - u, v: Input vectors between which the distance is to be calculated.
1259
+
1260
+ # Returns
1261
+ # -------
1262
+ # - The Sorensen distance between the two vectors.
1263
+
1264
+ # Notes
1265
+ # -----
1266
+ # The Sorensen distance equals the Manhattan distance divided by the sum of
1267
+ # the two vectors.
1268
+
1269
+ # Added by SC.
1270
+ # """
1271
+ # u, v = np.asarray(u), np.asarray(v)
1272
+ # return np.sum(np.abs(u - v)) / np.sum(u + v)
1273
+
1274
+ # def squared_chisq(self, u, v):
1275
+ # """Calculate the Squared chi-square distance between two vectors.
1276
+
1277
+ # Parameters
1278
+ # ----------
1279
+ # - u, v: Input vectors between which the distance is to be calculated.
1280
+
1281
+ # Returns
1282
+ # -------
1283
+ # - The Squared chi-square distance between the two vectors.
1284
+
1285
+ # References
1286
+ # ----------
1287
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1288
+ # Measures between Probability Density Functions. International
1289
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1290
+ # 1(4), 300-307.
1291
+ # """
1292
+ # u, v = np.asarray(u), np.asarray(v)
1293
+ # uvsum = u + v
1294
+ # with np.errstate(divide="ignore", invalid="ignore"):
1295
+ # return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1296
+
1297
+ # def squaredchord(self, u, v):
1298
+ # """Calculate the Squared-chord distance between two vectors.
1299
+
1300
+ # Parameters
1301
+ # ----------
1302
+ # - u, v: Input vectors between which the distance is to be calculated.
1303
+
1304
+ # Returns
1305
+ # -------
1306
+ # - The Squared-chord distance between the two vectors.
1307
+
1308
+ # References
1309
+ # ----------
1310
+ # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1311
+ # distance metrics and analog assignments for pollen records.
1312
+ # Quaternary Research 60:356–367.
1313
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1314
+ # Measures between Probability Density Functions. International
1315
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1316
+ # 1(4), 300-307.
1317
+
1318
+ # Notes
1319
+ # -----
1320
+ # Equals the squared Matusita distance.
1321
+ # """
1322
+ # u, v = np.asarray(u), np.asarray(v)
1323
+ # return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
1324
+
1325
+ # def squared_euclidean(self, u, v):
1326
+ # """Calculate the Squared Euclidean distance between two vectors.
1327
+
1328
+ # Parameters
1329
+ # ----------
1330
+ # - u, v: Input vectors between which the distance is to be calculated.
1331
+
1332
+ # Returns
1333
+ # -------
1334
+ # - The Squared Euclidean distance between the two vectors.
1335
+
1336
+ # References
1337
+ # ----------
1338
+ # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1339
+ # distance metrics and analog assignments for pollen records.
1340
+ # Quaternary Research 60:356–367.
1341
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1342
+ # Measures between Probability Density Functions. International
1343
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1344
+ # 1(4), 300-307.
1345
+
1346
+ # Notes
1347
+ # -----
1348
+ # Equals the square of the Euclidean distance.
1349
+ # """
1350
+ # u, v = np.asarray(u), np.asarray(v)
1351
+ # return np.dot((u - v), (u - v))
1352
+
1353
+ # def taneja(self, u, v):
1354
+ # """Calculate the Taneja distance between two vectors.
1355
+
1356
+ # Parameters
1357
+ # ----------
1358
+ # - u, v: Input vectors between which the distance is to be calculated.
1359
+
1360
+ # Returns
1361
+ # -------
1362
+ # - The Taneja distance between the two vectors.
1363
+
1364
+ # References
1365
+ # ----------
1366
+ # 1. Taneja IJ. (1995), New Developments in Generalized Information
1367
+ # Measures, Chapter in: Advances in Imaging and Electron Physics,
1368
+ # Ed. P.W. Hawkes, 91, 37-135.
1369
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1370
+ # Measures between Probability Density Functions. International
1371
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1372
+ # 1(4), 300-307.
1373
+ # """
1374
+ # u, v = np.asarray(u), np.asarray(v)
1375
+ # u = np.where(u == 0, self.epsilon, u)
1376
+ # v = np.where(v == 0, self.epsilon, v)
1377
+ # uvsum = u + v
1378
+ # return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
1379
+
1380
+ # def tanimoto(self, u, v):
1381
+ # """Calculate the Tanimoto distance between two vectors.
1382
+
1383
+ # Parameters
1384
+ # ----------
1385
+ # - u, v: Input vectors between which the distance is to be calculated.
1386
+
1387
+ # Returns
1388
+ # -------
1389
+ # - The Tanimoto distance between the two vectors.
1390
+
1391
+ # References
1392
+ # ----------
1393
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1394
+ # Measures between Probability Density Functions. International
1395
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1396
+ # 1(4), 300-307.
1397
+
1398
+ # Notes
1399
+ # -----
1400
+ # Equals Soergel distance.
1401
+ # """
1402
+ # u, v = np.asarray(u), np.asarray(v)
1403
+ # # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1404
+ # usum = np.sum(u)
1405
+ # vsum = np.sum(v)
1406
+ # minsum = np.sum(np.minimum(u, v))
1407
+ # return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
1408
+
1409
+ # def topsoe(self, u, v):
1410
+ # """Calculate the Topsøe distance between two vectors.
1411
+
1412
+ # Parameters
1413
+ # ----------
1414
+ # - u, v: Input vectors between which the distance is to be calculated.
1415
+
1416
+ # Returns
1417
+ # -------
1418
+ # - The Topsøe distance between the two vectors.
1419
+
1420
+ # References
1421
+ # ----------
1422
+ # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1423
+ # Measures between Probability Density Functions. International
1424
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1425
+ # 1(4), 300-307.
1426
+
1427
+ # Notes
1428
+ # -----
1429
+ # Equals two times Jensen-Shannon divergence.
1430
+ # """
1431
+ # u, v = np.asarray(u), np.asarray(v)
1432
+ # u = np.where(u == 0, self.epsilon, u)
1433
+ # v = np.where(v == 0, self.epsilon, v)
1434
+ # dl = u * np.log(2 * u / (u + v))
1435
+ # dr = v * np.log(2 * v / (u + v))
1436
+ # return np.sum(dl + dr)
1437
+
1438
+ # def vicis_symmetric_chisq(self, u, v):
1439
+ # """Calculate the Vicis Symmetric chi-square distance.
1440
+
1441
+ # Parameters
1442
+ # ----------
1443
+ # - u, v: Input vectors between which the distance is to be calculated.
1444
+
1445
+ # Returns
1446
+ # -------
1447
+ # - The Vicis Symmetric chi-square distance between the two vectors.
1448
+
1449
+ # References
1450
+ # ----------
1451
+ # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1452
+ # Measures between Probability Density Functions. International
1453
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1454
+ # 1(4), 300-307.
1455
+ # """
1456
+ # u, v = np.asarray(u), np.asarray(v)
1457
+ # with np.errstate(divide="ignore", invalid="ignore"):
1458
+ # u_v = (u - v) ** 2
1459
+ # uvmin = np.minimum(u, v) ** 2
1460
+ # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1461
+
1462
+ # def vicis_wave_hedges(self, u, v):
1463
+ # """Calculate the Vicis-Wave Hedges distance between two vectors.
1464
+
1465
+ # Parameters
1466
+ # ----------
1467
+ # - u, v: Input vectors between which the distance is to be calculated.
1468
+
1469
+ # Returns
1470
+ # -------
1471
+ # - The Vicis-Wave Hedges distance between the two vectors.
1472
+
1473
+ # References
1474
+ # ----------
1475
+ # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1476
+ # Measures between Probability Density Functions. International
1477
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1478
+ # 1(4), 300-307.
1479
+ # """
1480
+ # u, v = np.asarray(u), np.asarray(v)
1481
+ # with np.errstate(divide="ignore", invalid="ignore"):
1482
+ # u_v = abs(u - v)
1483
+ # uvmin = np.minimum(u, v)
1484
+ # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))