distclassipy 0.1.4__py3-none-any.whl → 0.1.6a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distclassipy/__init__.py +22 -10
- distclassipy/classifier.py +162 -58
- distclassipy/distances.py +1177 -1141
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/METADATA +22 -19
- distclassipy-0.1.6a0.dist-info/RECORD +8 -0
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/WHEEL +1 -1
- distclassipy-0.1.4.dist-info/RECORD +0 -8
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/LICENSE +0 -0
- {distclassipy-0.1.4.dist-info → distclassipy-0.1.6a0.dist-info}/top_level.txt +0 -0
distclassipy/distances.py
CHANGED
|
@@ -1,18 +1,19 @@
|
|
|
1
|
-
"""
|
|
2
|
-
A module providing a variety of distance metrics to calculate the distance between two points.
|
|
1
|
+
"""A module providing a variety of distance metrics.
|
|
3
2
|
|
|
4
|
-
This module includes implementations of various distance metrics, including both
|
|
5
|
-
common measures. It allows for the calculation of distances between
|
|
6
|
-
manner using numpy arrays.
|
|
7
|
-
A part of this code is based on the work of Andrzej Zielezinski, originally retrieved
|
|
8
|
-
|
|
3
|
+
This module includes implementations of various distance metrics, including both
|
|
4
|
+
common and less common measures. It allows for the calculation of distances between
|
|
5
|
+
data points in a vectorized manner using numpy arrays.
|
|
6
|
+
A part of this code is based on the work of Andrzej Zielezinski, originally retrieved
|
|
7
|
+
on 20 November 2022 from
|
|
8
|
+
https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py, # noqa
|
|
9
|
+
which was released via the GNU General Public License v3.0.
|
|
9
10
|
|
|
10
11
|
It was originally modified by Siddharth Chaini on 27 November 2022.
|
|
11
12
|
|
|
12
13
|
Notes
|
|
13
14
|
-----
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
Modifications by Siddharth Chaini include the addition of the following distance
|
|
16
|
+
measures:
|
|
16
17
|
1. Meehl distance
|
|
17
18
|
2. Sorensen distance
|
|
18
19
|
3. Ruzicka distance
|
|
@@ -24,17 +25,64 @@ Notes
|
|
|
24
25
|
|
|
25
26
|
In addition, the following code was added to all functions for array conversion:
|
|
26
27
|
u,v = np.asarray(u), np.asarray(v)
|
|
28
|
+
|
|
29
|
+
Copyright (C) 2024 Siddharth Chaini
|
|
27
30
|
-----
|
|
31
|
+
This program is free software: you can redistribute it and/or modify
|
|
32
|
+
it under the terms of the GNU General Public License as published by
|
|
33
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
34
|
+
(at your option) any later version.
|
|
35
|
+
|
|
36
|
+
This program is distributed in the hope that it will be useful,
|
|
37
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
38
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
39
|
+
GNU General Public License for more details.
|
|
40
|
+
|
|
41
|
+
You should have received a copy of the GNU General Public License
|
|
42
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
28
43
|
"""
|
|
29
44
|
|
|
45
|
+
import warnings
|
|
46
|
+
|
|
30
47
|
import numpy as np
|
|
31
48
|
|
|
49
|
+
import scipy
|
|
50
|
+
|
|
32
51
|
|
|
33
52
|
class Distance:
|
|
53
|
+
"""A class to calculate various distance metrics between vectors.
|
|
54
|
+
|
|
55
|
+
This class provides methods to compute different types of distances between
|
|
56
|
+
two vectors, such as Euclidean, Manhattan, Canberra, and other statistical
|
|
57
|
+
distances. Each method takes two vectors as input and returns the calculated
|
|
58
|
+
distance. The class can handle both numpy arrays and lists, converting them
|
|
59
|
+
internally to numpy arrays for computation.
|
|
60
|
+
|
|
61
|
+
Attributes
|
|
62
|
+
----------
|
|
63
|
+
epsilon : float, optional
|
|
64
|
+
A small value to avoid division by zero errors in certain distance
|
|
65
|
+
calculations. Default is the machine precision for float data type.
|
|
66
|
+
|
|
67
|
+
Methods
|
|
68
|
+
-------
|
|
69
|
+
acc(u, v)
|
|
70
|
+
Returns the average of Cityblock/Manhattan and Chebyshev distances.
|
|
71
|
+
add_chisq(u, v)
|
|
72
|
+
Returns the Additive Symmetric Chi-square distance.
|
|
73
|
+
(Other methods are not listed here for brevity)
|
|
74
|
+
|
|
75
|
+
Examples
|
|
76
|
+
--------
|
|
77
|
+
>>> dist = Distance()
|
|
78
|
+
>>> u = [1, 2, 3]
|
|
79
|
+
>>> v = [4, 5, 6]
|
|
80
|
+
>>> print(dist.acc(u, v))
|
|
81
|
+
5.0
|
|
82
|
+
"""
|
|
34
83
|
|
|
35
84
|
def __init__(self, epsilon=None):
|
|
36
|
-
"""
|
|
37
|
-
Initialize the Distance class with an optional epsilon value.
|
|
85
|
+
"""Initialize the Distance class with an optional epsilon value.
|
|
38
86
|
|
|
39
87
|
Parameters
|
|
40
88
|
----------
|
|
@@ -42,34 +90,11 @@ class Distance:
|
|
|
42
90
|
"""
|
|
43
91
|
self.epsilon = np.finfo(float).eps if not epsilon else epsilon
|
|
44
92
|
|
|
45
|
-
def
|
|
46
|
-
"""
|
|
47
|
-
Calculate the average of Cityblock/Manhattan and Chebyshev distances.
|
|
48
|
-
This function computes the ACC distance, also known as the Average distance, between two
|
|
49
|
-
vectors u and v. It is the average of the Cityblock (or Manhattan) and Chebyshev distances.
|
|
50
|
-
|
|
51
|
-
Parameters
|
|
52
|
-
----------
|
|
53
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
54
|
-
|
|
55
|
-
Returns
|
|
56
|
-
-------
|
|
57
|
-
- The ACC distance between the two vectors.
|
|
58
|
-
|
|
59
|
-
References
|
|
60
|
-
----------
|
|
61
|
-
1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean Geometry. Dover Publications.
|
|
62
|
-
2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
|
|
63
|
-
Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
|
|
64
|
-
vol. 1(4), pp. 300-307.
|
|
65
|
-
"""
|
|
66
|
-
return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
|
|
93
|
+
def euclidean(self, u, v, w=None):
|
|
94
|
+
"""Calculate the Euclidean distance between two vectors.
|
|
67
95
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
Compute the Additive Symmetric Chi-square distance between two vectors.
|
|
71
|
-
The Additive Symmetric Chi-square distance is a measure that can be used to compare two vectors.
|
|
72
|
-
This function calculates it based on the input vectors u and v.
|
|
96
|
+
The Euclidean distance is the "ordinary" straight-line distance between two
|
|
97
|
+
points in Euclidean space.
|
|
73
98
|
|
|
74
99
|
Parameters
|
|
75
100
|
----------
|
|
@@ -77,55 +102,26 @@ class Distance:
|
|
|
77
102
|
|
|
78
103
|
Returns
|
|
79
104
|
-------
|
|
80
|
-
- The
|
|
105
|
+
- The Euclidean distance between the two vectors.
|
|
81
106
|
|
|
82
107
|
References
|
|
83
108
|
----------
|
|
84
|
-
1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
85
|
-
Density Functions. International
|
|
86
|
-
|
|
109
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
110
|
+
Measures between Probability Density Functions. International
|
|
111
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
112
|
+
1(4), 300-307.
|
|
87
113
|
"""
|
|
88
114
|
u, v = np.asarray(u), np.asarray(v)
|
|
89
|
-
|
|
90
|
-
with np.errstate(divide="ignore", invalid="ignore"):
|
|
91
|
-
return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
|
|
115
|
+
return scipy.spatial.distance.euclidean(u, v, w)
|
|
92
116
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
# Calculate the Bhattacharyya distance between two vectors.
|
|
96
|
-
|
|
97
|
-
# Returns a distance value between 0 and 1.
|
|
98
|
-
|
|
99
|
-
# Parameters
|
|
100
|
-
# ----------
|
|
101
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
102
|
-
|
|
103
|
-
# Returns
|
|
104
|
-
# -------
|
|
105
|
-
# - The Bhattacharyya distance between the two vectors.
|
|
106
|
-
|
|
107
|
-
# References
|
|
108
|
-
# ----------
|
|
109
|
-
# 1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
110
|
-
# statistical populations defined by probability distributions,
|
|
111
|
-
# Bull. Calcutta Math. Soc., 35, 99–109.
|
|
112
|
-
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
113
|
-
# Measures between Probability Density Functions. International
|
|
114
|
-
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
115
|
-
# 1(4), 300-307.
|
|
116
|
-
# 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
117
|
-
# """
|
|
118
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
119
|
-
# return -np.log(np.sum(np.sqrt(u * v)))
|
|
120
|
-
|
|
121
|
-
def braycurtis(self, u, v):
|
|
122
|
-
"""
|
|
123
|
-
Calculate the Bray-Curtis distance between two vectors.
|
|
117
|
+
def braycurtis(self, u, v, w=None):
|
|
118
|
+
"""Calculate the Bray-Curtis distance between two vectors.
|
|
124
119
|
|
|
125
|
-
The Bray-Curtis distance is a measure of dissimilarity between two non-negative
|
|
126
|
-
often used in ecology to measure the compositional dissimilarity
|
|
127
|
-
of species at both sites. It is closely
|
|
128
|
-
Bray-Curtis
|
|
120
|
+
The Bray-Curtis distance is a measure of dissimilarity between two non-negative
|
|
121
|
+
vectors, often used in ecology to measure the compositional dissimilarity
|
|
122
|
+
between two sites based on counts of species at both sites. It is closely
|
|
123
|
+
related to the Sørensen distance and is also known as Bray-Curtis
|
|
124
|
+
dissimilarity.
|
|
129
125
|
|
|
130
126
|
Notes
|
|
131
127
|
-----
|
|
@@ -151,13 +147,13 @@ class Distance:
|
|
|
151
147
|
3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
|
|
152
148
|
"""
|
|
153
149
|
u, v = np.asarray(u), np.asarray(v)
|
|
154
|
-
return
|
|
150
|
+
return scipy.spatial.distance.braycurtis(u, v, w)
|
|
155
151
|
|
|
156
|
-
def canberra(self, u, v):
|
|
157
|
-
"""
|
|
158
|
-
Calculate the Canberra distance between two vectors.
|
|
152
|
+
def canberra(self, u, v, w=None):
|
|
153
|
+
"""Calculate the Canberra distance between two vectors.
|
|
159
154
|
|
|
160
|
-
The Canberra distance is a weighted version of the Manhattan distance, used
|
|
155
|
+
The Canberra distance is a weighted version of the Manhattan distance, used
|
|
156
|
+
in numerical analysis.
|
|
161
157
|
|
|
162
158
|
Notes
|
|
163
159
|
-----
|
|
@@ -180,14 +176,49 @@ class Distance:
|
|
|
180
176
|
1(4), 300-307.
|
|
181
177
|
"""
|
|
182
178
|
u, v = np.asarray(u), np.asarray(v)
|
|
183
|
-
|
|
184
|
-
|
|
179
|
+
return scipy.spatial.distance.canberra(u, v, w)
|
|
180
|
+
|
|
181
|
+
def cityblock(self, u, v, w=None):
|
|
182
|
+
"""Calculate the Cityblock (Manhattan) distance between two vectors.
|
|
183
|
+
|
|
184
|
+
Parameters
|
|
185
|
+
----------
|
|
186
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
187
|
+
|
|
188
|
+
Returns
|
|
189
|
+
-------
|
|
190
|
+
- The Cityblock distance between the two vectors.
|
|
191
|
+
|
|
192
|
+
References
|
|
193
|
+
----------
|
|
194
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
195
|
+
Measures between Probability Density Functions. International
|
|
196
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
197
|
+
1(4):300-307.
|
|
198
|
+
|
|
199
|
+
Synonyms:
|
|
200
|
+
City block distance
|
|
201
|
+
Manhattan distance
|
|
202
|
+
Rectilinear distance
|
|
203
|
+
Taxicab norm
|
|
185
204
|
|
|
186
|
-
|
|
205
|
+
Notes
|
|
206
|
+
-----
|
|
207
|
+
Cityblock distance between two probability density functions
|
|
208
|
+
(pdfs) equals:
|
|
209
|
+
1. Non-intersection distance multiplied by 2.
|
|
210
|
+
2. Gower distance multiplied by vector length.
|
|
211
|
+
3. Bray-Curtis distance multiplied by 2.
|
|
212
|
+
4. Google distance multiplied by 2.
|
|
187
213
|
"""
|
|
188
|
-
|
|
214
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
215
|
+
return scipy.spatial.distance.cityblock(u, v, w)
|
|
189
216
|
|
|
190
|
-
|
|
217
|
+
def chebyshev(self, u, v, w=None):
|
|
218
|
+
"""Calculate the Chebyshev distance between two vectors.
|
|
219
|
+
|
|
220
|
+
The Chebyshev distance is a metric defined on a vector space where the distance
|
|
221
|
+
between two vectors
|
|
191
222
|
is the greatest of their differences along any coordinate dimension.
|
|
192
223
|
|
|
193
224
|
Synonyms:
|
|
@@ -212,13 +243,12 @@ class Distance:
|
|
|
212
243
|
1(4), 300-307.
|
|
213
244
|
"""
|
|
214
245
|
u, v = np.asarray(u), np.asarray(v)
|
|
215
|
-
return
|
|
246
|
+
return scipy.spatial.distance.chebyshev(u, v, w)
|
|
216
247
|
|
|
217
|
-
def
|
|
218
|
-
"""
|
|
219
|
-
Calculate the minimum value distance between two vectors.
|
|
248
|
+
def correlation(self, u, v, w=None, centered=True):
|
|
249
|
+
"""Calculate the Pearson correlation distance between two vectors.
|
|
220
250
|
|
|
221
|
-
|
|
251
|
+
Returns a distance value between 0 and 2.
|
|
222
252
|
|
|
223
253
|
Parameters
|
|
224
254
|
----------
|
|
@@ -226,14 +256,47 @@ class Distance:
|
|
|
226
256
|
|
|
227
257
|
Returns
|
|
228
258
|
-------
|
|
229
|
-
- The
|
|
259
|
+
- The Pearson correlation distance between the two vectors.
|
|
230
260
|
"""
|
|
231
261
|
u, v = np.asarray(u), np.asarray(v)
|
|
232
|
-
|
|
262
|
+
if len(u) < 2 or len(v) < 2:
|
|
263
|
+
warnings.warn(
|
|
264
|
+
"Pearson correlation requires vectors of length at least 2.",
|
|
265
|
+
RuntimeWarning,
|
|
266
|
+
)
|
|
267
|
+
d = 0
|
|
268
|
+
else:
|
|
269
|
+
d = scipy.spatial.distance.correlation(u, v, w, centered)
|
|
270
|
+
if np.isnan(d) and (
|
|
271
|
+
np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
|
|
272
|
+
):
|
|
273
|
+
warnings.warn(
|
|
274
|
+
"One of the vectors is constant; correlation is set to 0",
|
|
275
|
+
RuntimeWarning,
|
|
276
|
+
)
|
|
277
|
+
d = 0
|
|
278
|
+
return d
|
|
279
|
+
|
|
280
|
+
def cosine(self, u, v, w=None):
|
|
281
|
+
"""Calculate the cosine distance between two vectors.
|
|
233
282
|
|
|
234
|
-
|
|
283
|
+
Parameters
|
|
284
|
+
----------
|
|
285
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
286
|
+
|
|
287
|
+
Returns
|
|
288
|
+
-------
|
|
289
|
+
- The cosine distance between the two vectors.
|
|
290
|
+
|
|
291
|
+
References
|
|
292
|
+
----------
|
|
293
|
+
1. SciPy.
|
|
235
294
|
"""
|
|
236
|
-
|
|
295
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
296
|
+
return scipy.spatial.distance.cosine(u, v, w)
|
|
297
|
+
|
|
298
|
+
def clark(self, u, v):
|
|
299
|
+
"""Calculate the Clark distance between two vectors.
|
|
237
300
|
|
|
238
301
|
The Clark distance equals the square root of half of the divergence.
|
|
239
302
|
|
|
@@ -261,9 +324,11 @@ class Distance:
|
|
|
261
324
|
with np.errstate(divide="ignore", invalid="ignore"):
|
|
262
325
|
return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
|
|
263
326
|
|
|
264
|
-
def
|
|
265
|
-
"""
|
|
266
|
-
|
|
327
|
+
def hellinger(self, u, v):
|
|
328
|
+
"""Calculate the Hellinger distance between two vectors.
|
|
329
|
+
|
|
330
|
+
The Hellinger distance is a measure of similarity between two probability
|
|
331
|
+
distributions.
|
|
267
332
|
|
|
268
333
|
Parameters
|
|
269
334
|
----------
|
|
@@ -271,20 +336,28 @@ class Distance:
|
|
|
271
336
|
|
|
272
337
|
Returns
|
|
273
338
|
-------
|
|
274
|
-
- The
|
|
339
|
+
- The Hellinger distance between the two vectors.
|
|
340
|
+
|
|
341
|
+
Notes
|
|
342
|
+
-----
|
|
343
|
+
This implementation produces values two times larger than values
|
|
344
|
+
obtained by Hellinger distance described in Wikipedia and also
|
|
345
|
+
in https://gist.github.com/larsmans/3116927.
|
|
275
346
|
|
|
276
347
|
References
|
|
277
348
|
----------
|
|
278
|
-
|
|
349
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
350
|
+
Measures between Probability Density Functions. International
|
|
351
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
352
|
+
1(4), 300-307.
|
|
279
353
|
"""
|
|
280
354
|
u, v = np.asarray(u), np.asarray(v)
|
|
281
|
-
return
|
|
355
|
+
return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
282
356
|
|
|
283
|
-
def
|
|
284
|
-
"""
|
|
285
|
-
Calculate the Pearson correlation distance between two vectors.
|
|
357
|
+
def jaccard(self, u, v):
|
|
358
|
+
"""Calculate the Jaccard distance between two vectors.
|
|
286
359
|
|
|
287
|
-
|
|
360
|
+
The Jaccard distance measures dissimilarity between sample sets.
|
|
288
361
|
|
|
289
362
|
Parameters
|
|
290
363
|
----------
|
|
@@ -292,16 +365,21 @@ class Distance:
|
|
|
292
365
|
|
|
293
366
|
Returns
|
|
294
367
|
-------
|
|
295
|
-
- The
|
|
296
|
-
"""
|
|
368
|
+
- The Jaccard distance between the two vectors.
|
|
297
369
|
|
|
370
|
+
References
|
|
371
|
+
----------
|
|
372
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
373
|
+
Measures between Probability Density Functions. International
|
|
374
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
375
|
+
1(4), 300-307.
|
|
376
|
+
"""
|
|
298
377
|
u, v = np.asarray(u), np.asarray(v)
|
|
299
|
-
|
|
300
|
-
return 1.
|
|
378
|
+
uv = np.dot(u, v)
|
|
379
|
+
return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
|
|
301
380
|
|
|
302
|
-
def
|
|
303
|
-
"""
|
|
304
|
-
Calculate the Czekanowski distance between two vectors.
|
|
381
|
+
def lorentzian(self, u, v):
|
|
382
|
+
"""Calculate the Lorentzian distance between two vectors.
|
|
305
383
|
|
|
306
384
|
Parameters
|
|
307
385
|
----------
|
|
@@ -309,24 +387,25 @@ class Distance:
|
|
|
309
387
|
|
|
310
388
|
Returns
|
|
311
389
|
-------
|
|
312
|
-
- The
|
|
390
|
+
- The Lorentzian distance between the two vectors.
|
|
313
391
|
|
|
314
392
|
References
|
|
315
393
|
----------
|
|
316
394
|
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
317
395
|
Measures between Probability Density Functions. International
|
|
318
396
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
319
|
-
1(4)
|
|
320
|
-
"""
|
|
321
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
322
|
-
return np.sum(np.abs(u - v)) / np.sum(u + v)
|
|
397
|
+
1(4):300-307.
|
|
323
398
|
|
|
324
|
-
|
|
399
|
+
Notes
|
|
400
|
+
-----
|
|
401
|
+
One (1) is added to guarantee the non-negativity property and to
|
|
402
|
+
eschew the log of zero.
|
|
325
403
|
"""
|
|
326
|
-
|
|
404
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
405
|
+
return np.sum(np.log(np.abs(u - v) + 1))
|
|
327
406
|
|
|
328
|
-
|
|
329
|
-
|
|
407
|
+
def marylandbridge(self, u, v):
|
|
408
|
+
"""Calculate the Maryland Bridge distance between two vectors.
|
|
330
409
|
|
|
331
410
|
Parameters
|
|
332
411
|
----------
|
|
@@ -334,26 +413,19 @@ class Distance:
|
|
|
334
413
|
|
|
335
414
|
Returns
|
|
336
415
|
-------
|
|
337
|
-
- The
|
|
416
|
+
- The Maryland Bridge distance between the two vectors.
|
|
338
417
|
|
|
339
418
|
References
|
|
340
419
|
----------
|
|
341
|
-
1.
|
|
342
|
-
|
|
343
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
344
|
-
Measures between Probability Density Functions. International
|
|
345
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
346
|
-
1(4), 300-307.
|
|
420
|
+
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
421
|
+
Springer-Verlag Berlin Heidelberg. 1-590.
|
|
347
422
|
"""
|
|
348
423
|
u, v = np.asarray(u), np.asarray(v)
|
|
349
|
-
|
|
350
|
-
return
|
|
351
|
-
|
|
352
|
-
def divergence(self, u, v):
|
|
353
|
-
"""
|
|
354
|
-
Calculate the divergence between two vectors.
|
|
424
|
+
uvdot = np.dot(u, v)
|
|
425
|
+
return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
|
|
355
426
|
|
|
356
|
-
|
|
427
|
+
def meehl(self, u, v):
|
|
428
|
+
"""Calculate the Meehl distance between two vectors.
|
|
357
429
|
|
|
358
430
|
Parameters
|
|
359
431
|
----------
|
|
@@ -361,24 +433,30 @@ class Distance:
|
|
|
361
433
|
|
|
362
434
|
Returns
|
|
363
435
|
-------
|
|
364
|
-
- The
|
|
436
|
+
- The Meehl distance between the two vectors.
|
|
437
|
+
|
|
438
|
+
Notes
|
|
439
|
+
-----
|
|
440
|
+
Added by SC.
|
|
365
441
|
|
|
366
442
|
References
|
|
367
443
|
----------
|
|
368
|
-
1.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
1(4), 300-307.
|
|
444
|
+
1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
|
|
445
|
+
Berlin, Heidelberg: Springer Berlin Heidelberg.
|
|
446
|
+
https://doi.org/10.1007/978-3-642-30958-8.
|
|
372
447
|
"""
|
|
373
448
|
u, v = np.asarray(u), np.asarray(v)
|
|
374
|
-
with np.errstate(invalid="ignore"):
|
|
375
|
-
return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
|
|
376
449
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
450
|
+
xi = u[:-1]
|
|
451
|
+
yi = v[:-1]
|
|
452
|
+
xiplus1 = np.roll(u, 1)[:-1]
|
|
453
|
+
yiplus1 = np.roll(v, 1)[:-1]
|
|
380
454
|
|
|
381
|
-
|
|
455
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
456
|
+
return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
|
|
457
|
+
|
|
458
|
+
def motyka(self, u, v):
|
|
459
|
+
"""Calculate the Motyka distance between two vectors.
|
|
382
460
|
|
|
383
461
|
Parameters
|
|
384
462
|
----------
|
|
@@ -386,7 +464,11 @@ class Distance:
|
|
|
386
464
|
|
|
387
465
|
Returns
|
|
388
466
|
-------
|
|
389
|
-
- The
|
|
467
|
+
- The Motyka distance between the two vectors.
|
|
468
|
+
|
|
469
|
+
Notes
|
|
470
|
+
-----
|
|
471
|
+
The distance between identical vectors is not equal to 0 but 0.5.
|
|
390
472
|
|
|
391
473
|
References
|
|
392
474
|
----------
|
|
@@ -396,34 +478,10 @@ class Distance:
|
|
|
396
478
|
1(4), 300-307.
|
|
397
479
|
"""
|
|
398
480
|
u, v = np.asarray(u), np.asarray(v)
|
|
399
|
-
return np.
|
|
400
|
-
|
|
401
|
-
# def fidelity(self, u, v):
|
|
402
|
-
# """
|
|
403
|
-
# Calculate the fidelity distance between two vectors.
|
|
404
|
-
|
|
405
|
-
# The fidelity distance measures the similarity between two probability distributions.
|
|
406
|
-
|
|
407
|
-
# Parameters
|
|
408
|
-
# ----------
|
|
409
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
410
|
-
|
|
411
|
-
# Returns
|
|
412
|
-
# -------
|
|
413
|
-
# - The fidelity distance between the two vectors.
|
|
414
|
-
|
|
415
|
-
# Notes
|
|
416
|
-
# -----
|
|
417
|
-
# Added by SC.
|
|
418
|
-
# """
|
|
419
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
420
|
-
# return 1 - (np.sum(np.sqrt(u * v)))
|
|
421
|
-
|
|
422
|
-
def google(self, u, v):
|
|
423
|
-
"""
|
|
424
|
-
Calculate the Normalized Google Distance (NGD) between two vectors.
|
|
481
|
+
return np.sum(np.maximum(u, v)) / np.sum(u + v)
|
|
425
482
|
|
|
426
|
-
|
|
483
|
+
def soergel(self, u, v):
|
|
484
|
+
"""Calculate the Soergel distance between two vectors.
|
|
427
485
|
|
|
428
486
|
Parameters
|
|
429
487
|
----------
|
|
@@ -431,29 +489,24 @@ class Distance:
|
|
|
431
489
|
|
|
432
490
|
Returns
|
|
433
491
|
-------
|
|
434
|
-
- The
|
|
492
|
+
- The Soergel distance between the two vectors.
|
|
435
493
|
|
|
436
494
|
Notes
|
|
437
495
|
-----
|
|
438
|
-
|
|
439
|
-
Google distance equals half of Cityblock distance.
|
|
496
|
+
Equals Tanimoto distance.
|
|
440
497
|
|
|
441
498
|
References
|
|
442
499
|
----------
|
|
443
|
-
1.
|
|
444
|
-
|
|
500
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
501
|
+
Measures between Probability Density Functions. International
|
|
502
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
503
|
+
1(4), 300-307.
|
|
445
504
|
"""
|
|
446
505
|
u, v = np.asarray(u), np.asarray(v)
|
|
447
|
-
|
|
448
|
-
y = float(np.sum(v))
|
|
449
|
-
summin = float(np.sum(np.minimum(u, v)))
|
|
450
|
-
return (max([x, y]) - summin) / ((x + y) - min([x, y]))
|
|
451
|
-
|
|
452
|
-
def gower(self, u, v):
|
|
453
|
-
"""
|
|
454
|
-
Calculate the Gower distance between two vectors.
|
|
506
|
+
return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
|
|
455
507
|
|
|
456
|
-
|
|
508
|
+
def wave_hedges(self, u, v):
|
|
509
|
+
"""Calculate the Wave Hedges distance between two vectors.
|
|
457
510
|
|
|
458
511
|
Parameters
|
|
459
512
|
----------
|
|
@@ -461,36 +514,23 @@ class Distance:
|
|
|
461
514
|
|
|
462
515
|
Returns
|
|
463
516
|
-------
|
|
464
|
-
- The
|
|
517
|
+
- The Wave Hedges distance between the two vectors.
|
|
465
518
|
|
|
466
519
|
References
|
|
467
520
|
----------
|
|
468
|
-
1.
|
|
469
|
-
and Some of Its Properties, Biometrics 27, 857-874.
|
|
470
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
521
|
+
1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
471
522
|
Measures between Probability Density Functions. International
|
|
472
523
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
473
|
-
1(4), 300-307
|
|
524
|
+
1(4), 300-307
|
|
474
525
|
"""
|
|
475
526
|
u, v = np.asarray(u), np.asarray(v)
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
# """
|
|
481
|
-
# Harmonic mean distance.
|
|
482
|
-
# Notes:
|
|
483
|
-
# Added by SC.
|
|
484
|
-
# """
|
|
485
|
-
# u,v = np.asarray(u), np.asarray(v)
|
|
486
|
-
# return 1 - 2.*np.sum(u*v/(u+v))
|
|
487
|
-
#########
|
|
488
|
-
|
|
489
|
-
def hellinger(self, u, v):
|
|
490
|
-
"""
|
|
491
|
-
Calculate the Hellinger distance between two vectors.
|
|
527
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
528
|
+
u_v = abs(u - v)
|
|
529
|
+
uvmax = np.maximum(u, v)
|
|
530
|
+
return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
|
|
492
531
|
|
|
493
|
-
|
|
532
|
+
def kulczynski(self, u, v):
|
|
533
|
+
"""Calculate the Kulczynski distance between two vectors.
|
|
494
534
|
|
|
495
535
|
Parameters
|
|
496
536
|
----------
|
|
@@ -498,50 +538,24 @@ class Distance:
|
|
|
498
538
|
|
|
499
539
|
Returns
|
|
500
540
|
-------
|
|
501
|
-
- The
|
|
502
|
-
|
|
503
|
-
Notes
|
|
504
|
-
-----
|
|
505
|
-
This implementation produces values two times larger than values
|
|
506
|
-
obtained by Hellinger distance described in Wikipedia and also
|
|
507
|
-
in https://gist.github.com/larsmans/3116927.
|
|
541
|
+
- The Kulczynski distance between the two vectors.
|
|
508
542
|
|
|
509
543
|
References
|
|
510
544
|
----------
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
545
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
546
|
+
Measures between Probability Density Functions. International
|
|
547
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
548
|
+
1(4):300-307.
|
|
515
549
|
"""
|
|
516
550
|
u, v = np.asarray(u), np.asarray(v)
|
|
517
|
-
return np.
|
|
518
|
-
|
|
519
|
-
# def inner(self, u, v):
|
|
520
|
-
# """
|
|
521
|
-
# Calculate the inner product distance between two vectors.
|
|
522
|
-
|
|
523
|
-
# The inner product distance is a measure of similarity between two vectors, based on their inner product.
|
|
524
|
-
|
|
525
|
-
# Parameters
|
|
526
|
-
# ----------
|
|
527
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
528
|
-
|
|
529
|
-
# Returns
|
|
530
|
-
# -------
|
|
531
|
-
# - The inner product distance between the two vectors.
|
|
532
|
-
|
|
533
|
-
# Notes
|
|
534
|
-
# -----
|
|
535
|
-
# Added by SC.
|
|
536
|
-
# """
|
|
537
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
538
|
-
# return 1 - np.dot(u, v)
|
|
551
|
+
return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
|
|
539
552
|
|
|
540
|
-
def
|
|
541
|
-
"""
|
|
542
|
-
Calculate the Jaccard distance between two vectors.
|
|
553
|
+
def add_chisq(self, u, v):
|
|
554
|
+
"""Compute the Additive Symmetric Chi-square distance between two vectors.
|
|
543
555
|
|
|
544
|
-
The
|
|
556
|
+
The Additive Symmetric Chi-square distance is a measure that
|
|
557
|
+
can be used to compare two vectors. This function calculates it based
|
|
558
|
+
on the input vectors u and v.
|
|
545
559
|
|
|
546
560
|
Parameters
|
|
547
561
|
----------
|
|
@@ -549,900 +563,922 @@ class Distance:
|
|
|
549
563
|
|
|
550
564
|
Returns
|
|
551
565
|
-------
|
|
552
|
-
- The
|
|
566
|
+
- The Additive Symmetric Chi-square distance between the two vectors.
|
|
553
567
|
|
|
554
568
|
References
|
|
555
569
|
----------
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
570
|
+
1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
571
|
+
Measures between Probability Density Functions.
|
|
572
|
+
International Journal of Mathematical Models and Methods in
|
|
573
|
+
Applied Sciences.
|
|
574
|
+
vol. 1(4), pp. 300-307.
|
|
560
575
|
"""
|
|
561
576
|
u, v = np.asarray(u), np.asarray(v)
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
def jeffreys(self, u, v):
|
|
566
|
-
"""
|
|
567
|
-
Calculate the Jeffreys divergence between two vectors.
|
|
568
|
-
|
|
569
|
-
The Jeffreys divergence is a symmetric version of the Kullback-Leibler divergence.
|
|
570
|
-
|
|
571
|
-
Parameters
|
|
572
|
-
----------
|
|
573
|
-
- u, v: Input vectors between which the divergence is to be calculated.
|
|
577
|
+
uvmult = u * v
|
|
578
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
579
|
+
return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
|
|
574
580
|
|
|
575
|
-
Returns
|
|
576
|
-
-------
|
|
577
|
-
- The Jeffreys divergence between the two vectors.
|
|
578
581
|
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
582
|
+
# NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
|
|
583
|
+
# CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED OUT
|
|
584
|
+
|
|
585
|
+
# def acc(self, u, v):
|
|
586
|
+
# """Calculate the average of Cityblock and Chebyshev distance.
|
|
587
|
+
|
|
588
|
+
# This function computes the ACC distance, also known as the
|
|
589
|
+
# Average distance, between two vectors u and v. It is the average of the
|
|
590
|
+
# Cityblock (or Manhattan) and Chebyshev distances.
|
|
591
|
+
|
|
592
|
+
# Parameters
|
|
593
|
+
# ----------
|
|
594
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
595
|
+
|
|
596
|
+
# Returns
|
|
597
|
+
# -------
|
|
598
|
+
# - The ACC distance between the two vectors.
|
|
599
|
+
|
|
600
|
+
# References
|
|
601
|
+
# ----------
|
|
602
|
+
# 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
|
|
603
|
+
# Geometry. Dover Publications.
|
|
604
|
+
# 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
605
|
+
# Measures between Probability Density Functions. International
|
|
606
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
607
|
+
# vol. 1(4), pp. 300-307.
|
|
608
|
+
# """
|
|
609
|
+
# return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
|
|
610
|
+
|
|
611
|
+
# # def bhattacharyya(self, u, v):
|
|
612
|
+
# # """
|
|
613
|
+
# # Calculate the Bhattacharyya distance between two vectors.
|
|
614
|
+
|
|
615
|
+
# # Returns a distance value between 0 and 1.
|
|
616
|
+
|
|
617
|
+
# # Parameters
|
|
618
|
+
# # ----------
|
|
619
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
620
|
+
|
|
621
|
+
# # Returns
|
|
622
|
+
# # -------
|
|
623
|
+
# # - The Bhattacharyya distance between the two vectors.
|
|
624
|
+
|
|
625
|
+
# # References
|
|
626
|
+
# # ----------
|
|
627
|
+
# # 1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
628
|
+
# # statistical populations defined by probability distributions,
|
|
629
|
+
# # Bull. Calcutta Math. Soc., 35, 99–109.
|
|
630
|
+
# # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
631
|
+
# # Measures between Probability Density Functions. International
|
|
632
|
+
# # Journal of Mathematical Models and Methods in Applied Sciences.
|
|
633
|
+
# # 1(4), 300-307.
|
|
634
|
+
# # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
635
|
+
# # """
|
|
636
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
637
|
+
# # return -np.log(np.sum(np.sqrt(u * v)))
|
|
638
|
+
|
|
639
|
+
# def chebyshev_min(self, u, v):
|
|
640
|
+
# """Calculate the minimum value distance between two vectors.
|
|
641
|
+
|
|
642
|
+
# This measure represents a custom approach by Zielezinski to distance
|
|
643
|
+
# measurement, focusing on the minimum absolute difference.
|
|
644
|
+
|
|
645
|
+
# Parameters
|
|
646
|
+
# ----------
|
|
647
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
648
|
+
|
|
649
|
+
# Returns
|
|
650
|
+
# -------
|
|
651
|
+
# - The minimum value distance between the two vectors.
|
|
652
|
+
# """
|
|
653
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
654
|
+
# return np.amin(np.abs(u - v))
|
|
655
|
+
|
|
656
|
+
# def czekanowski(self, u, v):
|
|
657
|
+
# """Calculate the Czekanowski distance between two vectors.
|
|
658
|
+
|
|
659
|
+
# Parameters
|
|
660
|
+
# ----------
|
|
661
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
662
|
+
|
|
663
|
+
# Returns
|
|
664
|
+
# -------
|
|
665
|
+
# - The Czekanowski distance between the two vectors.
|
|
666
|
+
|
|
667
|
+
# References
|
|
668
|
+
# ----------
|
|
669
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
670
|
+
# Measures between Probability Density Functions. International
|
|
671
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
672
|
+
# 1(4), 300-307.
|
|
673
|
+
# """
|
|
674
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
675
|
+
# return np.sum(np.abs(u - v)) / np.sum(u + v)
|
|
676
|
+
|
|
677
|
+
# def dice(self, u, v):
|
|
678
|
+
# """Calculate the Dice dissimilarity between two vectors.
|
|
679
|
+
|
|
680
|
+
# Synonyms:
|
|
681
|
+
# Sorensen distance
|
|
682
|
+
|
|
683
|
+
# Parameters
|
|
684
|
+
# ----------
|
|
685
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
686
|
+
|
|
687
|
+
# Returns
|
|
688
|
+
# -------
|
|
689
|
+
# - The Dice dissimilarity between the two vectors.
|
|
690
|
+
|
|
691
|
+
# References
|
|
692
|
+
# ----------
|
|
693
|
+
# 1. Dice LR (1945) Measures of the amount of ecologic association
|
|
694
|
+
# between species. Ecology. 26, 297-302.
|
|
695
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
696
|
+
# Measures between Probability Density Functions. International
|
|
697
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
698
|
+
# 1(4), 300-307.
|
|
699
|
+
# """
|
|
700
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
701
|
+
# u_v = u - v
|
|
702
|
+
# return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
|
|
703
|
+
|
|
704
|
+
# def divergence(self, u, v):
|
|
705
|
+
# """Calculate the divergence between two vectors.
|
|
706
|
+
|
|
707
|
+
# Divergence equals squared Clark distance multiplied by 2.
|
|
708
|
+
|
|
709
|
+
# Parameters
|
|
710
|
+
# ----------
|
|
711
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
712
|
+
|
|
713
|
+
# Returns
|
|
714
|
+
# -------
|
|
715
|
+
# - The divergence between the two vectors.
|
|
716
|
+
|
|
717
|
+
# References
|
|
718
|
+
# ----------
|
|
719
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
720
|
+
# Measures between Probability Density Functions. International
|
|
721
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
722
|
+
# 1(4), 300-307.
|
|
723
|
+
# """
|
|
724
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
725
|
+
# with np.errstate(invalid="ignore"):
|
|
726
|
+
# return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
|
|
727
|
+
|
|
728
|
+
# # def fidelity(self, u, v):
|
|
729
|
+
# # """
|
|
730
|
+
# # Calculate the fidelity distance between two vectors.
|
|
731
|
+
|
|
732
|
+
# # The fidelity distance measures the similarity between two probability
|
|
733
|
+
# # distributions.
|
|
734
|
+
|
|
735
|
+
# # Parameters
|
|
736
|
+
# # ----------
|
|
737
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
738
|
+
|
|
739
|
+
# # Returns
|
|
740
|
+
# # -------
|
|
741
|
+
# # - The fidelity distance between the two vectors.
|
|
742
|
+
|
|
743
|
+
# # Notes
|
|
744
|
+
# # -----
|
|
745
|
+
# # Added by SC.
|
|
746
|
+
# # """
|
|
747
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
748
|
+
# # return 1 - (np.sum(np.sqrt(u * v)))
|
|
749
|
+
|
|
750
|
+
# def google(self, u, v):
|
|
751
|
+
# """Calculate the Normalized Google Distance (NGD) between two vectors.
|
|
752
|
+
|
|
753
|
+
# NGD is a measure of similarity derived from the number of hits returned by the
|
|
754
|
+
# Google search engine for a given set of keywords.
|
|
755
|
+
|
|
756
|
+
# Parameters
|
|
757
|
+
# ----------
|
|
758
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
759
|
+
|
|
760
|
+
# Returns
|
|
761
|
+
# -------
|
|
762
|
+
# - The Normalized Google Distance between the two vectors.
|
|
763
|
+
|
|
764
|
+
# Notes
|
|
765
|
+
# -----
|
|
766
|
+
# When used for comparing two probability density functions (pdfs),
|
|
767
|
+
# Google distance equals half of Cityblock distance.
|
|
768
|
+
|
|
769
|
+
# References
|
|
770
|
+
# ----------
|
|
771
|
+
# 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
|
|
772
|
+
# doi:10.1109/ITSIM.2008.4631601.
|
|
773
|
+
# """
|
|
774
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
775
|
+
# x = float(np.sum(u))
|
|
776
|
+
# y = float(np.sum(v))
|
|
777
|
+
# summin = float(np.sum(np.minimum(u, v)))
|
|
778
|
+
# return (max([x, y]) - summin) / ((x + y) - min([x, y]))
|
|
779
|
+
|
|
780
|
+
# def gower(self, u, v):
|
|
781
|
+
# """Calculate the Gower distance between two vectors.
|
|
782
|
+
|
|
783
|
+
# The Gower distance equals the Cityblock distance divided by the vector length.
|
|
784
|
+
|
|
785
|
+
# Parameters
|
|
786
|
+
# ----------
|
|
787
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
788
|
+
|
|
789
|
+
# Returns
|
|
790
|
+
# -------
|
|
791
|
+
# - The Gower distance between the two vectors.
|
|
792
|
+
|
|
793
|
+
# References
|
|
794
|
+
# ----------
|
|
795
|
+
# 1. Gower JC. (1971) General Coefficient of Similarity
|
|
796
|
+
# and Some of Its Properties, Biometrics 27, 857-874.
|
|
797
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
798
|
+
# Measures between Probability Density Functions. International
|
|
799
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
800
|
+
# 1(4), 300-307.
|
|
801
|
+
# """
|
|
802
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
803
|
+
# return np.sum(np.abs(u - v)) / u.size
|
|
804
|
+
|
|
805
|
+
# # NEEDS CHECKING
|
|
806
|
+
# # def harmonicmean(self, u, v):
|
|
807
|
+
# # """
|
|
808
|
+
# # Harmonic mean distance.
|
|
809
|
+
# # Notes:
|
|
810
|
+
# # Added by SC.
|
|
811
|
+
# # """
|
|
812
|
+
# # u,v = np.asarray(u), np.asarray(v)
|
|
813
|
+
# # return 1 - 2.*np.sum(u*v/(u+v))
|
|
814
|
+
|
|
815
|
+
# # def inner(self, u, v):
|
|
816
|
+
# # """
|
|
817
|
+
# # Calculate the inner product distance between two vectors.
|
|
818
|
+
|
|
819
|
+
# # The inner product distance is a measure of similarity between two vectors,
|
|
820
|
+
# # based on their inner product.
|
|
821
|
+
|
|
822
|
+
# # Parameters
|
|
823
|
+
# # ----------
|
|
824
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
825
|
+
|
|
826
|
+
# # Returns
|
|
827
|
+
# # -------
|
|
828
|
+
# # - The inner product distance between the two vectors.
|
|
829
|
+
|
|
830
|
+
# # Notes
|
|
831
|
+
# # -----
|
|
832
|
+
# # Added by SC.
|
|
833
|
+
# # """
|
|
834
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
835
|
+
# # return 1 - np.dot(u, v)
|
|
836
|
+
|
|
837
|
+
# def jeffreys(self, u, v):
|
|
838
|
+
# """Calculate the Jeffreys divergence between two vectors.
|
|
839
|
+
|
|
840
|
+
# The Jeffreys divergence is a symmetric version of the Kullback-Leibler
|
|
841
|
+
# divergence.
|
|
842
|
+
|
|
843
|
+
# Parameters
|
|
844
|
+
# ----------
|
|
845
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
846
|
+
|
|
847
|
+
# Returns
|
|
848
|
+
# -------
|
|
849
|
+
# - The Jeffreys divergence between the two vectors.
|
|
850
|
+
|
|
851
|
+
# References
|
|
852
|
+
# ----------
|
|
853
|
+
# 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
|
|
854
|
+
# in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
|
|
855
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
856
|
+
# Measures between Probability Density Functions. International
|
|
857
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
858
|
+
# 1(4), 300-307.
|
|
859
|
+
# """
|
|
860
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
861
|
+
# # Add epsilon to zeros in vectors to avoid division
|
|
862
|
+
# # by 0 and/or log of 0. Alternatively, zeros in the
|
|
863
|
+
# # vectors could be ignored or masked (see below).
|
|
864
|
+
# # u = ma.masked_where(u == 0, u)
|
|
865
|
+
# # v = ma.masked_where(v == 0, v)
|
|
866
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
867
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
868
|
+
# return np.sum((u - v) * np.log(u / v))
|
|
869
|
+
|
|
870
|
+
# def jensenshannon_divergence(self, u, v):
|
|
871
|
+
# """Calculate the Jensen-Shannon divergence between two vectors.
|
|
872
|
+
|
|
873
|
+
# The Jensen-Shannon divergence is a symmetric and finite measure of similarity
|
|
874
|
+
# between two probability distributions.
|
|
875
|
+
|
|
876
|
+
# Parameters
|
|
877
|
+
# ----------
|
|
878
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
879
|
+
|
|
880
|
+
# Returns
|
|
881
|
+
# -------
|
|
882
|
+
# - The Jensen-Shannon divergence between the two vectors.
|
|
883
|
+
|
|
884
|
+
# References
|
|
885
|
+
# ----------
|
|
886
|
+
# 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
|
|
887
|
+
# IEEE Transactions on Information Theory, 37(1):145–151.
|
|
888
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
889
|
+
# Measures between Probability Density Functions. International
|
|
890
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
891
|
+
# 1(4), 300-307.
|
|
892
|
+
# Comments:
|
|
893
|
+
# Equals Jensen difference in Sung-Hyuk (2007):
|
|
894
|
+
# u = np.where(u==0, self.epsilon, u)
|
|
895
|
+
# v = np.where(v==0, self.epsilon, v)
|
|
896
|
+
# el1 = (u * np.log(u) + v * np.log(v)) / 2
|
|
897
|
+
# el2 = (u + v)/2
|
|
898
|
+
# el3 = np.log(el2)
|
|
899
|
+
# return np.sum(el1 - el2 * el3)
|
|
900
|
+
# """
|
|
901
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
902
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
903
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
904
|
+
# dl = u * np.log(2 * u / (u + v))
|
|
905
|
+
# dr = v * np.log(2 * v / (u + v))
|
|
906
|
+
# return (np.sum(dl) + np.sum(dr)) / 2
|
|
907
|
+
|
|
908
|
+
# def jensen_difference(self, u, v):
|
|
909
|
+
# """Calculate the Jensen difference between two vectors.
|
|
910
|
+
|
|
911
|
+
# The Jensen difference is considered similar to the Jensen-Shannon divergence.
|
|
912
|
+
|
|
913
|
+
# Parameters
|
|
914
|
+
# ----------
|
|
915
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
916
|
+
|
|
917
|
+
# Returns
|
|
918
|
+
# -------
|
|
919
|
+
# - The Jensen difference between the two vectors.
|
|
920
|
+
|
|
921
|
+
# Notes
|
|
922
|
+
# -----
|
|
923
|
+
# 1. Equals half of Topsøe distance
|
|
924
|
+
# 2. Equals squared jensenshannon_distance.
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
# References
|
|
928
|
+
# ----------
|
|
929
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
930
|
+
# Measures between Probability Density Functions. International
|
|
931
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
932
|
+
# 1(4), 300-307.
|
|
933
|
+
# """
|
|
934
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
935
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
936
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
937
|
+
# el1 = (u * np.log(u) + v * np.log(v)) / 2
|
|
938
|
+
# el2 = (u + v) / 2
|
|
939
|
+
# return np.sum(el1 - el2 * np.log(el2))
|
|
940
|
+
|
|
941
|
+
# def k_divergence(self, u, v):
|
|
942
|
+
# """Calculate the K divergence between two vectors.
|
|
943
|
+
|
|
944
|
+
# Parameters
|
|
945
|
+
# ----------
|
|
946
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
947
|
+
|
|
948
|
+
# Returns
|
|
949
|
+
# -------
|
|
950
|
+
# - The K divergence between the two vectors.
|
|
951
|
+
|
|
952
|
+
# References
|
|
953
|
+
# ----------
|
|
954
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
955
|
+
# Measures between Probability Density Functions. International
|
|
956
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
957
|
+
# 1(4), 300-307.
|
|
958
|
+
# """
|
|
959
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
960
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
961
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
962
|
+
# return np.sum(u * np.log(2 * u / (u + v)))
|
|
963
|
+
|
|
964
|
+
# def kl_divergence(self, u, v):
|
|
965
|
+
# """Calculate the Kullback-Leibler divergence between two vectors.
|
|
966
|
+
|
|
967
|
+
# The Kullback-Leibler divergence measures the difference between two
|
|
968
|
+
# probability distributions.
|
|
969
|
+
|
|
970
|
+
# Parameters
|
|
971
|
+
# ----------
|
|
972
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
973
|
+
|
|
974
|
+
# Returns
|
|
975
|
+
# -------
|
|
976
|
+
# - The Kullback-Leibler divergence between the two vectors.
|
|
977
|
+
|
|
978
|
+
# References
|
|
979
|
+
# ----------
|
|
980
|
+
# 1. Kullback S, Leibler RA (1951) On information and sufficiency.
|
|
981
|
+
# Ann. Math. Statist. 22:79–86
|
|
982
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
983
|
+
# Measures between Probability Density Functions. International
|
|
984
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
985
|
+
# 1(4):300-307.
|
|
986
|
+
# """
|
|
987
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
988
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
989
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
990
|
+
# return np.sum(u * np.log(u / v))
|
|
991
|
+
|
|
992
|
+
# def kumarjohnson(self, u, v):
|
|
993
|
+
# """Calculate the Kumar-Johnson distance between two vectors.
|
|
994
|
+
|
|
995
|
+
# Parameters
|
|
996
|
+
# ----------
|
|
997
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
998
|
+
|
|
999
|
+
# Returns
|
|
1000
|
+
# -------
|
|
1001
|
+
# - The Kumar-Johnson distance between the two vectors.
|
|
1002
|
+
|
|
1003
|
+
# References
|
|
1004
|
+
# ----------
|
|
1005
|
+
# 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
|
|
1006
|
+
# and information inequalities, Journal of Inequalities in pure
|
|
1007
|
+
# and applied Mathematics. 6(3).
|
|
1008
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1009
|
+
# Measures between Probability Density Functions. International
|
|
1010
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1011
|
+
# 1(4):300-307.
|
|
1012
|
+
# """
|
|
1013
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1014
|
+
# uvmult = u * v
|
|
1015
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1016
|
+
# numer = np.power(u**2 - v**2, 2)
|
|
1017
|
+
# denom = 2 * np.power(uvmult, 3 / 2)
|
|
1018
|
+
# return np.sum(np.where(uvmult != 0, numer / denom, 0))
|
|
1019
|
+
|
|
1020
|
+
# def matusita(self, u, v):
|
|
1021
|
+
# """Calculate the Matusita distance between two vectors.
|
|
1022
|
+
|
|
1023
|
+
# Parameters
|
|
1024
|
+
# ----------
|
|
1025
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1026
|
+
|
|
1027
|
+
# Returns
|
|
1028
|
+
# -------
|
|
1029
|
+
# - The Matusita distance between the two vectors.
|
|
1030
|
+
|
|
1031
|
+
# References
|
|
1032
|
+
# ----------
|
|
1033
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1034
|
+
# Measures between Probability Density Functions. International
|
|
1035
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1036
|
+
# 1(4):300-307.
|
|
1037
|
+
|
|
1038
|
+
# Notes
|
|
1039
|
+
# -----
|
|
1040
|
+
# Equals square root of Squared-chord distance.
|
|
1041
|
+
# """
|
|
1042
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1043
|
+
# return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
1044
|
+
|
|
1045
|
+
# def max_symmetric_chisq(self, u, v):
|
|
1046
|
+
# """Calculate the maximum symmetric chi-square distance.
|
|
1047
|
+
|
|
1048
|
+
# Parameters
|
|
1049
|
+
# ----------
|
|
1050
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1051
|
+
|
|
1052
|
+
# Returns
|
|
1053
|
+
# -------
|
|
1054
|
+
# - The maximum symmetric chi-square distance between the two vectors.
|
|
1055
|
+
|
|
1056
|
+
# References
|
|
1057
|
+
# ----------
|
|
1058
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1059
|
+
# Measures between Probability Density Functions. International
|
|
1060
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1061
|
+
# 1(4):300-307.
|
|
1062
|
+
# """
|
|
1063
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1064
|
+
# return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
1065
|
+
|
|
1066
|
+
# def min_symmetric_chisq(self, u, v):
|
|
1067
|
+
# """Calculate the minimum symmetric chi-square distance.
|
|
1068
|
+
|
|
1069
|
+
# Parameters
|
|
1070
|
+
# ----------
|
|
1071
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1072
|
+
|
|
1073
|
+
# Returns
|
|
1074
|
+
# -------
|
|
1075
|
+
# - The minimum symmetric chi-square distance between the two vectors.
|
|
1076
|
+
|
|
1077
|
+
# Notes
|
|
1078
|
+
# -----
|
|
1079
|
+
# Added by SC.
|
|
1080
|
+
# """
|
|
1081
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1082
|
+
# return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
1083
|
+
|
|
1084
|
+
# def minkowski(self, u, v, p=2):
|
|
1085
|
+
# """Calculate the Minkowski distance between two vectors.
|
|
1086
|
+
|
|
1087
|
+
# Parameters
|
|
1088
|
+
# ----------
|
|
1089
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1090
|
+
# - p: The order of the norm of the difference.
|
|
1091
|
+
|
|
1092
|
+
# Returns
|
|
1093
|
+
# -------
|
|
1094
|
+
# - The Minkowski distance between the two vectors.
|
|
1095
|
+
|
|
1096
|
+
# Notes
|
|
1097
|
+
# -----
|
|
1098
|
+
# As p goes to infinity, the Chebyshev distance is obtained.
|
|
1099
|
+
|
|
1100
|
+
# References
|
|
1101
|
+
# ----------
|
|
1102
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1103
|
+
# Measures between Probability Density Functions. International
|
|
1104
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1105
|
+
# 1(4):300-307.
|
|
1106
|
+
# """
|
|
1107
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1108
|
+
# return np.linalg.norm(u - v, ord=p)
|
|
1109
|
+
|
|
1110
|
+
# def neyman_chisq(self, u, v):
|
|
1111
|
+
# """Calculate the Neyman chi-square distance between two vectors.
|
|
1112
|
+
|
|
1113
|
+
# Parameters
|
|
1114
|
+
# ----------
|
|
1115
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1116
|
+
|
|
1117
|
+
# Returns
|
|
1118
|
+
# -------
|
|
1119
|
+
# - The Neyman chi-square distance between the two vectors.
|
|
1120
|
+
|
|
1121
|
+
# References
|
|
1122
|
+
# ----------
|
|
1123
|
+
# 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
|
|
1124
|
+
# In Proceedings of the First Berkeley Symposium on Mathematical
|
|
1125
|
+
# Statistics and Probability.
|
|
1126
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1127
|
+
# Measures between Probability Density Functions. International
|
|
1128
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1129
|
+
# 1(4), 300-307.
|
|
1130
|
+
# """
|
|
1131
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1132
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1133
|
+
# return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
|
|
1134
|
+
|
|
1135
|
+
# # def nonintersection(self, u, v):
|
|
1136
|
+
# # """
|
|
1137
|
+
# # Calculate the Nonintersection distance between two vectors.
|
|
1138
|
+
|
|
1139
|
+
# # Parameters
|
|
1140
|
+
# # ----------
|
|
1141
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
1142
|
+
|
|
1143
|
+
# # Returns
|
|
1144
|
+
# # -------
|
|
1145
|
+
# # - The Nonintersection distance between the two vectors.
|
|
1146
|
+
|
|
1147
|
+
# # References
|
|
1148
|
+
# # ----------
|
|
1149
|
+
# # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1150
|
+
# # Measures between Probability Density Functions. International
|
|
1151
|
+
# # Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1152
|
+
# # 1(4), 300-307.
|
|
1153
|
+
|
|
1154
|
+
# # Notes
|
|
1155
|
+
# # -----
|
|
1156
|
+
# # When used for comparing two probability density functions (pdfs),
|
|
1157
|
+
# # Nonintersection distance equals half of Cityblock distance.
|
|
1158
|
+
# # """
|
|
1159
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
1160
|
+
# # return 1 - np.sum(np.minimum(u, v))
|
|
1161
|
+
|
|
1162
|
+
# def pearson_chisq(self, u, v):
|
|
1163
|
+
# """Calculate the Pearson chi-square divergence between two vectors.
|
|
1164
|
+
|
|
1165
|
+
# Parameters
|
|
1166
|
+
# ----------
|
|
1167
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1168
|
+
|
|
1169
|
+
# Returns
|
|
1170
|
+
# -------
|
|
1171
|
+
# - The Pearson chi-square divergence between the two vectors.
|
|
1172
|
+
|
|
1173
|
+
# References
|
|
1174
|
+
# ----------
|
|
1175
|
+
# 1. Pearson K. (1900) On the Criterion that a given system of
|
|
1176
|
+
# deviations from the probable in the case of a correlated system
|
|
1177
|
+
# of variables is such that it can be reasonably supposed to have
|
|
1178
|
+
# arisen from random sampling, Phil. Mag. 50, 157-172.
|
|
1179
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1180
|
+
# Measures between Probability Density Functions. International
|
|
1181
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1182
|
+
# 1(4), 300-307.
|
|
1183
|
+
|
|
1184
|
+
# Notes
|
|
1185
|
+
# -----
|
|
1186
|
+
# Pearson chi-square divergence is asymmetric.
|
|
1187
|
+
# """
|
|
1188
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1189
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1190
|
+
# return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
|
|
1191
|
+
|
|
1192
|
+
# def penroseshape(self, u, v):
|
|
1193
|
+
# """Calculate the Penrose shape distance between two vectors.
|
|
1194
|
+
|
|
1195
|
+
# Parameters
|
|
1196
|
+
# ----------
|
|
1197
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1198
|
+
|
|
1199
|
+
# Returns
|
|
1200
|
+
# -------
|
|
1201
|
+
# - The Penrose shape distance between the two vectors.
|
|
1202
|
+
|
|
1203
|
+
# References
|
|
1204
|
+
# ----------
|
|
1205
|
+
# 1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
1206
|
+
# Springer-Verlag Berlin Heidelberg. 1-590.
|
|
1207
|
+
# """
|
|
1208
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1209
|
+
# umu = np.mean(u)
|
|
1210
|
+
# vmu = np.mean(v)
|
|
1211
|
+
# return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
|
|
1212
|
+
|
|
1213
|
+
# def prob_chisq(self, u, v):
|
|
1214
|
+
# """Calculate the Probabilistic chi-square distance between two vectors.
|
|
1215
|
+
|
|
1216
|
+
# Parameters
|
|
1217
|
+
# ----------
|
|
1218
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1219
|
+
|
|
1220
|
+
# Returns
|
|
1221
|
+
# -------
|
|
1222
|
+
# - The Probabilistic chi-square distance between the two vectors.
|
|
1223
|
+
|
|
1224
|
+
# Notes
|
|
1225
|
+
# -----
|
|
1226
|
+
# Added by SC.
|
|
1227
|
+
# """
|
|
1228
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1229
|
+
# uvsum = u + v
|
|
1230
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1231
|
+
# return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
|
|
1232
|
+
|
|
1233
|
+
# def ruzicka(self, u, v):
|
|
1234
|
+
# """Calculate the Ruzicka distance between two vectors.
|
|
1235
|
+
|
|
1236
|
+
# Parameters
|
|
1237
|
+
# ----------
|
|
1238
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1239
|
+
|
|
1240
|
+
# Returns
|
|
1241
|
+
# -------
|
|
1242
|
+
# - The Ruzicka distance between the two vectors.
|
|
1243
|
+
|
|
1244
|
+
# Notes
|
|
1245
|
+
# -----
|
|
1246
|
+
# Added by SC.
|
|
1247
|
+
# """
|
|
1248
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1249
|
+
# den = np.sum(np.maximum(u, v))
|
|
1250
|
+
|
|
1251
|
+
# return 1 - np.sum(np.minimum(u, v)) / den
|
|
1252
|
+
|
|
1253
|
+
# def sorensen(self, u, v):
|
|
1254
|
+
# """Calculate the Sorensen distance between two vectors.
|
|
1255
|
+
|
|
1256
|
+
# Parameters
|
|
1257
|
+
# ----------
|
|
1258
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1259
|
+
|
|
1260
|
+
# Returns
|
|
1261
|
+
# -------
|
|
1262
|
+
# - The Sorensen distance between the two vectors.
|
|
1263
|
+
|
|
1264
|
+
# Notes
|
|
1265
|
+
# -----
|
|
1266
|
+
# The Sorensen distance equals the Manhattan distance divided by the sum of
|
|
1267
|
+
# all elements of the two vectors.
|
|
1268
|
+
|
|
1269
|
+
# Added by SC.
|
|
1270
|
+
# """
|
|
1271
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1272
|
+
# return np.sum(np.abs(u - v)) / np.sum(u + v)
|
|
1273
|
+
|
|
1274
|
+
# def squared_chisq(self, u, v):
|
|
1275
|
+
# """Calculate the Squared chi-square distance between two vectors.
|
|
1276
|
+
|
|
1277
|
+
# Parameters
|
|
1278
|
+
# ----------
|
|
1279
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1280
|
+
|
|
1281
|
+
# Returns
|
|
1282
|
+
# -------
|
|
1283
|
+
# - The Squared chi-square distance between the two vectors.
|
|
1284
|
+
|
|
1285
|
+
# References
|
|
1286
|
+
# ----------
|
|
1287
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1288
|
+
# Measures between Probability Density Functions. International
|
|
1289
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1290
|
+
# 1(4), 300-307.
|
|
1291
|
+
# """
|
|
1292
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1293
|
+
# uvsum = u + v
|
|
1294
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1295
|
+
# return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
|
|
1296
|
+
|
|
1297
|
+
# def squaredchord(self, u, v):
|
|
1298
|
+
# """Calculate the Squared-chord distance between two vectors.
|
|
1299
|
+
|
|
1300
|
+
# Parameters
|
|
1301
|
+
# ----------
|
|
1302
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1303
|
+
|
|
1304
|
+
# Returns
|
|
1305
|
+
# -------
|
|
1306
|
+
# - The Squared-chord distance between the two vectors.
|
|
1307
|
+
|
|
1308
|
+
# References
|
|
1309
|
+
# ----------
|
|
1310
|
+
# 1. Gavin DG et al. (2003) A statistical approach to evaluating
|
|
1311
|
+
# distance metrics and analog assignments for pollen records.
|
|
1312
|
+
# Quaternary Research 60:356–367.
|
|
1313
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1314
|
+
# Measures between Probability Density Functions. International
|
|
1315
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1316
|
+
# 1(4), 300-307.
|
|
1317
|
+
|
|
1318
|
+
# Notes
|
|
1319
|
+
# -----
|
|
1320
|
+
# Equals the squared Matusita distance.
|
|
1321
|
+
# """
|
|
1322
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1323
|
+
# return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
|
|
1324
|
+
|
|
1325
|
+
# def squared_euclidean(self, u, v):
|
|
1326
|
+
# """Calculate the Squared Euclidean distance between two vectors.
|
|
1327
|
+
|
|
1328
|
+
# Parameters
|
|
1329
|
+
# ----------
|
|
1330
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1331
|
+
|
|
1332
|
+
# Returns
|
|
1333
|
+
# -------
|
|
1334
|
+
# - The Squared Euclidean distance between the two vectors.
|
|
1335
|
+
|
|
1336
|
+
# References
|
|
1337
|
+
# ----------
|
|
1338
|
+
# 1. Gavin DG et al. (2003) A statistical approach to evaluating
|
|
1339
|
+
# distance metrics and analog assignments for pollen records.
|
|
1340
|
+
# Quaternary Research 60:356–367.
|
|
1341
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1342
|
+
# Measures between Probability Density Functions. International
|
|
1343
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1344
|
+
# 1(4), 300-307.
|
|
1345
|
+
|
|
1346
|
+
# Notes
|
|
1347
|
+
# -----
|
|
1348
|
+
# Equals the square of the Euclidean distance (no square root is taken).
|
|
1349
|
+
# """
|
|
1350
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1351
|
+
# return np.dot((u - v), (u - v))
|
|
1352
|
+
|
|
1353
|
+
# def taneja(self, u, v):
|
|
1354
|
+
# """Calculate the Taneja distance between two vectors.
|
|
1355
|
+
|
|
1356
|
+
# Parameters
|
|
1357
|
+
# ----------
|
|
1358
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1359
|
+
|
|
1360
|
+
# Returns
|
|
1361
|
+
# -------
|
|
1362
|
+
# - The Taneja distance between the two vectors.
|
|
1363
|
+
|
|
1364
|
+
# References
|
|
1365
|
+
# ----------
|
|
1366
|
+
# 1. Taneja IJ. (1995), New Developments in Generalized Information
|
|
1367
|
+
# Measures, Chapter in: Advances in Imaging and Electron Physics,
|
|
1368
|
+
# Ed. P.W. Hawkes, 91, 37-135.
|
|
1369
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1370
|
+
# Measures between Probability Density Functions. International
|
|
1371
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1372
|
+
# 1(4), 300-307.
|
|
1373
|
+
# """
|
|
1374
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1375
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
1376
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
1377
|
+
# uvsum = u + v
|
|
1378
|
+
# return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
|
|
1379
|
+
|
|
1380
|
+
# def tanimoto(self, u, v):
|
|
1381
|
+
# """Calculate the Tanimoto distance between two vectors.
|
|
1382
|
+
|
|
1383
|
+
# Parameters
|
|
1384
|
+
# ----------
|
|
1385
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1386
|
+
|
|
1387
|
+
# Returns
|
|
1388
|
+
# -------
|
|
1389
|
+
# - The Tanimoto distance between the two vectors.
|
|
1390
|
+
|
|
1391
|
+
# References
|
|
1392
|
+
# ----------
|
|
1393
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1394
|
+
# Measures between Probability Density Functions. International
|
|
1395
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1396
|
+
# 1(4), 300-307.
|
|
1397
|
+
|
|
1398
|
+
# Notes
|
|
1399
|
+
# -----
|
|
1400
|
+
# Equals Soergel distance.
|
|
1401
|
+
# """
|
|
1402
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1403
|
+
# # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
|
|
1404
|
+
# usum = np.sum(u)
|
|
1405
|
+
# vsum = np.sum(v)
|
|
1406
|
+
# minsum = np.sum(np.minimum(u, v))
|
|
1407
|
+
# return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
|
|
1408
|
+
|
|
1409
|
+
# def topsoe(self, u, v):
|
|
1410
|
+
# """Calculate the Topsøe distance between two vectors.
|
|
1411
|
+
|
|
1412
|
+
# Parameters
|
|
1413
|
+
# ----------
|
|
1414
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1415
|
+
|
|
1416
|
+
# Returns
|
|
1417
|
+
# -------
|
|
1418
|
+
# - The Topsøe distance between the two vectors.
|
|
1419
|
+
|
|
1420
|
+
# References
|
|
1421
|
+
# ----------
|
|
1422
|
+
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1423
|
+
# Measures between Probability Density Functions. International
|
|
1424
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1425
|
+
# 1(4), 300-307.
|
|
1426
|
+
|
|
1427
|
+
# Notes
|
|
1428
|
+
# -----
|
|
1429
|
+
# Equals twice the Jensen-Shannon divergence.
|
|
1430
|
+
# """
|
|
1431
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1432
|
+
# u = np.where(u == 0, self.epsilon, u)
|
|
1433
|
+
# v = np.where(v == 0, self.epsilon, v)
|
|
1434
|
+
# dl = u * np.log(2 * u / (u + v))
|
|
1435
|
+
# dr = v * np.log(2 * v / (u + v))
|
|
1436
|
+
# return np.sum(dl + dr)
|
|
1437
|
+
|
|
1438
|
+
# def vicis_symmetric_chisq(self, u, v):
|
|
1439
|
+
# """Calculate the Vicis Symmetric chi-square distance.
|
|
1440
|
+
|
|
1441
|
+
# Parameters
|
|
1442
|
+
# ----------
|
|
1443
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1444
|
+
|
|
1445
|
+
# Returns
|
|
1446
|
+
# -------
|
|
1447
|
+
# - The Vicis Symmetric chi-square distance between the two vectors.
|
|
1448
|
+
|
|
1449
|
+
# References
|
|
1450
|
+
# ----------
|
|
1451
|
+
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1452
|
+
# Measures between Probability Density Functions. International
|
|
1453
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1454
|
+
# 1(4), 300-307.
|
|
1455
|
+
# """
|
|
1456
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1457
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1458
|
+
# u_v = (u - v) ** 2
|
|
1459
|
+
# uvmin = np.minimum(u, v) ** 2
|
|
1460
|
+
# return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
|
|
1461
|
+
|
|
1462
|
+
# def vicis_wave_hedges(self, u, v):
|
|
1463
|
+
# """Calculate the Vicis-Wave Hedges distance between two vectors.
|
|
1464
|
+
|
|
1465
|
+
# Parameters
|
|
1466
|
+
# ----------
|
|
1467
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1468
|
+
|
|
1469
|
+
# Returns
|
|
1470
|
+
# -------
|
|
1471
|
+
# - The Vicis-Wave Hedges distance between the two vectors.
|
|
1472
|
+
|
|
1473
|
+
# References
|
|
1474
|
+
# ----------
|
|
1475
|
+
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1476
|
+
# Measures between Probability Density Functions. International
|
|
1477
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1478
|
+
# 1(4), 300-307.
|
|
1479
|
+
# """
|
|
1480
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1481
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1482
|
+
# u_v = abs(u - v)
|
|
1483
|
+
# uvmin = np.minimum(u, v)
|
|
1484
|
+
# return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
|