distclassipy 0.0.2__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distclassipy/__init__.py +14 -2
- distclassipy/classifier.py +121 -95
- distclassipy/distances.py +759 -401
- distclassipy-0.0.5.dist-info/METADATA +702 -0
- distclassipy-0.0.5.dist-info/RECORD +11 -0
- {distclassipy-0.0.2.dist-info → distclassipy-0.0.5.dist-info}/top_level.txt +0 -1
- distclassipy-0.0.2.dist-info/METADATA +0 -25
- distclassipy-0.0.2.dist-info/RECORD +0 -11
- {distclassipy-0.0.2.dist-info → distclassipy-0.0.5.dist-info}/LICENSE +0 -0
- {distclassipy-0.0.2.dist-info → distclassipy-0.0.5.dist-info}/WHEEL +0 -0
distclassipy/distances.py
CHANGED
|
@@ -1,310 +1,439 @@
|
|
|
1
|
-
# This code is based on the work of Andrzej Zielezinski, originally retrieved on 20 November 2022 from
|
|
2
|
-
# https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py
|
|
3
|
-
#
|
|
4
|
-
# It has been modified by Siddharth Chaini on 27 November 2022.
|
|
5
|
-
#
|
|
6
|
-
# Licensed GNU General Public License v3.0;
|
|
7
|
-
# you may not use this file except in compliance with the License.
|
|
8
|
-
# You may obtain a copy of the License at
|
|
9
|
-
#
|
|
10
|
-
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
|
11
|
-
#
|
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
-
# See the License for the specific language governing permissions and
|
|
16
|
-
# limitations under the License.
|
|
17
|
-
#
|
|
18
|
-
# Modifications by Siddharth Chaini include the addition of the following distance measures:
|
|
19
|
-
# 1. Meehl distance
|
|
20
|
-
# 2. Sorensen distance
|
|
21
|
-
# 3. Ruzicka distance
|
|
22
|
-
# 4. Inner product distance
|
|
23
|
-
# 5. Harmonic mean distance
|
|
24
|
-
# 6. Fidelity
|
|
25
|
-
# 7. Minimimum Symmetric Chi Squared
|
|
26
|
-
# 8. Probabilistic Symmetric Chi Squared
|
|
27
|
-
#
|
|
28
|
-
# In addition, the following code was added to all functions for array conversion:
|
|
29
|
-
# u,v = np.asarray(u), np.asarray(v)
|
|
30
|
-
#
|
|
31
|
-
# Todos:
|
|
32
|
-
# ALSO COMPARE RUNTIME OF THIS v/s custom v/s Tschopp
|
|
33
|
-
|
|
34
|
-
|
|
35
1
|
"""
|
|
36
|
-
A variety of distance metrics to calculate the distance between two points.
|
|
2
|
+
A module providing a variety of distance metrics to calculate the distance between two points.
|
|
3
|
+
|
|
4
|
+
This module includes implementations of various distance metrics, including both common and less
|
|
5
|
+
common measures. It allows for the calculation of distances between data points in a vectorized
|
|
6
|
+
manner using numpy arrays.
|
|
7
|
+
This code is based on the work of Andrzej Zielezinski, originally retrieved on 20 November 2022 from
|
|
8
|
+
https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py
|
|
9
|
+
|
|
10
|
+
It has been modified by Siddharth Chaini on 27 November 2022.
|
|
11
|
+
|
|
12
|
+
Licensed GNU General Public License v3.0;
|
|
13
|
+
you may not use this file except in compliance with the License.
|
|
14
|
+
You may obtain a copy of the License at
|
|
15
|
+
|
|
16
|
+
https://www.gnu.org/licenses/gpl-3.0.en.html
|
|
17
|
+
|
|
18
|
+
Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
See the License for the specific language governing permissions and
|
|
22
|
+
limitations under the License.
|
|
23
|
+
|
|
24
|
+
Modifications by Siddharth Chaini include the addition of the following distance measures:
|
|
25
|
+
1. Meehl distance
|
|
26
|
+
2. Sorensen distance
|
|
27
|
+
3. Ruzicka distance
|
|
28
|
+
4. Inner product distance
|
|
29
|
+
5. Harmonic mean distance
|
|
30
|
+
6. Fidelity
|
|
31
|
+
7. Minimimum Symmetric Chi Squared
|
|
32
|
+
8. Probabilistic Symmetric Chi Squared
|
|
33
|
+
|
|
34
|
+
In addition, the following code was added to all functions for array conversion:
|
|
35
|
+
u,v = np.asarray(u), np.asarray(v)
|
|
36
|
+
|
|
37
|
+
Todos:
|
|
38
|
+
ALSO COMPARE RUNTIME OF THIS v/s custom v/s Tschopp
|
|
37
39
|
"""
|
|
38
40
|
|
|
39
41
|
import numpy as np
|
|
40
42
|
|
|
43
|
+
|
|
41
44
|
class Distance:
|
|
42
45
|
|
|
43
46
|
def __init__(self, epsilon=None):
|
|
44
|
-
|
|
47
|
+
"""
|
|
48
|
+
Initialize the Distance class with an optional epsilon value.
|
|
45
49
|
|
|
50
|
+
Parameters:
|
|
51
|
+
- epsilon: A small value to avoid division by zero errors.
|
|
52
|
+
"""
|
|
53
|
+
self.epsilon = np.finfo(float).eps if not epsilon else epsilon
|
|
46
54
|
|
|
47
55
|
def acc(self, u, v):
|
|
48
56
|
"""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
57
|
+
Calculate the average of Cityblock/Manhattan and Chebyshev distances.
|
|
58
|
+
|
|
59
|
+
This function computes the ACC distance, also known as the Average distance, between two
|
|
60
|
+
vectors u and v. It is the average of the Cityblock (or Manhattan) and Chebyshev distances.
|
|
61
|
+
|
|
62
|
+
Parameters:
|
|
63
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
- The ACC distance between the two vectors.
|
|
67
|
+
|
|
53
68
|
References:
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
59
|
-
vol. 1(4), pp. 300-307.
|
|
69
|
+
1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean Geometry. Dover Publications.
|
|
70
|
+
2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
|
|
71
|
+
Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
|
|
72
|
+
vol. 1(4), pp. 300-307.
|
|
60
73
|
"""
|
|
61
|
-
return (self.cityblock(u, v) + self.chebyshev(u, v))/2
|
|
62
|
-
|
|
74
|
+
return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
|
|
63
75
|
|
|
64
76
|
def add_chisq(self, u, v):
|
|
65
77
|
"""
|
|
66
|
-
Additive Symmetric Chi-square distance.
|
|
78
|
+
Compute the Additive Symmetric Chi-square distance between two vectors.
|
|
79
|
+
|
|
80
|
+
The Additive Symmetric Chi-square distance is a measure that can be used to compare two vectors.
|
|
81
|
+
This function calculates it based on the input vectors u and v.
|
|
82
|
+
|
|
83
|
+
Parameters:
|
|
84
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
- The Additive Symmetric Chi-square distance between the two vectors.
|
|
88
|
+
|
|
67
89
|
References:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
vol. 1(4), pp. 300-307.
|
|
90
|
+
1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
|
|
91
|
+
Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
|
|
92
|
+
vol. 1(4), pp. 300-307.
|
|
72
93
|
"""
|
|
73
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
94
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
74
95
|
uvmult = u * v
|
|
75
|
-
with np.errstate(divide=
|
|
76
|
-
return np.sum(np.where(uvmult != 0, ((u-v)**2 * (u+v))/uvmult, 0))
|
|
77
|
-
|
|
96
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
97
|
+
return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
|
|
78
98
|
|
|
79
99
|
def bhattacharyya(self, u, v):
|
|
80
100
|
"""
|
|
81
|
-
Bhattacharyya distance.
|
|
101
|
+
Calculate the Bhattacharyya distance between two vectors.
|
|
102
|
+
|
|
82
103
|
Returns a distance value between 0 and 1.
|
|
83
|
-
References:
|
|
84
|
-
1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
85
|
-
statistical populations defined by probability distributions,
|
|
86
|
-
Bull. Calcutta Math. Soc., 35, 99–109.
|
|
87
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
88
|
-
Measures between Probability Density Functions. International
|
|
89
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
90
|
-
1(4), 300-307.
|
|
91
|
-
3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
92
|
-
"""
|
|
93
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
94
|
-
return -np.log(np.sum(np.sqrt(u*v)))
|
|
95
104
|
|
|
105
|
+
Parameters:
|
|
106
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
- The Bhattacharyya distance between the two vectors.
|
|
110
|
+
|
|
111
|
+
References:
|
|
112
|
+
1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
113
|
+
statistical populations defined by probability distributions,
|
|
114
|
+
Bull. Calcutta Math. Soc., 35, 99–109.
|
|
115
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
116
|
+
Measures between Probability Density Functions. International
|
|
117
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
118
|
+
1(4), 300-307.
|
|
119
|
+
3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
120
|
+
"""
|
|
121
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
122
|
+
return -np.log(np.sum(np.sqrt(u * v)))
|
|
96
123
|
|
|
97
124
|
def braycurtis(self, u, v):
|
|
98
125
|
"""
|
|
99
|
-
Bray-Curtis distance.
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
126
|
+
Calculate the Bray-Curtis distance between two vectors.
|
|
127
|
+
|
|
128
|
+
The Bray-Curtis distance is a measure of dissimilarity between two non-negative vectors,
|
|
129
|
+
often used in ecology to measure the compositional dissimilarity between two sites based on counts
|
|
130
|
+
of species at both sites. It is closely related to the Sørensen distance and is also known as
|
|
131
|
+
Bray-Curtis dissimilarity.
|
|
132
|
+
|
|
104
133
|
Notes:
|
|
105
134
|
When used for comparing two probability density functions (pdfs),
|
|
106
|
-
Bray-Curtis distance equals Cityblock distance divided by 2.
|
|
135
|
+
the Bray-Curtis distance equals the Cityblock distance divided by 2.
|
|
136
|
+
|
|
137
|
+
Parameters:
|
|
138
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
- The Bray-Curtis distance between the two vectors.
|
|
142
|
+
|
|
107
143
|
References:
|
|
108
144
|
1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
|
|
109
|
-
|
|
110
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
145
|
+
southern Wisconsin. Ecological Monographs, 27, 325-349.
|
|
146
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
111
147
|
Measures between Probability Density Functions. International
|
|
112
148
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
113
149
|
1(4), 300-307.
|
|
114
150
|
3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
|
|
115
151
|
"""
|
|
116
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
152
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
117
153
|
return np.sum(np.abs(u - v)) / np.sum(np.abs(u + v))
|
|
118
154
|
|
|
119
|
-
|
|
120
155
|
def canberra(self, u, v):
|
|
121
156
|
"""
|
|
122
|
-
Canberra distance.
|
|
157
|
+
Calculate the Canberra distance between two vectors.
|
|
158
|
+
|
|
159
|
+
The Canberra distance is a weighted version of the Manhattan distance, used in numerical analysis.
|
|
160
|
+
|
|
123
161
|
Notes:
|
|
124
162
|
When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
|
|
125
163
|
is used in the calculation.
|
|
164
|
+
|
|
165
|
+
Parameters:
|
|
166
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
- The Canberra distance between the two vectors.
|
|
170
|
+
|
|
126
171
|
References:
|
|
127
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
172
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
128
173
|
Measures between Probability Density Functions. International
|
|
129
174
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
130
175
|
1(4), 300-307.
|
|
131
176
|
"""
|
|
132
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
133
|
-
with np.errstate(invalid=
|
|
177
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
178
|
+
with np.errstate(invalid="ignore"):
|
|
134
179
|
return np.nansum(np.abs(u - v) / (np.abs(u) + np.abs(v)))
|
|
135
180
|
|
|
136
|
-
|
|
137
181
|
def chebyshev(self, u, v):
|
|
138
182
|
"""
|
|
139
|
-
Chebyshev distance.
|
|
183
|
+
Calculate the Chebyshev distance between two vectors.
|
|
184
|
+
|
|
185
|
+
The Chebyshev distance is a metric defined on a vector space where the distance between two vectors
|
|
186
|
+
is the greatest of their differences along any coordinate dimension.
|
|
187
|
+
|
|
140
188
|
Synonyms:
|
|
141
189
|
Chessboard distance
|
|
142
190
|
King-move metric
|
|
143
191
|
Maximum value distance
|
|
144
192
|
Minimax approximation
|
|
193
|
+
|
|
194
|
+
Parameters:
|
|
195
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
- The Chebyshev distance between the two vectors.
|
|
199
|
+
|
|
145
200
|
References:
|
|
146
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
201
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
147
202
|
Measures between Probability Density Functions. International
|
|
148
203
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
149
|
-
1(4), 300-307.
|
|
204
|
+
1(4), 300-307.
|
|
150
205
|
"""
|
|
151
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
206
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
152
207
|
return np.amax(np.abs(u - v))
|
|
153
208
|
|
|
154
|
-
|
|
155
209
|
def chebyshev_min(self, u, v):
|
|
156
210
|
"""
|
|
157
|
-
|
|
211
|
+
Calculate the minimum value distance between two vectors.
|
|
212
|
+
|
|
213
|
+
This measure represents a custom approach by Zielezinski to distance measurement, focusing on the minimum absolute difference.
|
|
214
|
+
|
|
215
|
+
Parameters:
|
|
216
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
- The minimum value distance between the two vectors.
|
|
158
220
|
"""
|
|
159
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
221
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
160
222
|
return np.amin(np.abs(u - v))
|
|
161
223
|
|
|
162
|
-
|
|
163
224
|
def clark(self, u, v):
|
|
164
225
|
"""
|
|
165
|
-
Clark distance.
|
|
166
|
-
|
|
226
|
+
Calculate the Clark distance between two vectors.
|
|
227
|
+
|
|
228
|
+
The Clark distance equals the square root of half of the divergence.
|
|
229
|
+
|
|
167
230
|
Notes:
|
|
168
231
|
When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
|
|
169
232
|
is used in the calculation.
|
|
233
|
+
|
|
234
|
+
Parameters:
|
|
235
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
- The Clark distance between the two vectors.
|
|
239
|
+
|
|
170
240
|
References:
|
|
171
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
241
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
172
242
|
Measures between Probability Density Functions. International
|
|
173
243
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
174
244
|
1(4), 300-307.
|
|
175
245
|
"""
|
|
176
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
177
|
-
with np.errstate(divide=
|
|
178
|
-
return np.sqrt(np.nansum(np.power(np.abs(u-v)/(u+v),2)))
|
|
179
|
-
|
|
246
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
247
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
248
|
+
return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
|
|
180
249
|
|
|
181
250
|
def cosine(self, u, v):
|
|
182
251
|
"""
|
|
183
|
-
|
|
252
|
+
Calculate the cosine distance between two vectors.
|
|
253
|
+
|
|
254
|
+
Parameters:
|
|
255
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
- The cosine distance between the two vectors.
|
|
259
|
+
|
|
184
260
|
References:
|
|
185
261
|
1. SciPy.
|
|
186
262
|
"""
|
|
187
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
188
|
-
return 1 - np.dot(u, v)/(np.sqrt(np.dot(u, u))*np.sqrt(np.dot(v, v)))
|
|
189
|
-
|
|
263
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
264
|
+
return 1 - np.dot(u, v) / (np.sqrt(np.dot(u, u)) * np.sqrt(np.dot(v, v)))
|
|
190
265
|
|
|
191
266
|
def correlation_pearson(self, u, v):
|
|
192
267
|
"""
|
|
193
|
-
Pearson correlation distance.
|
|
268
|
+
Calculate the Pearson correlation distance between two vectors.
|
|
269
|
+
|
|
194
270
|
Returns a distance value between 0 and 2.
|
|
271
|
+
|
|
272
|
+
Parameters:
|
|
273
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
- The Pearson correlation distance between the two vectors.
|
|
195
277
|
"""
|
|
196
|
-
|
|
278
|
+
|
|
279
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
197
280
|
r = np.ma.corrcoef(u, v)[0, 1]
|
|
198
281
|
return 1.0 - r
|
|
199
282
|
|
|
200
|
-
|
|
201
283
|
def czekanowski(self, u, v):
|
|
202
284
|
"""
|
|
203
|
-
Czekanowski distance.
|
|
285
|
+
Calculate the Czekanowski distance between two vectors.
|
|
286
|
+
|
|
287
|
+
Parameters:
|
|
288
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
- The Czekanowski distance between the two vectors.
|
|
292
|
+
|
|
204
293
|
References:
|
|
205
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
294
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
206
295
|
Measures between Probability Density Functions. International
|
|
207
296
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
208
297
|
1(4), 300-307.
|
|
209
298
|
"""
|
|
210
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
299
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
211
300
|
return np.sum(np.abs(u - v)) / np.sum(u + v)
|
|
212
301
|
|
|
213
|
-
|
|
214
302
|
def dice(self, u, v):
|
|
215
303
|
"""
|
|
216
|
-
Dice dissimilarity.
|
|
304
|
+
Calculate the Dice dissimilarity between two vectors.
|
|
305
|
+
|
|
217
306
|
Synonyms:
|
|
218
307
|
Sorensen distance
|
|
219
|
-
|
|
308
|
+
|
|
309
|
+
Parameters:
|
|
310
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
311
|
+
|
|
312
|
+
Returns:
|
|
313
|
+
- The Dice dissimilarity between the two vectors.
|
|
314
|
+
|
|
315
|
+
References:
|
|
220
316
|
1. Dice LR (1945) Measures of the amount of ecologic association
|
|
221
317
|
between species. Ecology. 26, 297-302.
|
|
222
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
318
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
223
319
|
Measures between Probability Density Functions. International
|
|
224
320
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
225
321
|
1(4), 300-307.
|
|
226
322
|
"""
|
|
227
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
323
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
228
324
|
u_v = u - v
|
|
229
325
|
return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
|
|
230
326
|
|
|
231
|
-
|
|
232
327
|
def divergence(self, u, v):
|
|
233
328
|
"""
|
|
234
|
-
|
|
329
|
+
Calculate the divergence between two vectors.
|
|
330
|
+
|
|
235
331
|
Divergence equals squared Clark distance multiplied by 2.
|
|
332
|
+
|
|
333
|
+
Parameters:
|
|
334
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
335
|
+
|
|
336
|
+
Returns:
|
|
337
|
+
- The divergence between the two vectors.
|
|
338
|
+
|
|
236
339
|
References:
|
|
237
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
340
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
238
341
|
Measures between Probability Density Functions. International
|
|
239
342
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
240
343
|
1(4), 300-307.
|
|
241
344
|
"""
|
|
242
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
345
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
243
346
|
with np.errstate(invalid="ignore"):
|
|
244
|
-
return 2 * np.nansum(np.power(u-v,2) / np.power(u+v,2))
|
|
245
|
-
|
|
347
|
+
return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
|
|
246
348
|
|
|
247
349
|
def euclidean(self, u, v):
|
|
248
350
|
"""
|
|
249
|
-
Euclidean distance.
|
|
250
|
-
|
|
251
|
-
|
|
351
|
+
Calculate the Euclidean distance between two vectors.
|
|
352
|
+
|
|
353
|
+
The Euclidean distance is the "ordinary" straight-line distance between two points in Euclidean space.
|
|
354
|
+
|
|
355
|
+
Parameters:
|
|
356
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
- The Euclidean distance between the two vectors.
|
|
360
|
+
|
|
252
361
|
References:
|
|
253
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
362
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
254
363
|
Measures between Probability Density Functions. International
|
|
255
364
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
256
|
-
1(4), 300-307.
|
|
365
|
+
1(4), 300-307.
|
|
257
366
|
"""
|
|
258
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
259
|
-
return np.linalg.norm(u-v)
|
|
260
|
-
|
|
367
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
368
|
+
return np.linalg.norm(u - v)
|
|
261
369
|
|
|
262
370
|
def fidelity(self, u, v):
|
|
263
371
|
"""
|
|
264
|
-
|
|
372
|
+
Calculate the fidelity distance between two vectors.
|
|
373
|
+
|
|
374
|
+
The fidelity distance measures the similarity between two probability distributions.
|
|
375
|
+
|
|
376
|
+
Parameters:
|
|
377
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
378
|
+
|
|
379
|
+
Returns:
|
|
380
|
+
- The fidelity distance between the two vectors.
|
|
381
|
+
|
|
265
382
|
Notes:
|
|
266
383
|
Added by SC.
|
|
267
384
|
"""
|
|
268
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
269
|
-
return 1 - (np.sum(np.sqrt(u*v)))
|
|
270
|
-
|
|
385
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
386
|
+
return 1 - (np.sum(np.sqrt(u * v)))
|
|
271
387
|
|
|
272
388
|
def google(self, u, v):
|
|
273
389
|
"""
|
|
274
|
-
Normalized Google Distance (NGD).
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
390
|
+
Calculate the Normalized Google Distance (NGD) between two vectors.
|
|
391
|
+
|
|
392
|
+
NGD is a measure of similarity derived from the number of hits returned by the Google search engine for a given set of keywords.
|
|
393
|
+
|
|
394
|
+
Parameters:
|
|
395
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
396
|
+
|
|
397
|
+
Returns:
|
|
398
|
+
- The Normalized Google Distance between the two vectors.
|
|
399
|
+
|
|
278
400
|
Notes:
|
|
279
401
|
When used for comparing two probability density functions (pdfs),
|
|
280
402
|
Google distance equals half of Cityblock distance.
|
|
403
|
+
|
|
281
404
|
References:
|
|
282
405
|
1. Lee & Rashid (2008) Information Technology, ITSim 2008.
|
|
283
406
|
doi:10.1109/ITSIM.2008.4631601.
|
|
284
407
|
"""
|
|
285
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
408
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
286
409
|
x = float(np.sum(u))
|
|
287
410
|
y = float(np.sum(v))
|
|
288
411
|
summin = float(np.sum(np.minimum(u, v)))
|
|
289
412
|
return (max([x, y]) - summin) / ((x + y) - min([x, y]))
|
|
290
413
|
|
|
291
|
-
|
|
292
414
|
def gower(self, u, v):
|
|
293
415
|
"""
|
|
294
|
-
Gower distance.
|
|
295
|
-
|
|
416
|
+
Calculate the Gower distance between two vectors.
|
|
417
|
+
|
|
418
|
+
The Gower distance equals the Cityblock distance divided by the vector length.
|
|
419
|
+
|
|
420
|
+
Parameters:
|
|
421
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
422
|
+
|
|
423
|
+
Returns:
|
|
424
|
+
- The Gower distance between the two vectors.
|
|
425
|
+
|
|
296
426
|
References:
|
|
297
427
|
1. Gower JC. (1971) General Coefficient of Similarity
|
|
298
428
|
and Some of Its Properties, Biometrics 27, 857-874.
|
|
299
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
429
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
300
430
|
Measures between Probability Density Functions. International
|
|
301
431
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
302
432
|
1(4), 300-307.
|
|
303
433
|
"""
|
|
304
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
434
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
305
435
|
return np.sum(np.abs(u - v)) / u.size
|
|
306
436
|
|
|
307
|
-
|
|
308
437
|
#### NEEDS CHECKING ####
|
|
309
438
|
# def harmonicmean(self, u, v):
|
|
310
439
|
# """
|
|
@@ -316,84 +445,118 @@ class Distance:
|
|
|
316
445
|
# return 1 - 2.*np.sum(u*v/(u+v))
|
|
317
446
|
#########
|
|
318
447
|
|
|
319
|
-
|
|
320
448
|
def hellinger(self, u, v):
|
|
321
449
|
"""
|
|
322
|
-
Hellinger distance.
|
|
450
|
+
Calculate the Hellinger distance between two vectors.
|
|
451
|
+
|
|
452
|
+
The Hellinger distance is a measure of similarity between two probability distributions.
|
|
453
|
+
|
|
454
|
+
Parameters:
|
|
455
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
- The Hellinger distance between the two vectors.
|
|
459
|
+
|
|
323
460
|
Notes:
|
|
324
461
|
This implementation produces values two times larger than values
|
|
325
462
|
obtained by Hellinger distance described in Wikipedia and also
|
|
326
463
|
in https://gist.github.com/larsmans/3116927.
|
|
327
|
-
|
|
328
|
-
np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)) / np.sqrt(2)
|
|
464
|
+
|
|
329
465
|
References:
|
|
330
|
-
1.
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
466
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
467
|
+
Measures between Probability Density Functions. International
|
|
468
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
469
|
+
1(4), 300-307.
|
|
334
470
|
"""
|
|
335
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
336
|
-
return np.sqrt(2*np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
337
|
-
|
|
471
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
472
|
+
return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
338
473
|
|
|
339
474
|
def inner(self, u, v):
|
|
340
475
|
"""
|
|
341
|
-
|
|
476
|
+
Calculate the inner product distance between two vectors.
|
|
477
|
+
|
|
478
|
+
The inner product distance is a measure of similarity between two vectors, based on their inner product.
|
|
479
|
+
|
|
480
|
+
Parameters:
|
|
481
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
482
|
+
|
|
483
|
+
Returns:
|
|
484
|
+
- The inner product distance between the two vectors.
|
|
485
|
+
|
|
342
486
|
Notes:
|
|
343
487
|
Added by SC.
|
|
344
488
|
"""
|
|
345
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
489
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
346
490
|
return 1 - np.dot(u, v)
|
|
347
491
|
|
|
348
|
-
|
|
349
492
|
def jaccard(self, u, v):
|
|
350
493
|
"""
|
|
351
|
-
Jaccard distance.
|
|
494
|
+
Calculate the Jaccard distance between two vectors.
|
|
495
|
+
|
|
496
|
+
The Jaccard distance measures dissimilarity between sample sets.
|
|
497
|
+
|
|
498
|
+
Parameters:
|
|
499
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
500
|
+
|
|
501
|
+
Returns:
|
|
502
|
+
- The Jaccard distance between the two vectors.
|
|
503
|
+
|
|
352
504
|
References:
|
|
353
|
-
1.
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
505
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
506
|
+
Measures between Probability Density Functions. International
|
|
507
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
508
|
+
1(4), 300-307.
|
|
357
509
|
"""
|
|
358
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
510
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
359
511
|
uv = np.dot(u, v)
|
|
360
512
|
return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
|
|
361
513
|
|
|
362
|
-
|
|
363
514
|
def jeffreys(self, u, v):
|
|
364
515
|
"""
|
|
365
|
-
Jeffreys divergence.
|
|
366
|
-
|
|
367
|
-
|
|
516
|
+
Calculate the Jeffreys divergence between two vectors.
|
|
517
|
+
|
|
518
|
+
The Jeffreys divergence is a symmetric version of the Kullback-Leibler divergence.
|
|
519
|
+
|
|
520
|
+
Parameters:
|
|
521
|
+
- u, v: Input vectors between which the divergence is to be calculated.
|
|
522
|
+
|
|
523
|
+
Returns:
|
|
524
|
+
- The Jeffreys divergence between the two vectors.
|
|
525
|
+
|
|
368
526
|
References:
|
|
369
527
|
1. Jeffreys H (1946) An Invariant Form for the Prior Probability
|
|
370
528
|
in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
|
|
371
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
529
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
372
530
|
Measures between Probability Density Functions. International
|
|
373
531
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
374
532
|
1(4), 300-307.
|
|
375
533
|
"""
|
|
376
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
534
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
377
535
|
# Add epsilon to zeros in vectors to avoid division
|
|
378
536
|
# by 0 and/or log of 0. Alternatively, zeros in the
|
|
379
537
|
# vectors could be ignored or masked (see below).
|
|
380
538
|
# u = ma.masked_where(u == 0, u)
|
|
381
539
|
# v = ma.masked_where(v == 0, u)
|
|
382
|
-
u = np.where(u==0, self.epsilon, u)
|
|
383
|
-
v = np.where(v==0, self.epsilon, v)
|
|
384
|
-
return np.sum((u-v) * np.log(u / v))
|
|
385
|
-
|
|
540
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
541
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
542
|
+
return np.sum((u - v) * np.log(u / v))
|
|
386
543
|
|
|
387
544
|
def jensenshannon_divergence(self, u, v):
|
|
388
545
|
"""
|
|
389
|
-
Jensen-Shannon divergence.
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
546
|
+
Calculate the Jensen-Shannon divergence between two vectors.
|
|
547
|
+
|
|
548
|
+
The Jensen-Shannon divergence is a symmetric and finite measure of similarity between two probability distributions.
|
|
549
|
+
|
|
550
|
+
Parameters:
|
|
551
|
+
- u, v: Input vectors between which the divergence is to be calculated.
|
|
552
|
+
|
|
553
|
+
Returns:
|
|
554
|
+
- The Jensen-Shannon divergence between the two vectors.
|
|
555
|
+
|
|
393
556
|
References:
|
|
394
557
|
1. Lin J. (1991) Divergence measures based on the Shannon entropy.
|
|
395
558
|
IEEE Transactions on Information Theory, 37(1):145–151.
|
|
396
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
559
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
397
560
|
Measures between Probability Density Functions. International
|
|
398
561
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
399
562
|
1(4), 300-307.
|
|
@@ -404,523 +567,718 @@ class Distance:
|
|
|
404
567
|
el1 = (u * np.log(u) + v * np.log(v)) / 2
|
|
405
568
|
el2 = (u + v)/2
|
|
406
569
|
el3 = np.log(el2)
|
|
407
|
-
return np.sum(el1 - el2 * el3)
|
|
570
|
+
return np.sum(el1 - el2 * el3)
|
|
408
571
|
"""
|
|
409
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
410
|
-
u = np.where(u==0, self.epsilon, u)
|
|
411
|
-
v = np.where(v==0, self.epsilon, v)
|
|
412
|
-
dl = u * np.log(2*u/(u+v))
|
|
413
|
-
dr = v * np.log(2*v/(u+v))
|
|
572
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
573
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
574
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
575
|
+
dl = u * np.log(2 * u / (u + v))
|
|
576
|
+
dr = v * np.log(2 * v / (u + v))
|
|
414
577
|
return (np.sum(dl) + np.sum(dr)) / 2
|
|
415
578
|
|
|
416
|
-
|
|
417
579
|
def jensen_difference(self, u, v):
|
|
418
580
|
"""
|
|
419
|
-
Jensen difference
|
|
420
|
-
|
|
421
|
-
|
|
581
|
+
Calculate the Jensen difference between two vectors.
|
|
582
|
+
|
|
583
|
+
The Jensen difference is considered similar to the Jensen-Shannon divergence.
|
|
584
|
+
|
|
585
|
+
Parameters:
|
|
586
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
587
|
+
|
|
588
|
+
Returns:
|
|
589
|
+
- The Jensen difference between the two vectors.
|
|
590
|
+
|
|
591
|
+
Notes:
|
|
592
|
+
1. Equals half of Topsøe distance
|
|
593
|
+
2. Equals squared jensenshannon_distance.
|
|
594
|
+
|
|
595
|
+
|
|
422
596
|
References:
|
|
423
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
597
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
424
598
|
Measures between Probability Density Functions. International
|
|
425
599
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
426
|
-
1(4), 300-307.
|
|
600
|
+
1(4), 300-307.
|
|
427
601
|
"""
|
|
428
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
429
|
-
u = np.where(u==0, self.epsilon, u)
|
|
430
|
-
v = np.where(v==0, self.epsilon, v)
|
|
602
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
603
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
604
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
431
605
|
el1 = (u * np.log(u) + v * np.log(v)) / 2
|
|
432
606
|
el2 = (u + v) / 2
|
|
433
607
|
return np.sum(el1 - el2 * np.log(el2))
|
|
434
608
|
|
|
435
|
-
|
|
436
609
|
def k_divergence(self, u, v):
|
|
437
610
|
"""
|
|
438
|
-
K divergence.
|
|
611
|
+
Calculate the K divergence between two vectors.
|
|
612
|
+
|
|
613
|
+
Parameters:
|
|
614
|
+
- u, v: Input vectors between which the divergence is to be calculated.
|
|
615
|
+
|
|
616
|
+
Returns:
|
|
617
|
+
- The K divergence between the two vectors.
|
|
618
|
+
|
|
439
619
|
References:
|
|
440
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
620
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
441
621
|
Measures between Probability Density Functions. International
|
|
442
622
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
443
623
|
1(4), 300-307.
|
|
444
624
|
"""
|
|
445
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
446
|
-
u = np.where(u==0, self.epsilon, u)
|
|
447
|
-
v = np.where(v==0, self.epsilon, v)
|
|
448
|
-
return np.sum(u*np.log(2*u/(u+v)))
|
|
449
|
-
|
|
625
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
626
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
627
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
628
|
+
return np.sum(u * np.log(2 * u / (u + v)))
|
|
450
629
|
|
|
451
630
|
def kl_divergence(self, u, v):
|
|
452
631
|
"""
|
|
453
|
-
Kullback-Leibler divergence.
|
|
454
|
-
|
|
455
|
-
|
|
632
|
+
Calculate the Kullback-Leibler divergence between two vectors.
|
|
633
|
+
|
|
634
|
+
The Kullback-Leibler divergence measures the difference between two probability distributions.
|
|
635
|
+
|
|
636
|
+
Parameters:
|
|
637
|
+
- u, v: Input vectors between which the divergence is to be calculated.
|
|
638
|
+
|
|
639
|
+
Returns:
|
|
640
|
+
- The Kullback-Leibler divergence between the two vectors.
|
|
641
|
+
|
|
456
642
|
References:
|
|
457
643
|
1. Kullback S, Leibler RA (1951) On information and sufficiency.
|
|
458
644
|
Ann. Math. Statist. 22:79–86
|
|
459
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
645
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
460
646
|
Measures between Probability Density Functions. International
|
|
461
647
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
462
648
|
1(4):300-307.
|
|
463
649
|
"""
|
|
464
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
465
|
-
u = np.where(u==0, self.epsilon, u)
|
|
466
|
-
v = np.where(v==0, self.epsilon, v)
|
|
650
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
651
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
652
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
467
653
|
return np.sum(u * np.log(u / v))
|
|
468
654
|
|
|
469
|
-
|
|
470
655
|
def kulczynski(self, u, v):
|
|
471
656
|
"""
|
|
472
|
-
Kulczynski distance.
|
|
657
|
+
Calculate the Kulczynski distance between two vectors.
|
|
658
|
+
|
|
659
|
+
Parameters:
|
|
660
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
661
|
+
|
|
662
|
+
Returns:
|
|
663
|
+
- The Kulczynski distance between the two vectors.
|
|
664
|
+
|
|
473
665
|
References:
|
|
474
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
666
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
475
667
|
Measures between Probability Density Functions. International
|
|
476
668
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
477
669
|
1(4):300-307.
|
|
478
670
|
"""
|
|
479
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
671
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
480
672
|
return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
|
|
481
673
|
|
|
482
|
-
|
|
483
674
|
def kumarjohnson(self, u, v):
|
|
484
675
|
"""
|
|
485
|
-
Kumar-Johnson distance.
|
|
676
|
+
Calculate the Kumar-Johnson distance between two vectors.
|
|
677
|
+
|
|
678
|
+
Parameters:
|
|
679
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
680
|
+
|
|
681
|
+
Returns:
|
|
682
|
+
- The Kumar-Johnson distance between the two vectors.
|
|
683
|
+
|
|
486
684
|
References:
|
|
487
685
|
1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
|
|
488
686
|
and information inequalities, Journal of Inequalities in pure
|
|
489
687
|
and applied Mathematics. 6(3).
|
|
490
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
688
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
491
689
|
Measures between Probability Density Functions. International
|
|
492
690
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
493
691
|
1(4):300-307.
|
|
494
692
|
"""
|
|
495
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
496
|
-
uvmult = u*v
|
|
497
|
-
with np.errstate(divide=
|
|
693
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
694
|
+
uvmult = u * v
|
|
695
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
498
696
|
numer = np.power(u**2 - v**2, 2)
|
|
499
|
-
denom = 2 * np.power(uvmult, 3/2)
|
|
500
|
-
return np.sum(np.where(uvmult != 0, numer/denom, 0))
|
|
501
|
-
|
|
697
|
+
denom = 2 * np.power(uvmult, 3 / 2)
|
|
698
|
+
return np.sum(np.where(uvmult != 0, numer / denom, 0))
|
|
502
699
|
|
|
503
700
|
def lorentzian(self, u, v):
|
|
504
701
|
"""
|
|
505
|
-
Lorentzian distance.
|
|
702
|
+
Calculate the Lorentzian distance between two vectors.
|
|
703
|
+
|
|
704
|
+
Parameters:
|
|
705
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
706
|
+
|
|
707
|
+
Returns:
|
|
708
|
+
- The Lorentzian distance between the two vectors.
|
|
709
|
+
|
|
506
710
|
References:
|
|
507
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
711
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
508
712
|
Measures between Probability Density Functions. International
|
|
509
713
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
510
|
-
1(4):300-307.
|
|
714
|
+
1(4):300-307.
|
|
715
|
+
|
|
511
716
|
Notes:
|
|
512
|
-
One (1) is added to guarantee the non-negativity property and to
|
|
513
|
-
eschew the log of zero
|
|
717
|
+
One (1) is added to guarantee the non-negativity property and to
|
|
718
|
+
eschew the log of zero.
|
|
514
719
|
"""
|
|
515
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
516
|
-
return np.sum(np.log(np.abs(u-v)+1))
|
|
517
|
-
|
|
720
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
721
|
+
return np.sum(np.log(np.abs(u - v) + 1))
|
|
518
722
|
|
|
519
723
|
def cityblock(self, u, v):
|
|
520
724
|
"""
|
|
521
|
-
Manhattan distance.
|
|
725
|
+
Calculate the Cityblock (Manhattan) distance between two vectors.
|
|
726
|
+
|
|
727
|
+
Parameters:
|
|
728
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
729
|
+
|
|
730
|
+
Returns:
|
|
731
|
+
- The Cityblock distance between the two vectors.
|
|
732
|
+
|
|
733
|
+
References:
|
|
734
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
735
|
+
Measures between Probability Density Functions. International
|
|
736
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
737
|
+
1(4):300-307.
|
|
738
|
+
|
|
522
739
|
Synonyms:
|
|
523
740
|
City block distance
|
|
524
741
|
Manhattan distance
|
|
525
742
|
Rectilinear distance
|
|
526
743
|
Taxicab norm
|
|
744
|
+
|
|
527
745
|
Notes:
|
|
528
|
-
Cityblock distance between two probability density functions
|
|
746
|
+
Cityblock distance between two probability density functions
|
|
529
747
|
(pdfs) equals:
|
|
530
748
|
1. Non-intersection distance multiplied by 2.
|
|
531
749
|
2. Gower distance multiplied by vector length.
|
|
532
750
|
3. Bray-Curtis distance multiplied by 2.
|
|
533
751
|
4. Google distance multiplied by 2.
|
|
534
|
-
References:
|
|
535
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
536
|
-
Measures between Probability Density Functions. International
|
|
537
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
538
|
-
1(4):300-307.
|
|
539
752
|
"""
|
|
540
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
753
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
541
754
|
return np.sum(np.abs(u - v))
|
|
542
755
|
|
|
543
|
-
|
|
544
756
|
def marylandbridge(self, u, v):
|
|
545
757
|
"""
|
|
546
|
-
Maryland Bridge distance.
|
|
758
|
+
Calculate the Maryland Bridge distance between two vectors.
|
|
759
|
+
|
|
760
|
+
Parameters:
|
|
761
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
762
|
+
|
|
763
|
+
Returns:
|
|
764
|
+
- The Maryland Bridge distance between the two vectors.
|
|
765
|
+
|
|
547
766
|
References:
|
|
548
|
-
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
767
|
+
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
549
768
|
Springer-Verlag Berlin Heidelberg. 1-590.
|
|
550
769
|
"""
|
|
551
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
770
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
552
771
|
uvdot = np.dot(u, v)
|
|
553
|
-
return 1 - (uvdot/np.dot(u, u) + uvdot/np.dot(v, v))/2
|
|
554
|
-
|
|
772
|
+
return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
|
|
555
773
|
|
|
556
774
|
def matusita(self, u, v):
|
|
557
775
|
"""
|
|
558
|
-
Matusita distance.
|
|
559
|
-
|
|
560
|
-
|
|
776
|
+
Calculate the Matusita distance between two vectors.
|
|
777
|
+
|
|
778
|
+
Parameters:
|
|
779
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
780
|
+
|
|
781
|
+
Returns:
|
|
782
|
+
- The Matusita distance between the two vectors.
|
|
783
|
+
|
|
561
784
|
References:
|
|
562
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
785
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
563
786
|
Measures between Probability Density Functions. International
|
|
564
787
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
565
788
|
1(4):300-307.
|
|
566
|
-
"""
|
|
567
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
568
|
-
return np.sqrt(np.sum((np.sqrt(u)-np.sqrt(v))**2))
|
|
569
789
|
|
|
790
|
+
Notes:
|
|
791
|
+
Equals square root of Squared-chord distance.
|
|
792
|
+
"""
|
|
793
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
794
|
+
return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
570
795
|
|
|
571
796
|
def max_symmetric_chisq(self, u, v):
|
|
572
797
|
"""
|
|
573
|
-
|
|
798
|
+
Calculate the maximum symmetric chi-square distance between two vectors.
|
|
799
|
+
|
|
800
|
+
Parameters:
|
|
801
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
802
|
+
|
|
803
|
+
Returns:
|
|
804
|
+
- The maximum symmetric chi-square distance between the two vectors.
|
|
805
|
+
|
|
574
806
|
References:
|
|
575
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
807
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
576
808
|
Measures between Probability Density Functions. International
|
|
577
809
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
578
810
|
1(4):300-307.
|
|
579
811
|
"""
|
|
580
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
812
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
581
813
|
return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
582
814
|
|
|
583
|
-
|
|
584
815
|
def min_symmetric_chisq(self, u, v):
|
|
585
816
|
"""
|
|
586
|
-
|
|
817
|
+
Calculate the minimum symmetric chi-square distance between two vectors.
|
|
818
|
+
|
|
819
|
+
Parameters:
|
|
820
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
821
|
+
|
|
822
|
+
Returns:
|
|
823
|
+
- The minimum symmetric chi-square distance between the two vectors.
|
|
824
|
+
|
|
587
825
|
Notes:
|
|
588
826
|
Added by SC.
|
|
589
827
|
"""
|
|
590
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
828
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
591
829
|
return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
592
830
|
|
|
593
|
-
|
|
594
831
|
def meehl(self, u, v):
|
|
595
832
|
"""
|
|
596
|
-
|
|
833
|
+
Calculate the Meehl distance between two vectors.
|
|
834
|
+
|
|
835
|
+
Parameters:
|
|
836
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
837
|
+
|
|
838
|
+
Returns:
|
|
839
|
+
- The Meehl distance between the two vectors.
|
|
840
|
+
|
|
597
841
|
Notes:
|
|
598
842
|
Added by SC.
|
|
843
|
+
|
|
599
844
|
References:
|
|
600
845
|
1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
|
|
601
|
-
Berlin, Heidelberg: Springer Berlin Heidelberg.
|
|
846
|
+
Berlin, Heidelberg: Springer Berlin Heidelberg.
|
|
602
847
|
https://doi.org/10.1007/978-3-642-30958-8.
|
|
603
848
|
"""
|
|
604
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
849
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
605
850
|
|
|
606
851
|
xi = u[:-1]
|
|
607
852
|
yi = v[:-1]
|
|
608
|
-
xiplus1 = np.roll(u,1)[:-1]
|
|
609
|
-
yiplus1 = np.roll(v,1)[:-1]
|
|
610
|
-
|
|
611
|
-
with np.errstate(divide='ignore', invalid="ignore"):
|
|
612
|
-
return np.nansum((xi - yi - xiplus1 + yiplus1)**2)
|
|
853
|
+
xiplus1 = np.roll(u, 1)[:-1]
|
|
854
|
+
yiplus1 = np.roll(v, 1)[:-1]
|
|
613
855
|
|
|
856
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
857
|
+
return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
|
|
614
858
|
|
|
615
859
|
def minkowski(self, u, v, p=2):
|
|
616
860
|
"""
|
|
617
|
-
Minkowski distance.
|
|
861
|
+
Calculate the Minkowski distance between two vectors.
|
|
862
|
+
|
|
618
863
|
Parameters:
|
|
619
|
-
|
|
620
|
-
|
|
864
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
865
|
+
- p: The order of the norm of the difference.
|
|
866
|
+
|
|
867
|
+
Returns:
|
|
868
|
+
- The Minkowski distance between the two vectors.
|
|
869
|
+
|
|
621
870
|
Notes:
|
|
622
871
|
When p goes to infinite, the Chebyshev distance is derived.
|
|
872
|
+
|
|
623
873
|
References:
|
|
624
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
874
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
625
875
|
Measures between Probability Density Functions. International
|
|
626
876
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
627
877
|
1(4):300-307.
|
|
628
878
|
"""
|
|
629
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
879
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
630
880
|
return np.linalg.norm(u - v, ord=p)
|
|
631
881
|
|
|
632
|
-
|
|
633
882
|
def motyka(self, u, v):
|
|
634
883
|
"""
|
|
635
|
-
Motyka distance.
|
|
884
|
+
Calculate the Motyka distance between two vectors.
|
|
885
|
+
|
|
886
|
+
Parameters:
|
|
887
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
888
|
+
|
|
889
|
+
Returns:
|
|
890
|
+
- The Motyka distance between the two vectors.
|
|
891
|
+
|
|
636
892
|
Notes:
|
|
637
893
|
The distance between identical vectors is not equal to 0 but 0.5.
|
|
638
|
-
|
|
894
|
+
|
|
639
895
|
References:
|
|
640
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
896
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
641
897
|
Measures between Probability Density Functions. International
|
|
642
898
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
643
899
|
1(4), 300-307.
|
|
644
900
|
"""
|
|
645
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
901
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
646
902
|
return np.sum(np.maximum(u, v)) / np.sum(u + v)
|
|
647
903
|
|
|
648
|
-
|
|
649
904
|
def neyman_chisq(self, u, v):
|
|
650
905
|
"""
|
|
651
|
-
Neyman chi-square distance.
|
|
906
|
+
Calculate the Neyman chi-square distance between two vectors.
|
|
907
|
+
|
|
908
|
+
Parameters:
|
|
909
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
910
|
+
|
|
911
|
+
Returns:
|
|
912
|
+
- The Neyman chi-square distance between the two vectors.
|
|
913
|
+
|
|
652
914
|
References:
|
|
653
|
-
1. Neyman J (1949) Contributions to the theory of the chi^2 test.
|
|
915
|
+
1. Neyman J (1949) Contributions to the theory of the chi^2 test.
|
|
654
916
|
In Proceedings of the First Berkley Symposium on Mathematical
|
|
655
917
|
Statistics and Probability.
|
|
656
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
918
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
657
919
|
Measures between Probability Density Functions. International
|
|
658
920
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
659
921
|
1(4), 300-307.
|
|
660
922
|
"""
|
|
661
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
662
|
-
with np.errstate(divide=
|
|
663
|
-
return np.sum(np.where(u != 0, (u-v)**2/u, 0))
|
|
664
|
-
|
|
923
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
924
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
925
|
+
return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
|
|
665
926
|
|
|
666
927
|
def nonintersection(self, u, v):
|
|
667
928
|
"""
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
929
|
+
Calculate the Nonintersection distance between two vectors.
|
|
930
|
+
|
|
931
|
+
Parameters:
|
|
932
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
933
|
+
|
|
934
|
+
Returns:
|
|
935
|
+
- The Nonintersection distance between the two vectors.
|
|
936
|
+
|
|
675
937
|
References:
|
|
676
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
938
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
677
939
|
Measures between Probability Density Functions. International
|
|
678
940
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
679
941
|
1(4), 300-307.
|
|
942
|
+
|
|
943
|
+
Notes:
|
|
944
|
+
When used for comparing two probability density functions (pdfs),
|
|
945
|
+
Nonintersection distance equals half of Cityblock distance.
|
|
680
946
|
"""
|
|
681
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
947
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
682
948
|
return 1 - np.sum(np.minimum(u, v))
|
|
683
949
|
|
|
684
|
-
|
|
685
950
|
def pearson_chisq(self, u, v):
|
|
686
951
|
"""
|
|
687
|
-
Pearson chi-square divergence.
|
|
688
|
-
|
|
689
|
-
|
|
952
|
+
Calculate the Pearson chi-square divergence between two vectors.
|
|
953
|
+
|
|
954
|
+
Parameters:
|
|
955
|
+
- u, v: Input vectors between which the divergence is to be calculated.
|
|
956
|
+
|
|
957
|
+
Returns:
|
|
958
|
+
- The Pearson chi-square divergence between the two vectors.
|
|
959
|
+
|
|
690
960
|
References:
|
|
691
|
-
1. Pearson K. (1900) On the Criterion that a given system of
|
|
961
|
+
1. Pearson K. (1900) On the Criterion that a given system of
|
|
692
962
|
deviations from the probable in the case of correlated system
|
|
693
963
|
of variables is such that it can be reasonable supposed to have
|
|
694
964
|
arisen from random sampling, Phil. Mag. 50, 157-172.
|
|
695
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
965
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
696
966
|
Measures between Probability Density Functions. International
|
|
697
967
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
698
968
|
1(4), 300-307.
|
|
699
|
-
"""
|
|
700
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
701
|
-
with np.errstate(divide='ignore', invalid='ignore'):
|
|
702
|
-
return np.sum(np.where(v != 0, (u-v)**2/v, 0))
|
|
703
969
|
|
|
970
|
+
Notes:
|
|
971
|
+
Pearson chi-square divergence is asymmetric.
|
|
972
|
+
"""
|
|
973
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
974
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
975
|
+
return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
|
|
704
976
|
|
|
705
977
|
def penroseshape(self, u, v):
|
|
706
978
|
"""
|
|
707
|
-
Penrose shape distance.
|
|
979
|
+
Calculate the Penrose shape distance between two vectors.
|
|
980
|
+
|
|
981
|
+
Parameters:
|
|
982
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
983
|
+
|
|
984
|
+
Returns:
|
|
985
|
+
- The Penrose shape distance between the two vectors.
|
|
986
|
+
|
|
708
987
|
References:
|
|
709
|
-
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
988
|
+
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
710
989
|
Springer-Verlag Berlin Heidelberg. 1-590.
|
|
711
990
|
"""
|
|
712
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
991
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
713
992
|
umu = np.mean(u)
|
|
714
993
|
vmu = np.mean(v)
|
|
715
|
-
return np.sqrt(np.sum(((u-umu)-(v-vmu))**2))
|
|
716
|
-
|
|
994
|
+
return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
|
|
717
995
|
|
|
718
996
|
def prob_chisq(self, u, v):
|
|
719
997
|
"""
|
|
720
|
-
Probabilistic chi-square distance.
|
|
998
|
+
Calculate the Probabilistic chi-square distance between two vectors.
|
|
999
|
+
|
|
1000
|
+
Parameters:
|
|
1001
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1002
|
+
|
|
1003
|
+
Returns:
|
|
1004
|
+
- The Probabilistic chi-square distance between the two vectors.
|
|
1005
|
+
|
|
721
1006
|
Notes:
|
|
722
1007
|
Added by SC.
|
|
723
1008
|
"""
|
|
724
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1009
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
725
1010
|
uvsum = u + v
|
|
726
|
-
with np.errstate(divide=
|
|
727
|
-
return 2*np.sum(np.where(uvsum != 0, (u-v)**2/uvsum, 0))
|
|
728
|
-
|
|
1011
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
1012
|
+
return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
|
|
729
1013
|
|
|
730
1014
|
def ruzicka(self, u, v):
|
|
731
1015
|
"""
|
|
732
|
-
Ruzicka distance.
|
|
1016
|
+
Calculate the Ruzicka distance between two vectors.
|
|
1017
|
+
|
|
1018
|
+
Parameters:
|
|
1019
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1020
|
+
|
|
1021
|
+
Returns:
|
|
1022
|
+
- The Ruzicka distance between the two vectors.
|
|
1023
|
+
|
|
733
1024
|
Notes:
|
|
734
|
-
Added by SC.
|
|
1025
|
+
Added by SC.
|
|
735
1026
|
"""
|
|
736
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1027
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
737
1028
|
den = np.sum(np.maximum(u, v))
|
|
738
|
-
|
|
739
|
-
return 1 - np.sum(np.minimum(u, v)) / den
|
|
740
1029
|
|
|
1030
|
+
return 1 - np.sum(np.minimum(u, v)) / den
|
|
741
1031
|
|
|
742
1032
|
def sorensen(self, u, v):
|
|
743
1033
|
"""
|
|
744
|
-
Sorensen distance.
|
|
745
|
-
|
|
1034
|
+
Calculate the Sorensen distance between two vectors.
|
|
1035
|
+
|
|
1036
|
+
Parameters:
|
|
1037
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1038
|
+
|
|
1039
|
+
Returns:
|
|
1040
|
+
- The Sorensen distance between the two vectors.
|
|
1041
|
+
|
|
746
1042
|
Notes:
|
|
1043
|
+
The Sorensen distance equals the Manhattan distance divided by the sum of the two vectors.
|
|
1044
|
+
|
|
747
1045
|
Added by SC.
|
|
748
1046
|
"""
|
|
749
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1047
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
750
1048
|
return np.sum(np.abs(u - v)) / np.sum(u + v)
|
|
751
1049
|
|
|
752
|
-
|
|
753
1050
|
def soergel(self, u, v):
|
|
754
1051
|
"""
|
|
755
|
-
Soergel distance.
|
|
1052
|
+
Calculate the Soergel distance between two vectors.
|
|
1053
|
+
|
|
1054
|
+
Parameters:
|
|
1055
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1056
|
+
|
|
1057
|
+
Returns:
|
|
1058
|
+
- The Soergel distance between the two vectors.
|
|
1059
|
+
|
|
756
1060
|
Notes:
|
|
757
1061
|
Equals Tanimoto distance.
|
|
1062
|
+
|
|
758
1063
|
References:
|
|
759
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1064
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
760
1065
|
Measures between Probability Density Functions. International
|
|
761
1066
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
762
1067
|
1(4), 300-307.
|
|
763
1068
|
"""
|
|
764
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1069
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
765
1070
|
return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
|
|
766
1071
|
|
|
767
|
-
|
|
768
1072
|
def squared_chisq(self, u, v):
|
|
769
1073
|
"""
|
|
770
|
-
Squared chi-square distance.
|
|
771
|
-
|
|
772
|
-
|
|
1074
|
+
Calculate the Squared chi-square distance between two vectors.
|
|
1075
|
+
|
|
1076
|
+
Parameters:
|
|
1077
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1078
|
+
|
|
1079
|
+
Returns:
|
|
1080
|
+
- The Squared chi-square distance between the two vectors.
|
|
1081
|
+
|
|
773
1082
|
References:
|
|
774
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1083
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
775
1084
|
Measures between Probability Density Functions. International
|
|
776
1085
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
777
1086
|
1(4), 300-307.
|
|
778
1087
|
"""
|
|
779
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1088
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
780
1089
|
uvsum = u + v
|
|
781
|
-
with np.errstate(divide=
|
|
782
|
-
return np.sum(np.where(uvsum != 0, (u-v)**2/uvsum, 0))
|
|
783
|
-
|
|
1090
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
1091
|
+
return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
|
|
784
1092
|
|
|
785
1093
|
def squaredchord(self, u, v):
|
|
786
1094
|
"""
|
|
787
|
-
Squared-chord distance.
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
1095
|
+
Calculate the Squared-chord distance between two vectors.
|
|
1096
|
+
|
|
1097
|
+
Parameters:
|
|
1098
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1099
|
+
|
|
1100
|
+
Returns:
|
|
1101
|
+
- The Squared-chord distance between the two vectors.
|
|
1102
|
+
|
|
1103
|
+
References:
|
|
1104
|
+
1. Gavin DG et al. (2003) A statistical approach to evaluating
|
|
792
1105
|
distance metrics and analog assignments for pollen records.
|
|
793
1106
|
Quaternary Research 60:356–367.
|
|
794
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1107
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
795
1108
|
Measures between Probability Density Functions. International
|
|
796
1109
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
797
1110
|
1(4), 300-307.
|
|
798
|
-
|
|
799
|
-
"""
|
|
800
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
801
|
-
return np.sum((np.sqrt(u) - np.sqrt(v))**2)
|
|
802
1111
|
|
|
1112
|
+
Notes:
|
|
1113
|
+
Equals to squared Matusita distance.
|
|
1114
|
+
"""
|
|
1115
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
1116
|
+
return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
|
|
803
1117
|
|
|
804
1118
|
def squared_euclidean(self, u, v):
|
|
805
1119
|
"""
|
|
806
|
-
Squared Euclidean distance.
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
1120
|
+
Calculate the Squared Euclidean distance between two vectors.
|
|
1121
|
+
|
|
1122
|
+
Parameters:
|
|
1123
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1124
|
+
|
|
1125
|
+
Returns:
|
|
1126
|
+
- The Squared Euclidean distance between the two vectors.
|
|
1127
|
+
|
|
1128
|
+
References:
|
|
1129
|
+
1. Gavin DG et al. (2003) A statistical approach to evaluating
|
|
811
1130
|
distance metrics and analog assignments for pollen records.
|
|
812
1131
|
Quaternary Research 60:356–367.
|
|
813
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1132
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
814
1133
|
Measures between Probability Density Functions. International
|
|
815
1134
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
816
1135
|
1(4), 300-307.
|
|
1136
|
+
|
|
1137
|
+
Notes:
|
|
1138
|
+
Equals to squared Euclidean distance.
|
|
817
1139
|
"""
|
|
818
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
1140
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
819
1141
|
return np.dot((u - v), (u - v))
|
|
820
1142
|
|
|
821
|
-
|
|
822
1143
|
def taneja(self, u, v):
|
|
823
1144
|
"""
|
|
824
|
-
Taneja distance.
|
|
1145
|
+
Calculate the Taneja distance between two vectors.
|
|
1146
|
+
|
|
1147
|
+
Parameters:
|
|
1148
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
1149
|
+
|
|
1150
|
+
Returns:
|
|
1151
|
+
- The Taneja distance between the two vectors.
|
|
1152
|
+
|
|
825
1153
|
References:
|
|
826
1154
|
1. Taneja IJ. (1995), New Developments in Generalized Information
|
|
827
1155
|
Measures, Chapter in: Advances in Imaging and Electron Physics,
|
|
828
1156
|
Ed. P.W. Hawkes, 91, 37-135.
|
|
829
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1157
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
830
1158
|
Measures between Probability Density Functions. International
|
|
831
1159
|
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
832
1160
|
1(4), 300-307.
|
|
833
1161
|
"""
|
|
834
|
-
u,v = np.asarray(u), np.asarray(v)
|
|
835
|
-
u = np.where(u==0, self.epsilon, u)
|
|
836
|
-
v = np.where(v==0, self.epsilon, v)
|
|
1162
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
1163
|
+
u = np.where(u == 0, self.epsilon, u)
|
|
1164
|
+
v = np.where(v == 0, self.epsilon, v)
|
|
837
1165
|
uvsum = u + v
|
|
838
|
-
return np.sum((uvsum/2)*np.log(uvsum/(2*np.sqrt(u*v))))
|
|
839
|
-
|
|
1166
|
+
return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
|
|
840
1167
|
|
|
841
1168
|
def tanimoto(self, u, v):
    """
    Calculate the Tanimoto distance between two vectors.

    Parameters:
    - u, v: Input vectors between which the distance is to be calculated.

    Returns:
    - The Tanimoto distance between the two vectors.

    References:
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes:
    Equals Soergel distance.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Equivalent formulation: np.sum(abs(u - v)) / np.sum(np.maximum(u, v))
    total = np.sum(u) + np.sum(v)
    overlap = np.sum(np.minimum(u, v))
    return (total - 2 * overlap) / (total - overlap)
|
859
1193
|
|
|
860
1194
|
def topsoe(self, u, v):
    """
    Calculate the Topsøe distance between two vectors.

    Parameters:
    - u, v: Input vectors between which the distance is to be calculated.

    Returns:
    - The Topsøe distance between the two vectors.

    References:
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes:
    Equals two times Jensen-Shannon divergence.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Replace exact zeros with a small epsilon so np.log never sees 0.
    u = np.where(u == 0, self.epsilon, u)
    v = np.where(v == 0, self.epsilon, v)
    pair_sum = u + v
    left = u * np.log(2 * u / pair_sum)
    right = v * np.log(2 * v / pair_sum)
    return np.sum(left + right)
|
|
880
|
-
|
|
881
1220
|
def vicis_symmetric_chisq(self, u, v):
    """
    Calculate the Vicis Symmetric chi-square distance between two vectors.

    Parameters:
    - u, v: Input vectors between which the distance is to be calculated.

    Returns:
    - The Vicis Symmetric chi-square distance between the two vectors.

    References:
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Division warnings are suppressed; zero denominators are masked below.
    with np.errstate(divide="ignore", invalid="ignore"):
        numer = np.square(u - v)
        denom = np.square(np.minimum(u, v))
        # Terms whose denominator is zero contribute nothing to the sum.
        return np.sum(np.where(denom != 0, numer / denom, 0))
|
|
897
1242
|
def vicis_wave_hedges(self, u, v):
    """
    Calculate the Vicis-Wave Hedges distance between two vectors.

    Parameters:
    - u, v: Input vectors between which the distance is to be calculated.

    Returns:
    - The Vicis-Wave Hedges distance between the two vectors.

    References:
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Division warnings are suppressed; zero denominators are masked below.
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = np.abs(u - v)
        smaller = np.minimum(u, v)
        # Terms whose smaller component is zero are skipped.
        return np.sum(np.where(smaller != 0, diff / smaller, 0))
912
1263
|
|
|
913
1264
|
def wave_hedges(self, u, v):
    """
    Calculate the Wave Hedges distance between two vectors.

    Parameters:
    - u, v: Input vectors between which the distance is to be calculated.

    Returns:
    - The Wave Hedges distance between the two vectors.

    References:
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    u, v = np.asarray(u), np.asarray(v)
    # Division warnings are suppressed; degenerate terms are masked below.
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = np.abs(u - v)
        larger = np.maximum(u, v)
        # A term counts only when both the difference and the max are nonzero.
        mask = (diff != 0) & (larger != 0)
        return np.sum(np.where(mask, diff / larger, 0))