distclassipy 0.2.1__py3-none-any.whl → 0.2.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distclassipy/__init__.py +2 -2
- distclassipy/classifier.py +36 -25
- distclassipy/distances.py +1455 -1451
- {distclassipy-0.2.1.dist-info → distclassipy-0.2.2a2.dist-info}/METADATA +4 -3
- distclassipy-0.2.2a2.dist-info/RECORD +8 -0
- {distclassipy-0.2.1.dist-info → distclassipy-0.2.2a2.dist-info}/WHEEL +1 -1
- distclassipy-0.2.1.dist-info/RECORD +0 -8
- {distclassipy-0.2.1.dist-info → distclassipy-0.2.2a2.dist-info/licenses}/LICENSE +0 -0
- {distclassipy-0.2.1.dist-info → distclassipy-0.2.2a2.dist-info}/top_level.txt +0 -0
distclassipy/distances.py
CHANGED
|
@@ -48,6 +48,8 @@ import numpy as np
|
|
|
48
48
|
|
|
49
49
|
import scipy
|
|
50
50
|
|
|
51
|
+
# Default epsilon value to avoid division by zero
|
|
52
|
+
EPSILON = np.finfo(float).eps
|
|
51
53
|
_ALL_METRICS = [
|
|
52
54
|
"euclidean",
|
|
53
55
|
"braycurtis",
|
|
@@ -95,1466 +97,1468 @@ _ALL_METRICS = [
|
|
|
95
97
|
]
|
|
96
98
|
|
|
97
99
|
|
|
98
|
-
|
|
99
|
-
"""
|
|
100
|
+
def euclidean(u, v, w=None):
|
|
101
|
+
"""Calculate the Euclidean distance between two vectors.
|
|
100
102
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
distances. Each method takes two vectors as input and returns the calculated
|
|
104
|
-
distance. The class can handle both numpy arrays and lists, converting them
|
|
105
|
-
internally to numpy arrays for computation.
|
|
103
|
+
The Euclidean distance is the "ordinary" straight-line distance between two
|
|
104
|
+
points in Euclidean space.
|
|
106
105
|
|
|
107
|
-
|
|
106
|
+
Parameters
|
|
108
107
|
----------
|
|
109
|
-
|
|
110
|
-
A small value to avoid division by zero errors in certain distance
|
|
111
|
-
calculations. Default is the machine precision for float data type.
|
|
108
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
112
109
|
|
|
113
|
-
|
|
110
|
+
Returns
|
|
114
111
|
-------
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
112
|
+
- The Euclidean distance between the two vectors.
|
|
113
|
+
|
|
114
|
+
References
|
|
115
|
+
----------
|
|
116
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
117
|
+
Measures between Probability Density Functions. International
|
|
118
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
119
|
+
1(4), 300-307.
|
|
120
|
+
"""
|
|
121
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
122
|
+
return scipy.spatial.distance.euclidean(u, v, w)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def braycurtis(u, v, w=None):
|
|
126
|
+
"""Calculate the Bray-Curtis distance between two vectors.
|
|
127
|
+
|
|
128
|
+
The Bray-Curtis distance is a measure of dissimilarity between two non-negative
|
|
129
|
+
vectors, often used in ecology to measure the compositional dissimilarity
|
|
130
|
+
between two sites based on counts of species at both sites. It is closely
|
|
131
|
+
related to the Sørensen distance and is also known as Bray-Curtis
|
|
132
|
+
dissimilarity.
|
|
133
|
+
|
|
134
|
+
Notes
|
|
135
|
+
-----
|
|
136
|
+
When used for comparing two probability density functions (pdfs),
|
|
137
|
+
the Bray-Curtis distance equals the Cityblock distance divided by 2.
|
|
138
|
+
|
|
139
|
+
Parameters
|
|
140
|
+
----------
|
|
141
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
142
|
+
|
|
143
|
+
Returns
|
|
144
|
+
-------
|
|
145
|
+
- The Bray-Curtis distance between the two vectors.
|
|
146
|
+
|
|
147
|
+
References
|
|
148
|
+
----------
|
|
149
|
+
1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
|
|
150
|
+
southern Wisconsin. Ecological Monographs, 27, 325-349.
|
|
151
|
+
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
152
|
+
Measures between Probability Density Functions. International
|
|
153
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
154
|
+
1(4), 300-307.
|
|
155
|
+
3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
|
|
156
|
+
"""
|
|
157
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
158
|
+
return scipy.spatial.distance.braycurtis(u, v, w)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def canberra(u, v, w=None):
|
|
162
|
+
"""Calculate the Canberra distance between two vectors.
|
|
163
|
+
|
|
164
|
+
The Canberra distance is a weighted version of the Manhattan distance, used
|
|
165
|
+
in numerical analysis.
|
|
166
|
+
|
|
167
|
+
Notes
|
|
168
|
+
-----
|
|
169
|
+
When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
|
|
170
|
+
is used in the calculation.
|
|
171
|
+
|
|
172
|
+
Parameters
|
|
173
|
+
----------
|
|
174
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
175
|
+
|
|
176
|
+
Returns
|
|
177
|
+
-------
|
|
178
|
+
- The Canberra distance between the two vectors.
|
|
179
|
+
|
|
180
|
+
References
|
|
181
|
+
----------
|
|
182
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
183
|
+
Measures between Probability Density Functions. International
|
|
184
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
185
|
+
1(4), 300-307.
|
|
186
|
+
"""
|
|
187
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
188
|
+
return scipy.spatial.distance.canberra(u, v, w)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def cityblock(u, v, w=None):
|
|
192
|
+
"""Calculate the Cityblock (Manhattan) distance between two vectors.
|
|
193
|
+
|
|
194
|
+
Parameters
|
|
195
|
+
----------
|
|
196
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
197
|
+
|
|
198
|
+
Returns
|
|
199
|
+
-------
|
|
200
|
+
- The Cityblock distance between the two vectors.
|
|
201
|
+
|
|
202
|
+
References
|
|
203
|
+
----------
|
|
204
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
205
|
+
Measures between Probability Density Functions. International
|
|
206
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
207
|
+
1(4):300-307.
|
|
208
|
+
|
|
209
|
+
Synonyms:
|
|
210
|
+
City block distance
|
|
211
|
+
Manhattan distance
|
|
212
|
+
Rectilinear distance
|
|
213
|
+
Taxicab norm
|
|
214
|
+
|
|
215
|
+
Notes
|
|
216
|
+
-----
|
|
217
|
+
Cityblock distance between two probability density functions
|
|
218
|
+
(pdfs) equals:
|
|
219
|
+
1. Non-intersection distance multiplied by 2.
|
|
220
|
+
2. Gower distance multiplied by vector length.
|
|
221
|
+
3. Bray-Curtis distance multiplied by 2.
|
|
222
|
+
4. Google distance multiplied by 2.
|
|
223
|
+
"""
|
|
224
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
225
|
+
return scipy.spatial.distance.cityblock(u, v, w)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def chebyshev(u, v, w=None):
|
|
229
|
+
"""Calculate the Chebyshev distance between two vectors.
|
|
230
|
+
|
|
231
|
+
The Chebyshev distance is a metric defined on a vector space where the distance
|
|
232
|
+
between two vectors
|
|
233
|
+
is the greatest of their differences along any coordinate dimension.
|
|
234
|
+
|
|
235
|
+
Synonyms:
|
|
236
|
+
Chessboard distance
|
|
237
|
+
King-move metric
|
|
238
|
+
Maximum value distance
|
|
239
|
+
Minimax approximation
|
|
240
|
+
|
|
241
|
+
Parameters
|
|
242
|
+
----------
|
|
243
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
244
|
+
|
|
245
|
+
Returns
|
|
246
|
+
-------
|
|
247
|
+
- The Chebyshev distance between the two vectors.
|
|
248
|
+
|
|
249
|
+
References
|
|
250
|
+
----------
|
|
251
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
252
|
+
Measures between Probability Density Functions. International
|
|
253
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
254
|
+
1(4), 300-307.
|
|
128
255
|
"""
|
|
256
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
257
|
+
return scipy.spatial.distance.chebyshev(u, v, w)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def correlation(u, v, w=None, centered=True):
|
|
261
|
+
"""Calculate the Pearson correlation distance between two vectors.
|
|
129
262
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
References
|
|
154
|
-
----------
|
|
155
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
156
|
-
Measures between Probability Density Functions. International
|
|
157
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
158
|
-
1(4), 300-307.
|
|
159
|
-
"""
|
|
160
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
161
|
-
return scipy.spatial.distance.euclidean(u, v, w)
|
|
162
|
-
|
|
163
|
-
def braycurtis(self, u, v, w=None):
|
|
164
|
-
"""Calculate the Bray-Curtis distance between two vectors.
|
|
165
|
-
|
|
166
|
-
The Bray-Curtis distance is a measure of dissimilarity between two non-negative
|
|
167
|
-
vectors, often used in ecology to measure the compositional dissimilarity
|
|
168
|
-
between two sites based on counts of species at both sites. It is closely
|
|
169
|
-
related to the Sørensen distance and is also known as Bray-Curtis
|
|
170
|
-
dissimilarity.
|
|
171
|
-
|
|
172
|
-
Notes
|
|
173
|
-
-----
|
|
174
|
-
When used for comparing two probability density functions (pdfs),
|
|
175
|
-
the Bray-Curtis distance equals the Cityblock distance divided by 2.
|
|
176
|
-
|
|
177
|
-
Parameters
|
|
178
|
-
----------
|
|
179
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
180
|
-
|
|
181
|
-
Returns
|
|
182
|
-
-------
|
|
183
|
-
- The Bray-Curtis distance between the two vectors.
|
|
184
|
-
|
|
185
|
-
References
|
|
186
|
-
----------
|
|
187
|
-
1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
|
|
188
|
-
southern Wisconsin. Ecological Monographs, 27, 325-349.
|
|
189
|
-
2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
190
|
-
Measures between Probability Density Functions. International
|
|
191
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
192
|
-
1(4), 300-307.
|
|
193
|
-
3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
|
|
194
|
-
"""
|
|
195
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
196
|
-
return scipy.spatial.distance.braycurtis(u, v, w)
|
|
197
|
-
|
|
198
|
-
def canberra(self, u, v, w=None):
|
|
199
|
-
"""Calculate the Canberra distance between two vectors.
|
|
200
|
-
|
|
201
|
-
The Canberra distance is a weighted version of the Manhattan distance, used
|
|
202
|
-
in numerical analysis.
|
|
203
|
-
|
|
204
|
-
Notes
|
|
205
|
-
-----
|
|
206
|
-
When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
|
|
207
|
-
is used in the calculation.
|
|
208
|
-
|
|
209
|
-
Parameters
|
|
210
|
-
----------
|
|
211
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
212
|
-
|
|
213
|
-
Returns
|
|
214
|
-
-------
|
|
215
|
-
- The Canberra distance between the two vectors.
|
|
216
|
-
|
|
217
|
-
References
|
|
218
|
-
----------
|
|
219
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
220
|
-
Measures between Probability Density Functions. International
|
|
221
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
222
|
-
1(4), 300-307.
|
|
223
|
-
"""
|
|
224
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
225
|
-
return scipy.spatial.distance.canberra(u, v, w)
|
|
226
|
-
|
|
227
|
-
def cityblock(self, u, v, w=None):
|
|
228
|
-
"""Calculate the Cityblock (Manhattan) distance between two vectors.
|
|
229
|
-
|
|
230
|
-
Parameters
|
|
231
|
-
----------
|
|
232
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
233
|
-
|
|
234
|
-
Returns
|
|
235
|
-
-------
|
|
236
|
-
- The Cityblock distance between the two vectors.
|
|
237
|
-
|
|
238
|
-
References
|
|
239
|
-
----------
|
|
240
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
241
|
-
Measures between Probability Density Functions. International
|
|
242
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
243
|
-
1(4):300-307.
|
|
244
|
-
|
|
245
|
-
Synonyms:
|
|
246
|
-
City block distance
|
|
247
|
-
Manhattan distance
|
|
248
|
-
Rectilinear distance
|
|
249
|
-
Taxicab norm
|
|
250
|
-
|
|
251
|
-
Notes
|
|
252
|
-
-----
|
|
253
|
-
Cityblock distance between two probability density functions
|
|
254
|
-
(pdfs) equals:
|
|
255
|
-
1. Non-intersection distance multiplied by 2.
|
|
256
|
-
2. Gower distance multiplied by vector length.
|
|
257
|
-
3. Bray-Curtis distance multiplied by 2.
|
|
258
|
-
4. Google distance multiplied by 2.
|
|
259
|
-
"""
|
|
260
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
261
|
-
return scipy.spatial.distance.cityblock(u, v, w)
|
|
262
|
-
|
|
263
|
-
def chebyshev(self, u, v, w=None):
|
|
264
|
-
"""Calculate the Chebyshev distance between two vectors.
|
|
265
|
-
|
|
266
|
-
The Chebyshev distance is a metric defined on a vector space where the distance
|
|
267
|
-
between two vectors
|
|
268
|
-
is the greatest of their differences along any coordinate dimension.
|
|
269
|
-
|
|
270
|
-
Synonyms:
|
|
271
|
-
Chessboard distance
|
|
272
|
-
King-move metric
|
|
273
|
-
Maximum value distance
|
|
274
|
-
Minimax approximation
|
|
275
|
-
|
|
276
|
-
Parameters
|
|
277
|
-
----------
|
|
278
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
279
|
-
|
|
280
|
-
Returns
|
|
281
|
-
-------
|
|
282
|
-
- The Chebyshev distance between the two vectors.
|
|
283
|
-
|
|
284
|
-
References
|
|
285
|
-
----------
|
|
286
|
-
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
287
|
-
Measures between Probability Density Functions. International
|
|
288
|
-
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
289
|
-
1(4), 300-307.
|
|
290
|
-
"""
|
|
291
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
292
|
-
return scipy.spatial.distance.chebyshev(u, v, w)
|
|
293
|
-
|
|
294
|
-
def correlation(self, u, v, w=None, centered=True):
|
|
295
|
-
"""Calculate the Pearson correlation distance between two vectors.
|
|
296
|
-
|
|
297
|
-
Returns a distance value between 0 and 2.
|
|
298
|
-
|
|
299
|
-
Parameters
|
|
300
|
-
----------
|
|
301
|
-
- u, v: Input vectors between which the distance is to be calculated.
|
|
302
|
-
|
|
303
|
-
Returns
|
|
304
|
-
-------
|
|
305
|
-
- The Pearson correlation distance between the two vectors.
|
|
306
|
-
"""
|
|
307
|
-
u, v = np.asarray(u), np.asarray(v)
|
|
308
|
-
if len(u) < 2 or len(v) < 2:
|
|
263
|
+
Returns a distance value between 0 and 2.
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
268
|
+
|
|
269
|
+
Returns
|
|
270
|
+
-------
|
|
271
|
+
- The Pearson correlation distance between the two vectors.
|
|
272
|
+
"""
|
|
273
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
274
|
+
if len(u) < 2 or len(v) < 2:
|
|
275
|
+
warnings.warn(
|
|
276
|
+
"Pearson correlation requires vectors of length at least 2.",
|
|
277
|
+
RuntimeWarning,
|
|
278
|
+
)
|
|
279
|
+
d = 0
|
|
280
|
+
else:
|
|
281
|
+
d = scipy.spatial.distance.correlation(u, v, w, centered)
|
|
282
|
+
if np.isnan(d) and (
|
|
283
|
+
np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
|
|
284
|
+
):
|
|
309
285
|
warnings.warn(
|
|
310
|
-
"
|
|
286
|
+
"One of the vectors is constant; correlation is set to 0",
|
|
311
287
|
RuntimeWarning,
|
|
312
288
|
)
|
|
313
289
|
d = 0
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
290
|
+
return d
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def cosine(u, v, w=None):
|
|
294
|
+
"""Calculate the cosine distance between two vectors.
|
|
295
|
+
|
|
296
|
+
Parameters
|
|
297
|
+
----------
|
|
298
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
299
|
+
|
|
300
|
+
Returns
|
|
301
|
+
-------
|
|
302
|
+
- The cosine distance between the two vectors.
|
|
303
|
+
|
|
304
|
+
References
|
|
305
|
+
----------
|
|
306
|
+
1. SciPy.
|
|
307
|
+
"""
|
|
308
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
309
|
+
return scipy.spatial.distance.cosine(u, v, w)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def clark(u, v):
|
|
313
|
+
"""Calculate the Clark distance between two vectors.
|
|
314
|
+
|
|
315
|
+
The Clark distance equals the square root of half of the divergence.
|
|
316
|
+
|
|
317
|
+
Notes
|
|
318
|
+
-----
|
|
319
|
+
When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
|
|
320
|
+
is used in the calculation.
|
|
321
|
+
|
|
322
|
+
Parameters
|
|
323
|
+
----------
|
|
324
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
325
|
+
|
|
326
|
+
Returns
|
|
327
|
+
-------
|
|
328
|
+
- The Clark distance between the two vectors.
|
|
329
|
+
|
|
330
|
+
References
|
|
331
|
+
----------
|
|
332
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
333
|
+
Measures between Probability Density Functions. International
|
|
334
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
335
|
+
1(4), 300-307.
|
|
336
|
+
"""
|
|
337
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
338
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
339
|
+
return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def hellinger(u, v):
|
|
343
|
+
"""Calculate the Hellinger distance between two vectors.
|
|
344
|
+
|
|
345
|
+
The Hellinger distance is a measure of similarity between two probability
|
|
346
|
+
distributions.
|
|
347
|
+
|
|
348
|
+
Parameters
|
|
349
|
+
----------
|
|
350
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
351
|
+
|
|
352
|
+
Returns
|
|
353
|
+
-------
|
|
354
|
+
- The Hellinger distance between the two vectors.
|
|
355
|
+
|
|
356
|
+
Notes
|
|
357
|
+
-----
|
|
358
|
+
This implementation produces values two times larger than values
|
|
359
|
+
obtained by Hellinger distance described in Wikipedia and also
|
|
360
|
+
in https://gist.github.com/larsmans/3116927.
|
|
361
|
+
|
|
362
|
+
References
|
|
363
|
+
----------
|
|
364
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
365
|
+
Measures between Probability Density Functions. International
|
|
366
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
367
|
+
1(4), 300-307.
|
|
368
|
+
"""
|
|
369
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
370
|
+
# Clip negative values to zero for valid sqrt
|
|
371
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
372
|
+
u = np.clip(u, a_min=0, a_max=None)
|
|
373
|
+
v = np.clip(v, a_min=0, a_max=None)
|
|
374
|
+
return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def jaccard(u, v):
|
|
378
|
+
"""Calculate the Jaccard distance between two vectors.
|
|
379
|
+
|
|
380
|
+
The Jaccard distance measures dissimilarity between sample sets.
|
|
381
|
+
|
|
382
|
+
Parameters
|
|
383
|
+
----------
|
|
384
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
385
|
+
|
|
386
|
+
Returns
|
|
387
|
+
-------
|
|
388
|
+
- The Jaccard distance between the two vectors.
|
|
389
|
+
|
|
390
|
+
References
|
|
391
|
+
----------
|
|
392
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
393
|
+
Measures between Probability Density Functions. International
|
|
394
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
395
|
+
1(4), 300-307.
|
|
396
|
+
"""
|
|
397
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
398
|
+
uv = np.dot(u, v)
|
|
399
|
+
return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def lorentzian(u, v):
|
|
403
|
+
"""Calculate the Lorentzian distance between two vectors.
|
|
404
|
+
|
|
405
|
+
Parameters
|
|
406
|
+
----------
|
|
407
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
408
|
+
|
|
409
|
+
Returns
|
|
410
|
+
-------
|
|
411
|
+
- The Lorentzian distance between the two vectors.
|
|
412
|
+
|
|
413
|
+
References
|
|
414
|
+
----------
|
|
415
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
416
|
+
Measures between Probability Density Functions. International
|
|
417
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
418
|
+
1(4):300-307.
|
|
419
|
+
|
|
420
|
+
Notes
|
|
421
|
+
-----
|
|
422
|
+
One (1) is added to guarantee the non-negativity property and to
|
|
423
|
+
eschew the log of zero.
|
|
424
|
+
"""
|
|
425
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
426
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
427
|
+
return np.sum(np.log(np.abs(u - v) + 1))
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def marylandbridge(u, v):
|
|
431
|
+
"""Calculate the Maryland Bridge distance between two vectors.
|
|
432
|
+
|
|
433
|
+
Parameters
|
|
434
|
+
----------
|
|
435
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
436
|
+
|
|
437
|
+
Returns
|
|
438
|
+
-------
|
|
439
|
+
- The Maryland Bridge distance between the two vectors.
|
|
440
|
+
|
|
441
|
+
References
|
|
442
|
+
----------
|
|
443
|
+
1. Deza M, Deza E (2009) Encyclopedia of Distances.
|
|
444
|
+
Springer-Verlag Berlin Heidelberg. 1-590.
|
|
445
|
+
"""
|
|
446
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
447
|
+
uvdot = np.dot(u, v)
|
|
448
|
+
return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def meehl(u, v):
    """Compute the Meehl distance between two vectors.

    Sums the squared mixed differences ``(x_i - y_i - x' + y')`` over
    component pairs, where the primed values come from a circular shift
    of the input vectors; NaN terms are ignored.

    NOTE(review): the shifted value is taken via ``np.roll(., 1)``, which
    pairs element ``i`` with element ``i-1`` (and the first element with
    the last) rather than with element ``i+1`` as the textbook Meehl
    definition suggests — confirm this pairing is intended.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Meehl distance between ``u`` and ``v``.

    References
    ----------
    1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
       Berlin, Heidelberg: Springer Berlin Heidelberg.
       https://doi.org/10.1007/978-3-642-30958-8.
    """
    x = np.asarray(u)
    y = np.asarray(v)

    x_shift = np.roll(x, 1)
    y_shift = np.roll(y, 1)

    with np.errstate(divide="ignore", invalid="ignore"):
        terms = (x[:-1] - y[:-1] - x_shift[:-1] + y_shift[:-1]) ** 2
        return np.nansum(terms)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def motyka(u, v):
    """Compute the Motyka distance between two vectors.

    Ratio of the sum of component-wise maxima to the total sum of both
    vectors.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Motyka distance between ``u`` and ``v``.

    Notes
    -----
    The distance between identical vectors is not 0 but 0.5.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.maximum(x, y).sum() / (x + y).sum()
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def soergel(u, v):
    """Compute the Soergel distance between two vectors.

    Sum of absolute differences divided by the sum of component-wise
    maxima.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Soergel distance between ``u`` and ``v``.

    Notes
    -----
    Equals the Tanimoto distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.abs(x - y).sum() / np.maximum(x, y).sum()
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def wave_hedges(u, v):
    """Compute the Wave Hedges distance between two vectors.

    Sums ``|u_i - v_i| / max(u_i, v_i)`` over all components, treating
    components where both the difference and the maximum are zero as
    contributing nothing.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Wave Hedges distance between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    x = np.asarray(u)
    y = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = np.abs(x - y)
        peak = np.maximum(x, y)
        valid = (diff != 0) & (peak != 0)
        return np.sum(np.where(valid, diff / peak, 0))
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def kulczynski(u, v):
    """Compute the Kulczynski distance between two vectors.

    Sum of absolute differences divided by the sum of component-wise
    minima.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Kulczynski distance between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.abs(x - y).sum() / np.minimum(x, y).sum()
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def add_chisq(u, v):
    """Compute the Additive Symmetric Chi-square distance.

    Sums ``(u_i - v_i)^2 * (u_i + v_i) / (u_i * v_i)`` over all
    components, skipping components where the product is zero.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Additive Symmetric Chi-square distance between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    prod = x * y
    with np.errstate(divide="ignore", invalid="ignore"):
        num = (x - y) ** 2 * (x + y)
        return np.sum(np.where(prod != 0, num / prod, 0))
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
# NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
|
|
612
|
+
# CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def acc(u, v):
    """Compute the ACC distance: the mean of Cityblock and Chebyshev.

    Also known as the Average distance; delegates to the module's
    ``cityblock`` and ``chebyshev`` implementations.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The ACC distance between ``u`` and ``v``.

    References
    ----------
    1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
       Geometry. Dover Publications.
    2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    manhattan = cityblock(u, v)
    sup_norm = chebyshev(u, v)
    return (manhattan + sup_norm) / 2
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
# def bhattacharyya(u, v):
|
|
643
|
+
# """
|
|
644
|
+
# Calculate the Bhattacharyya distance between two vectors.
|
|
645
|
+
|
|
646
|
+
# Returns a distance value between 0 and 1.
|
|
647
|
+
|
|
648
|
+
# Parameters
|
|
649
|
+
# ----------
|
|
650
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
651
|
+
|
|
652
|
+
# Returns
|
|
653
|
+
# -------
|
|
654
|
+
# - The Bhattacharyya distance between the two vectors.
|
|
655
|
+
|
|
656
|
+
# References
|
|
657
|
+
# ----------
|
|
658
|
+
# 1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
659
|
+
# statistical populations defined by probability distributions,
|
|
660
|
+
# Bull. Calcutta Math. Soc., 35, 99–109.
|
|
661
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
662
|
+
# Measures between Probability Density Functions. International
|
|
663
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
664
|
+
# 1(4), 300-307.
|
|
665
|
+
# 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
666
|
+
# """
|
|
667
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
668
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
669
|
+
# return -np.log(np.sum(np.sqrt(u * v)))
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def chebyshev_min(u, v):
    """Compute the minimum-value distance between two vectors.

    A custom measure (Zielezinski): the smallest absolute
    component-wise difference.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The minimum absolute difference between components of
        ``u`` and ``v``.
    """
    gaps = np.abs(np.asarray(u) - np.asarray(v))
    return gaps.min()
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def czekanowski(u, v):
    """Compute the Czekanowski distance between two vectors.

    Sum of absolute differences divided by the total sum of both
    vectors.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Czekanowski distance between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.abs(x - y).sum() / (x + y).sum()
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def dice(u, v):
    """Compute the Dice dissimilarity between two vectors.

    Squared norm of the difference divided by the sum of the squared
    norms of the inputs. Synonym: Sorensen distance.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Dice dissimilarity between ``u`` and ``v``.

    References
    ----------
    1. Dice LR (1945) Measures of the amount of ecologic association
       between species. Ecology. 26, 297-302.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    diff = x - y
    return np.dot(diff, diff) / (np.dot(x, x) + np.dot(y, y))
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def divergence(u, v):
    """Compute the divergence between two vectors.

    Twice the NaN-ignoring sum of ``(u_i - v_i)^2 / (u_i + v_i)^2``;
    equals the squared Clark distance multiplied by 2.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The divergence between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    with np.errstate(invalid="ignore"):
        num = (x - y) ** 2
        den = (x + y) ** 2
        return 2 * np.nansum(num / den)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def google(u, v):
    """Compute the Normalized Google Distance (NGD) between two vectors.

    A similarity measure originally derived from search-engine hit
    counts, evaluated here on the vectors' sums and overlap.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Normalized Google Distance between ``u`` and ``v``.

    Notes
    -----
    When comparing two probability density functions, this equals half
    of the Cityblock distance.

    References
    ----------
    1. Lee & Rashid (2008) Information Technology, ITSim 2008.
       doi:10.1109/ITSIM.2008.4631601.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    total_x = float(np.sum(x))
    total_y = float(np.sum(y))
    overlap = float(np.minimum(x, y).sum())
    larger, smaller = max(total_x, total_y), min(total_x, total_y)
    return (larger - overlap) / ((total_x + total_y) - smaller)
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def gower(u, v):
    """Compute the Gower distance between two vectors.

    The Cityblock (Manhattan) distance divided by the vector length.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Gower distance between ``u`` and ``v``.

    References
    ----------
    1. Gower JC. (1971) General Coefficient of Similarity
       and Some of Its Properties, Biometrics 27, 857-874.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.abs(x - y).sum() / x.size
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def jeffreys(u, v):
    """Compute the Jeffreys divergence between two vectors.

    The Jeffreys divergence is a symmetric version of the
    Kullback-Leibler divergence: ``sum((u - v) * log(u / v))``.

    Parameters
    ----------
    u, v : array_like
        Input vectors. Zero components are replaced by machine epsilon
        to avoid division by zero and log of zero.

    Returns
    -------
    float
        The Jeffreys divergence between ``u`` and ``v``.

    References
    ----------
    1. Jeffreys H (1946) An Invariant Form for the Prior Probability
       in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # Work on float copies: np.asarray does not copy ndarray input, so the
    # previous in-place epsilon substitution mutated the caller's array and
    # silently truncated epsilon to 0 on integer arrays.
    u = np.array(u, dtype=float)
    v = np.array(v, dtype=float)
    eps = np.finfo(float).eps  # smallest substitute avoiding 0-division / log(0)
    u[u == 0] = eps
    v[v == 0] = eps
    with np.errstate(divide="ignore", invalid="ignore"):
        # Clip the ratio so negative components cannot reach log of <= 0.
        udivv = np.clip(u / v, a_min=eps, a_max=None)
        return np.sum((u - v) * np.log(udivv))
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def jensenshannon_divergence(u, v):
    """Compute the Jensen-Shannon divergence between two vectors.

    A symmetric and finite measure of similarity between two
    probability distributions.

    Parameters
    ----------
    u, v : array_like
        Input vectors. Zero components are replaced by machine epsilon
        to avoid division by zero and log of zero.

    Returns
    -------
    float
        The Jensen-Shannon divergence between ``u`` and ``v``.

    References
    ----------
    1. Lin J. (1991) Divergence measures based on the Shannon entropy.
       IEEE Transactions on Information Theory, 37(1):145-151.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # Work on float copies: np.asarray does not copy ndarray input, so the
    # previous in-place epsilon substitution mutated the caller's array and
    # silently truncated epsilon to 0 on integer arrays.
    u = np.array(u, dtype=float)
    v = np.array(v, dtype=float)
    eps = np.finfo(float).eps  # smallest substitute avoiding 0-division / log(0)
    with np.errstate(divide="ignore", invalid="ignore"):
        u[u == 0] = eps
        v[v == 0] = eps

        # Clip ratios away from zero so the logs stay finite.
        term1 = np.clip(2 * u / (u + v), a_min=eps, a_max=None)
        term2 = np.clip(2 * v / (u + v), a_min=eps, a_max=None)

        dl = u * np.log(term1)
        dr = v * np.log(term2)
        return (np.sum(dl) + np.sum(dr)) / 2
|
|
902
|
+
|
|
903
|
+
|
|
904
|
+
def jensen_difference(u, v):
    """Compute the Jensen difference between two vectors.

    Sums the difference between the mean of the component entropies and
    the entropy of the component mean; closely related to the
    Jensen-Shannon divergence.

    Parameters
    ----------
    u, v : array_like
        Input vectors. Components are clipped to machine epsilon so the
        logarithms stay finite.

    Returns
    -------
    float
        The Jensen difference between ``u`` and ``v``.

    Notes
    -----
    1. Equals half of the Topsoe distance.
    2. Equals the squared Jensen-Shannon distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)

    with np.errstate(divide="ignore", invalid="ignore"):
        # Keep every component at least EPSILON so log never sees <= 0.
        x = np.clip(x, EPSILON, None)
        y = np.clip(y, EPSILON, None)
        entropy_avg = (x * np.log(x) + y * np.log(y)) / 2
        midpoint = np.clip((x + y) / 2, a_min=EPSILON, a_max=None)
        return np.sum(entropy_avg - midpoint * np.log(midpoint))
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
def kumarjohnson(u, v):
    """Compute the Kumar-Johnson distance between two vectors.

    Sums ``(u_i^2 - v_i^2)^2 / (2 * (u_i * v_i)^(3/2))`` over all
    components, skipping components where the product is zero.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Kumar-Johnson distance between ``u`` and ``v``.

    References
    ----------
    1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
       and information inequalities, Journal of Inequalities in pure
       and applied Mathematics. 6(3).
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    prod = x * y
    with np.errstate(divide="ignore", invalid="ignore"):
        num = (x**2 - y**2) ** 2
        den = 2 * prod ** (3 / 2)
        return np.sum(np.where(prod != 0, num / den, 0))
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def matusita(u, v):
    """Compute the Matusita distance between two vectors.

    The Euclidean distance between the component-wise square roots of
    the inputs.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Matusita distance between ``u`` and ``v``.

    Notes
    -----
    Equals the square root of the Squared-chord distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        chord = np.sqrt(x) - np.sqrt(y)
        return np.sqrt(np.sum(chord**2))
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def minkowski(u, v, p=2):
    """Compute the Minkowski distance between two vectors.

    The p-norm of the component-wise difference.

    Parameters
    ----------
    u, v : array_like
        Input vectors.
    p : int or float, default 2
        Order of the norm applied to the difference.

    Returns
    -------
    float
        The Minkowski distance between ``u`` and ``v``.

    Notes
    -----
    As ``p`` tends to infinity this approaches the Chebyshev distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    diff = np.asarray(u) - np.asarray(v)
    return np.linalg.norm(diff, ord=p)
|
|
1022
|
+
|
|
1023
|
+
|
|
1024
|
+
def penroseshape(u, v):
    """Compute the Penrose shape distance between two vectors.

    The Euclidean distance between the mean-centered versions of the
    two inputs.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Penrose shape distance between ``u`` and ``v``.

    References
    ----------
    1. Deza M, Deza E (2009) Encyclopedia of Distances.
       Springer-Verlag Berlin Heidelberg. 1-590.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    centered_x = x - np.mean(x)
    centered_y = y - np.mean(y)
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.sqrt(np.sum((centered_x - centered_y) ** 2))
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
def prob_chisq(u, v):
    """Compute the Probabilistic chi-square distance between two vectors.

    Twice the sum of ``(u_i - v_i)^2 / (u_i + v_i)``, skipping
    components whose sum is zero.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Probabilistic chi-square distance between ``u`` and ``v``.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    total = x + y
    with np.errstate(divide="ignore", invalid="ignore"):
        contrib = np.where(total != 0, (x - y) ** 2 / total, 0)
        return 2 * np.sum(contrib)
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def ruzicka(u, v):
    """Compute the Ruzicka distance between two vectors.

    One minus the ratio of the sum of component-wise minima to the sum
    of component-wise maxima.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Ruzicka distance between ``u`` and ``v``.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    union = np.sum(np.maximum(x, y))

    intersection = np.sum(np.minimum(x, y))
    return 1 - intersection / union
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def sorensen(u, v):
    """Compute the Sorensen distance between two vectors.

    The Manhattan distance divided by the total sum of both vectors.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Sorensen distance between ``u`` and ``v``.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    return np.abs(x - y).sum() / (x + y).sum()
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def squared_chisq(u, v):
    """Compute the Squared chi-square distance between two vectors.

    Sums ``(u_i - v_i)^2 / (u_i + v_i)``, skipping components whose
    sum is zero.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Squared chi-square distance between ``u`` and ``v``.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    total = x + y
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.sum(np.where(total != 0, (x - y) ** 2 / total, 0))
|
|
1133
|
+
|
|
1134
|
+
|
|
1135
|
+
def squaredchord(u, v):
    """Compute the Squared-chord distance between two vectors.

    Sum of squared differences between the component-wise square roots
    of the inputs.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Squared-chord distance between ``u`` and ``v``.

    Notes
    -----
    Equals the squared Matusita distance.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356-367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    x = np.asarray(u)
    y = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        chord = np.sqrt(x) - np.sqrt(y)
        return np.sum(chord**2)
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
def squared_euclidean(u, v):
    """Compute the Squared Euclidean distance between two vectors.

    The dot product of the component-wise difference with itself.

    Parameters
    ----------
    u, v : array_like
        Input vectors.

    Returns
    -------
    float
        The Squared Euclidean distance between ``u`` and ``v``.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356-367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    diff = np.asarray(u) - np.asarray(v)
    return np.dot(diff, diff)
|
|
1192
|
+
|
|
1193
|
+
|
|
1194
|
+
def taneja(u, v):
    """Calculate the Taneja distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Taneja distance between the two vectors.

    References
    ----------
    1. Taneja IJ. (1995), New Developments in Generalized Information
       Measures, Chapter in: Advances in Imaging and Electron Physics,
       Ed. P.W. Hawkes, 91, 37-135.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Zero components are replaced with machine epsilon to keep the
    logarithm finite. The replacement is done on fresh arrays via
    ``np.where`` so the caller's inputs are never mutated in place
    (``np.asarray`` does not copy an existing ndarray), and integer
    inputs are correctly upcast to float instead of truncating the
    epsilon to 0.
    """
    u, v = np.asarray(u), np.asarray(v)
    eps = np.finfo(float).eps  # same value as the module-level EPSILON
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        uvsum = u + v
        # Guard the log argument away from zero before taking the log.
        logarg = np.clip(uvsum / (2 * np.sqrt(u * v)), a_min=eps, a_max=None)
        return np.sum((uvsum / 2) * np.log(logarg))
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
def tanimoto(u, v):
    """Calculate the Tanimoto distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Tanimoto distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Soergel distance.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Expressed through component sums: sum|u-v| = sum(u)+sum(v)-2*sum(min)
    # and sum(max) = sum(u)+sum(v)-sum(min).
    total = np.sum(u) + np.sum(v)
    overlap = np.sum(np.minimum(u, v))
    return (total - 2 * overlap) / (total - overlap)
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def topsoe(u, v):
    """Calculate the Topsøe distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Topsøe distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals two times Jensen-Shannon divergence.

    Zero components are replaced with machine epsilon to keep the
    logarithms finite. The replacement is done on fresh arrays via
    ``np.where`` so the caller's inputs are never mutated in place
    (``np.asarray`` does not copy an existing ndarray), and integer
    inputs are correctly upcast to float instead of truncating the
    epsilon to 0.
    """
    u, v = np.asarray(u), np.asarray(v)
    eps = np.finfo(float).eps  # same value as the module-level EPSILON
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        s = u + v
        # Clip the log arguments away from zero before taking the log.
        dl = u * np.log(np.clip(2 * u / s, a_min=eps, a_max=None))
        dr = v * np.log(np.clip(2 * v / s, a_min=eps, a_max=None))
        return np.sum(dl + dr)
|
|
1285
|
+
|
|
1286
|
+
|
|
1287
|
+
def vicis_symmetric_chisq(u, v):
    """Calculate the Vicis Symmetric chi-square distance.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis Symmetric chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    u, v = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        diff_sq = np.square(u - v)
        denom = np.square(np.minimum(u, v))
        # Components where the squared minimum is zero contribute nothing.
        terms = np.where(denom != 0, diff_sq / denom, 0)
    return np.sum(terms)
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
def vicis_wave_hedges(u, v):
    """Calculate the Vicis-Wave Hedges distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis-Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        numer = np.abs(u - v)
        denom = np.minimum(u, v)
        # Components with a zero minimum contribute nothing to the sum.
        return np.sum(np.where(denom == 0, 0, numer / denom))
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
# def fidelity(u, v):
|
|
1338
|
+
# """
|
|
1339
|
+
# Calculate the fidelity distance between two vectors.
|
|
1340
|
+
|
|
1341
|
+
# The fidelity distance measures the similarity between two probability
|
|
1342
|
+
# distributions.
|
|
1343
|
+
|
|
1344
|
+
# Parameters
|
|
1345
|
+
# ----------
|
|
1346
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1347
|
+
|
|
1348
|
+
# Returns
|
|
1349
|
+
# -------
|
|
1350
|
+
# - The fidelity distance between the two vectors.
|
|
1351
|
+
|
|
1352
|
+
# Notes
|
|
1353
|
+
# -----
|
|
1354
|
+
# Added by SC.
|
|
1355
|
+
# """
|
|
1356
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1357
|
+
# return 1 - (np.sum(np.sqrt(u * v)))
|
|
1358
|
+
|
|
1359
|
+
# # NEEDS CHECKING
|
|
1360
|
+
# # def harmonicmean(u, v):
|
|
1361
|
+
# # """
|
|
1362
|
+
# # Harmonic mean distance.
|
|
1363
|
+
# # Notes:
|
|
1364
|
+
# # Added by SC.
|
|
1365
|
+
# # """
|
|
1366
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
1367
|
+
# # return 1 - 2.0 * np.sum(u * v / (u + v))
|
|
1368
|
+
|
|
1369
|
+
# # def inner(u, v):
|
|
1370
|
+
# # """
|
|
1371
|
+
# # Calculate the inner product distance between two vectors.
|
|
1372
|
+
|
|
1373
|
+
# # The inner product distance is a measure of
|
|
1374
|
+
# # similarity between two vectors,
|
|
1375
|
+
# # based on their inner product.
|
|
1376
|
+
|
|
1377
|
+
# # Parameters
|
|
1378
|
+
# # ----------
|
|
1379
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
1380
|
+
|
|
1381
|
+
# # Returns
|
|
1382
|
+
# # -------
|
|
1383
|
+
# # - The inner product distance between the two vectors.
|
|
1384
|
+
|
|
1385
|
+
# # Notes
|
|
1386
|
+
# # -----
|
|
1387
|
+
# # Added by SC.
|
|
1388
|
+
# # """
|
|
1389
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
1390
|
+
# # return 1 - np.dot(u, v)
|
|
1391
|
+
|
|
1392
|
+
# def k_divergence(u, v):
|
|
1393
|
+
# """Calculate the K divergence between two vectors.
|
|
1394
|
+
|
|
1395
|
+
# Parameters
|
|
1396
|
+
# ----------
|
|
1397
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1398
|
+
|
|
1399
|
+
# Returns
|
|
1400
|
+
# -------
|
|
1401
|
+
# - The K divergence between the two vectors.
|
|
1402
|
+
|
|
1403
|
+
# References
|
|
1404
|
+
# ----------
|
|
1405
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1406
|
+
# Measures between Probability Density Functions. International
|
|
1407
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1408
|
+
# 1(4), 300-307.
|
|
1409
|
+
# """
|
|
1410
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1411
|
+
# u[u == 0] = EPSILON
|
|
1412
|
+
# v[v == 0] = EPSILON
|
|
1413
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1414
|
+
# return np.sum(u * np.log(2 * u / (u + v)))
|
|
1415
|
+
|
|
1416
|
+
# def kl_divergence(u, v):
|
|
1417
|
+
# """Calculate the Kullback-Leibler divergence between two vectors.
|
|
1418
|
+
|
|
1419
|
+
# The Kullback-Leibler divergence measures the difference between two
|
|
1420
|
+
# probability distributions.
|
|
1421
|
+
|
|
1422
|
+
# Parameters
|
|
1423
|
+
# ----------
|
|
1424
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1425
|
+
|
|
1426
|
+
# Returns
|
|
1427
|
+
# -------
|
|
1428
|
+
# - The Kullback-Leibler divergence between the two vectors.
|
|
1429
|
+
|
|
1430
|
+
# References
|
|
1431
|
+
# ----------
|
|
1432
|
+
# 1. Kullback S, Leibler RA (1951) On information and sufficiency.
|
|
1433
|
+
# Ann. Math. Statist. 22:79–86
|
|
1434
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1435
|
+
# Measures between Probability Density Functions. International
|
|
1436
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1437
|
+
# 1(4):300-307.
|
|
1438
|
+
# """
|
|
1439
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1440
|
+
# u[u == 0] = EPSILON
|
|
1441
|
+
# v[v == 0] = EPSILON
|
|
1442
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1443
|
+
# return np.sum(u * np.log(u / v))
|
|
1444
|
+
|
|
1445
|
+
# def max_symmetric_chisq(u, v):
|
|
1446
|
+
# """Calculate the maximum symmetric chi-square distance.
|
|
1447
|
+
|
|
1448
|
+
# Parameters
|
|
1449
|
+
# ----------
|
|
1450
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1451
|
+
|
|
1452
|
+
# Returns
|
|
1453
|
+
# -------
|
|
1454
|
+
# - The maximum symmetric chi-square distance between the two vectors.
|
|
1455
|
+
|
|
1456
|
+
# References
|
|
1457
|
+
# ----------
|
|
1458
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1459
|
+
# Measures between Probability Density Functions. International
|
|
1460
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1461
|
+
# 1(4):300-307.
|
|
1462
|
+
# """
|
|
1463
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1464
|
+
# return max(neyman_chisq(u, v), pearson_chisq(u, v))
|
|
1465
|
+
|
|
1466
|
+
# def min_symmetric_chisq(u, v):
|
|
1467
|
+
# """Calculate the minimum symmetric chi-square distance.
|
|
1468
|
+
|
|
1469
|
+
# Parameters
|
|
1470
|
+
# ----------
|
|
1471
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1472
|
+
|
|
1473
|
+
# Returns
|
|
1474
|
+
# -------
|
|
1475
|
+
# - The minimum symmetric chi-square distance between the two vectors.
|
|
1476
|
+
|
|
1477
|
+
# Notes
|
|
1478
|
+
# -----
|
|
1479
|
+
# Added by SC.
|
|
1480
|
+
# """
|
|
1481
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1482
|
+
# return min(neyman_chisq(u, v), pearson_chisq(u, v))
|
|
1483
|
+
|
|
1484
|
+
# def neyman_chisq(u, v):
|
|
1485
|
+
# """Calculate the Neyman chi-square distance between two vectors.
|
|
1486
|
+
|
|
1487
|
+
# Parameters
|
|
1488
|
+
# ----------
|
|
1489
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1490
|
+
|
|
1491
|
+
# Returns
|
|
1492
|
+
# -------
|
|
1493
|
+
# - The Neyman chi-square distance between the two vectors.
|
|
1494
|
+
|
|
1495
|
+
# References
|
|
1496
|
+
# ----------
|
|
1497
|
+
# 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
|
|
1498
|
+
# In Proceedings of the First Berkley Symposium on Mathematical
|
|
1499
|
+
# Statistics and Probability.
|
|
1500
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1501
|
+
# Measures between Probability Density Functions. International
|
|
1502
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1503
|
+
# 1(4), 300-307.
|
|
1504
|
+
# """
|
|
1505
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1506
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1507
|
+
# return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
|
|
1508
|
+
|
|
1509
|
+
# def pearson_chisq(u, v):
|
|
1510
|
+
# """Calculate the Pearson chi-square divergence between two vectors.
|
|
1511
|
+
|
|
1512
|
+
# Parameters
|
|
1513
|
+
# ----------
|
|
1514
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1515
|
+
|
|
1516
|
+
# Returns
|
|
1517
|
+
# -------
|
|
1518
|
+
# - The Pearson chi-square divergence between the two vectors.
|
|
1519
|
+
|
|
1520
|
+
# References
|
|
1521
|
+
# ----------
|
|
1522
|
+
# 1. Pearson K. (1900) On the Criterion that a given system of
|
|
1523
|
+
# deviations from the probable in the case of correlated system
|
|
1524
|
+
# of variables is such that it can be reasonable supposed to have
|
|
1525
|
+
# arisen from random sampling, Phil. Mag. 50, 157-172.
|
|
1526
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1527
|
+
# Measures between Probability Density Functions. International
|
|
1528
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1529
|
+
# 1(4), 300-307.
|
|
1530
|
+
|
|
1531
|
+
# Notes
|
|
1532
|
+
# -----
|
|
1533
|
+
# Pearson chi-square divergence is asymmetric.
|
|
1534
|
+
# """
|
|
1535
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1536
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1537
|
+
# return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
|
|
1538
|
+
|
|
1539
|
+
# def nonintersection(u, v):
|
|
1540
|
+
# """
|
|
1541
|
+
# Calculate the Nonintersection distance between two vectors.
|
|
1542
|
+
|
|
1543
|
+
# Parameters
|
|
1544
|
+
# ----------
|
|
1545
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1546
|
+
|
|
1547
|
+
# Returns
|
|
1548
|
+
# -------
|
|
1549
|
+
# - The Nonintersection distance between the two vectors.
|
|
1550
|
+
|
|
1551
|
+
# References
|
|
1552
|
+
# ----------
|
|
1553
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1554
|
+
# Measures between Probability Density Functions. International
|
|
1555
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1556
|
+
# 1(4), 300-307.
|
|
1557
|
+
|
|
1558
|
+
# Notes
|
|
1559
|
+
# -----
|
|
1560
|
+
# When used for comparing two probability density functions (pdfs),
|
|
1561
|
+
# Nonintersection distance equals half of Cityblock distance.
|
|
1562
|
+
# """
|
|
1563
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1564
|
+
# return 1 - np.sum(np.minimum(u, v))
|