distclassipy 0.2.0a0__py3-none-any.whl → 0.2.2a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -48,1437 +48,1517 @@ import numpy as np
48
48
 
49
49
  import scipy
50
50
 
51
+ # Default epsilon value to avoid division by zero
52
+ EPSILON = np.finfo(float).eps
53
+ _ALL_METRICS = [
54
+ "euclidean",
55
+ "braycurtis",
56
+ "canberra",
57
+ "cityblock",
58
+ "chebyshev",
59
+ "clark",
60
+ "correlation",
61
+ "cosine",
62
+ "hellinger",
63
+ "jaccard",
64
+ "lorentzian",
65
+ "marylandbridge",
66
+ "meehl",
67
+ "motyka",
68
+ "soergel",
69
+ "wave_hedges",
70
+ "kulczynski",
71
+ "add_chisq",
72
+ "acc",
73
+ "chebyshev_min",
74
+ "czekanowski",
75
+ "dice",
76
+ "divergence",
77
+ "google",
78
+ "gower",
79
+ "jeffreys",
80
+ "jensenshannon_divergence",
81
+ "jensen_difference",
82
+ "kumarjohnson",
83
+ "matusita",
84
+ "minkowski",
85
+ "penroseshape",
86
+ "prob_chisq",
87
+ "ruzicka",
88
+ "sorensen",
89
+ "squared_chisq",
90
+ "squaredchord",
91
+ "squared_euclidean",
92
+ "taneja",
93
+ "tanimoto",
94
+ "topsoe",
95
+ "vicis_symmetric_chisq",
96
+ "vicis_wave_hedges",
97
+ ]
98
+
99
+
100
+ def euclidean(u, v, w=None):
101
+ """Calculate the Euclidean distance between two vectors.
102
+
103
+ The Euclidean distance is the "ordinary" straight-line distance between two
104
+ points in Euclidean space.
105
+
106
+ Parameters
107
+ ----------
108
+ - u, v: Input vectors between which the distance is to be calculated.
109
+
110
+ Returns
111
+ -------
112
+ - The Euclidean distance between the two vectors.
113
+
114
+ References
115
+ ----------
116
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
117
+ Measures between Probability Density Functions. International
118
+ Journal of Mathematical Models and Methods in Applied Sciences.
119
+ 1(4), 300-307.
120
+ """
121
+ u, v = np.asarray(u), np.asarray(v)
122
+ return scipy.spatial.distance.euclidean(u, v, w)
123
+
124
+
125
+ def braycurtis(u, v, w=None):
126
+ """Calculate the Bray-Curtis distance between two vectors.
127
+
128
+ The Bray-Curtis distance is a measure of dissimilarity between two non-negative
129
+ vectors, often used in ecology to measure the compositional dissimilarity
130
+ between two sites based on counts of species at both sites. It is closely
131
+ related to the Sørensen distance and is also known as Bray-Curtis
132
+ dissimilarity.
133
+
134
+ Notes
135
+ -----
136
+ When used for comparing two probability density functions (pdfs),
137
+ the Bray-Curtis distance equals the Cityblock distance divided by 2.
138
+
139
+ Parameters
140
+ ----------
141
+ - u, v: Input vectors between which the distance is to be calculated.
142
+
143
+ Returns
144
+ -------
145
+ - The Bray-Curtis distance between the two vectors.
146
+
147
+ References
148
+ ----------
149
+ 1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
150
+ southern Wisconsin. Ecological Monographs, 27, 325-349.
151
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
152
+ Measures between Probability Density Functions. International
153
+ Journal of Mathematical Models and Methods in Applied Sciences.
154
+ 1(4), 300-307.
155
+ 3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
156
+ """
157
+ u, v = np.asarray(u), np.asarray(v)
158
+ return scipy.spatial.distance.braycurtis(u, v, w)
159
+
160
+
161
+ def canberra(u, v, w=None):
162
+ """Calculate the Canberra distance between two vectors.
163
+
164
+ The Canberra distance is a weighted version of the Manhattan distance, used
165
+ in numerical analysis.
166
+
167
+ Notes
168
+ -----
169
+ When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
170
+ is used in the calculation.
171
+
172
+ Parameters
173
+ ----------
174
+ - u, v: Input vectors between which the distance is to be calculated.
175
+
176
+ Returns
177
+ -------
178
+ - The Canberra distance between the two vectors.
51
179
 
52
- class Distance:
53
- """A class to calculate various distance metrics between vectors.
180
+ References
181
+ ----------
182
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
183
+ Measures between Probability Density Functions. International
184
+ Journal of Mathematical Models and Methods in Applied Sciences.
185
+ 1(4), 300-307.
186
+ """
187
+ u, v = np.asarray(u), np.asarray(v)
188
+ return scipy.spatial.distance.canberra(u, v, w)
54
189
 
55
- This class provides methods to compute different types of distances between
56
- two vectors, such as Euclidean, Manhattan, Canberra, and other statistical
57
- distances. Each method takes two vectors as input and returns the calculated
58
- distance. The class can handle both numpy arrays and lists, converting them
59
- internally to numpy arrays for computation.
60
190
 
61
- Attributes
191
+ def cityblock(u, v, w=None):
192
+ """Calculate the Cityblock (Manhattan) distance between two vectors.
193
+
194
+ Parameters
62
195
  ----------
63
- epsilon : float, optional
64
- A small value to avoid division by zero errors in certain distance
65
- calculations. Default is the machine precision for float data type.
196
+ - u, v: Input vectors between which the distance is to be calculated.
66
197
 
67
- Methods
198
+ Returns
68
199
  -------
69
- acc(u, v)
70
- Returns the average of Cityblock/Manhattan and Chebyshev distances.
71
- add_chisq(u, v)
72
- Returns the Additive Symmetric Chi-square distance.
73
- (Other methods are not listed here for brevity)
74
-
75
- Examples
76
- --------
77
- >>> dist = Distance()
78
- >>> u = [1, 2, 3]
79
- >>> v = [4, 5, 6]
80
- >>> print(dist.acc(u, v))
81
- 5.0
200
+ - The Cityblock distance between the two vectors.
201
+
202
+ References
203
+ ----------
204
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
205
+ Measures between Probability Density Functions. International
206
+ Journal of Mathematical Models and Methods in Applied Sciences.
207
+ 1(4):300-307.
208
+
209
+ Synonyms:
210
+ City block distance
211
+ Manhattan distance
212
+ Rectilinear distance
213
+ Taxicab norm
214
+
215
+ Notes
216
+ -----
217
+ Cityblock distance between two probability density functions
218
+ (pdfs) equals:
219
+ 1. Non-intersection distance multiplied by 2.
220
+ 2. Gower distance multiplied by vector length.
221
+ 3. Bray-Curtis distance multiplied by 2.
222
+ 4. Google distance multiplied by 2.
82
223
  """
224
+ u, v = np.asarray(u), np.asarray(v)
225
+ return scipy.spatial.distance.cityblock(u, v, w)
226
+
227
+
228
+ def chebyshev(u, v, w=None):
229
+ """Calculate the Chebyshev distance between two vectors.
230
+
231
+ The Chebyshev distance is a metric defined on a vector space where the distance
232
+ between two vectors
233
+ is the greatest of their differences along any coordinate dimension.
234
+
235
+ Synonyms:
236
+ Chessboard distance
237
+ King-move metric
238
+ Maximum value distance
239
+ Minimax approximation
240
+
241
+ Parameters
242
+ ----------
243
+ - u, v: Input vectors between which the distance is to be calculated.
244
+
245
+ Returns
246
+ -------
247
+ - The Chebyshev distance between the two vectors.
83
248
 
84
- def __init__(self, epsilon=None):
85
- """Initialize the Distance class with an optional epsilon value.
86
-
87
- Parameters
88
- ----------
89
- - epsilon: A small value to avoid division by zero errors.
90
- """
91
- self.epsilon = np.finfo(float).eps if not epsilon else epsilon
92
-
93
- def euclidean(self, u, v, w=None):
94
- """Calculate the Euclidean distance between two vectors.
95
-
96
- The Euclidean distance is the "ordinary" straight-line distance between two
97
- points in Euclidean space.
98
-
99
- Parameters
100
- ----------
101
- - u, v: Input vectors between which the distance is to be calculated.
102
-
103
- Returns
104
- -------
105
- - The Euclidean distance between the two vectors.
106
-
107
- References
108
- ----------
109
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
110
- Measures between Probability Density Functions. International
111
- Journal of Mathematical Models and Methods in Applied Sciences.
112
- 1(4), 300-307.
113
- """
114
- u, v = np.asarray(u), np.asarray(v)
115
- return scipy.spatial.distance.euclidean(u, v, w)
116
-
117
- def braycurtis(self, u, v, w=None):
118
- """Calculate the Bray-Curtis distance between two vectors.
119
-
120
- The Bray-Curtis distance is a measure of dissimilarity between two non-negative
121
- vectors, often used in ecology to measure the compositional dissimilarity
122
- between two sites based on counts of species at both sites. It is closely
123
- related to the Sørensen distance and is also known as Bray-Curtis
124
- dissimilarity.
125
-
126
- Notes
127
- -----
128
- When used for comparing two probability density functions (pdfs),
129
- the Bray-Curtis distance equals the Cityblock distance divided by 2.
130
-
131
- Parameters
132
- ----------
133
- - u, v: Input vectors between which the distance is to be calculated.
134
-
135
- Returns
136
- -------
137
- - The Bray-Curtis distance between the two vectors.
138
-
139
- References
140
- ----------
141
- 1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
142
- southern Wisconsin. Ecological Monographs, 27, 325-349.
143
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
144
- Measures between Probability Density Functions. International
145
- Journal of Mathematical Models and Methods in Applied Sciences.
146
- 1(4), 300-307.
147
- 3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
148
- """
149
- u, v = np.asarray(u), np.asarray(v)
150
- return scipy.spatial.distance.braycurtis(u, v, w)
151
-
152
- def canberra(self, u, v, w=None):
153
- """Calculate the Canberra distance between two vectors.
154
-
155
- The Canberra distance is a weighted version of the Manhattan distance, used
156
- in numerical analysis.
157
-
158
- Notes
159
- -----
160
- When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
161
- is used in the calculation.
162
-
163
- Parameters
164
- ----------
165
- - u, v: Input vectors between which the distance is to be calculated.
166
-
167
- Returns
168
- -------
169
- - The Canberra distance between the two vectors.
170
-
171
- References
172
- ----------
173
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
174
- Measures between Probability Density Functions. International
175
- Journal of Mathematical Models and Methods in Applied Sciences.
176
- 1(4), 300-307.
177
- """
178
- u, v = np.asarray(u), np.asarray(v)
179
- return scipy.spatial.distance.canberra(u, v, w)
180
-
181
- def cityblock(self, u, v, w=None):
182
- """Calculate the Cityblock (Manhattan) distance between two vectors.
183
-
184
- Parameters
185
- ----------
186
- - u, v: Input vectors between which the distance is to be calculated.
187
-
188
- Returns
189
- -------
190
- - The Cityblock distance between the two vectors.
191
-
192
- References
193
- ----------
194
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
195
- Measures between Probability Density Functions. International
196
- Journal of Mathematical Models and Methods in Applied Sciences.
197
- 1(4):300-307.
198
-
199
- Synonyms:
200
- City block distance
201
- Manhattan distance
202
- Rectilinear distance
203
- Taxicab norm
204
-
205
- Notes
206
- -----
207
- Cityblock distance between two probability density functions
208
- (pdfs) equals:
209
- 1. Non-intersection distance multiplied by 2.
210
- 2. Gower distance multiplied by vector length.
211
- 3. Bray-Curtis distance multiplied by 2.
212
- 4. Google distance multiplied by 2.
213
- """
214
- u, v = np.asarray(u), np.asarray(v)
215
- return scipy.spatial.distance.cityblock(u, v, w)
216
-
217
- def chebyshev(self, u, v, w=None):
218
- """Calculate the Chebyshev distance between two vectors.
219
-
220
- The Chebyshev distance is a metric defined on a vector space where the distance
221
- between two vectors
222
- is the greatest of their differences along any coordinate dimension.
223
-
224
- Synonyms:
225
- Chessboard distance
226
- King-move metric
227
- Maximum value distance
228
- Minimax approximation
229
-
230
- Parameters
231
- ----------
232
- - u, v: Input vectors between which the distance is to be calculated.
233
-
234
- Returns
235
- -------
236
- - The Chebyshev distance between the two vectors.
237
-
238
- References
239
- ----------
240
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
241
- Measures between Probability Density Functions. International
242
- Journal of Mathematical Models and Methods in Applied Sciences.
243
- 1(4), 300-307.
244
- """
245
- u, v = np.asarray(u), np.asarray(v)
246
- return scipy.spatial.distance.chebyshev(u, v, w)
247
-
248
- def correlation(self, u, v, w=None, centered=True):
249
- """Calculate the Pearson correlation distance between two vectors.
250
-
251
- Returns a distance value between 0 and 2.
252
-
253
- Parameters
254
- ----------
255
- - u, v: Input vectors between which the distance is to be calculated.
256
-
257
- Returns
258
- -------
259
- - The Pearson correlation distance between the two vectors.
260
- """
261
- u, v = np.asarray(u), np.asarray(v)
262
- if len(u) < 2 or len(v) < 2:
249
+ References
250
+ ----------
251
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
252
+ Measures between Probability Density Functions. International
253
+ Journal of Mathematical Models and Methods in Applied Sciences.
254
+ 1(4), 300-307.
255
+ """
256
+ u, v = np.asarray(u), np.asarray(v)
257
+ return scipy.spatial.distance.chebyshev(u, v, w)
258
+
259
+
260
+ def correlation(u, v, w=None, centered=True):
261
+ """Calculate the Pearson correlation distance between two vectors.
262
+
263
+ Returns a distance value between 0 and 2.
264
+
265
+ Parameters
266
+ ----------
267
+ - u, v: Input vectors between which the distance is to be calculated.
268
+
269
+ Returns
270
+ -------
271
+ - The Pearson correlation distance between the two vectors.
272
+ """
273
+ u, v = np.asarray(u), np.asarray(v)
274
+ if len(u) < 2 or len(v) < 2:
275
+ warnings.warn(
276
+ "Pearson correlation requires vectors of length at least 2.",
277
+ RuntimeWarning,
278
+ )
279
+ d = 0
280
+ else:
281
+ d = scipy.spatial.distance.correlation(u, v, w, centered)
282
+ if np.isnan(d) and (
283
+ np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
284
+ ):
263
285
  warnings.warn(
264
- "Pearson correlation requires vectors of length at least 2.",
286
+ "One of the vectors is constant; correlation is set to 0",
265
287
  RuntimeWarning,
266
288
  )
267
289
  d = 0
268
- else:
269
- d = scipy.spatial.distance.correlation(u, v, w, centered)
270
- if np.isnan(d) and (
271
- np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
272
- ):
273
- warnings.warn(
274
- "One of the vectors is constant; correlation is set to 0",
275
- RuntimeWarning,
276
- )
277
- d = 0
278
- return d
279
-
280
- def cosine(self, u, v, w=None):
281
- """Calculate the cosine distance between two vectors.
282
-
283
- Parameters
284
- ----------
285
- - u, v: Input vectors between which the distance is to be calculated.
286
-
287
- Returns
288
- -------
289
- - The cosine distance between the two vectors.
290
-
291
- References
292
- ----------
293
- 1. SciPy.
294
- """
295
- u, v = np.asarray(u), np.asarray(v)
296
- return scipy.spatial.distance.cosine(u, v, w)
297
-
298
- def clark(self, u, v):
299
- """Calculate the Clark distance between two vectors.
300
-
301
- The Clark distance equals the square root of half of the divergence.
302
-
303
- Notes
304
- -----
305
- When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
306
- is used in the calculation.
307
-
308
- Parameters
309
- ----------
310
- - u, v: Input vectors between which the distance is to be calculated.
311
-
312
- Returns
313
- -------
314
- - The Clark distance between the two vectors.
315
-
316
- References
317
- ----------
318
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
319
- Measures between Probability Density Functions. International
320
- Journal of Mathematical Models and Methods in Applied Sciences.
321
- 1(4), 300-307.
322
- """
323
- u, v = np.asarray(u), np.asarray(v)
324
- with np.errstate(divide="ignore", invalid="ignore"):
325
- return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
326
-
327
- def hellinger(self, u, v):
328
- """Calculate the Hellinger distance between two vectors.
329
-
330
- The Hellinger distance is a measure of similarity between two probability
331
- distributions.
332
-
333
- Parameters
334
- ----------
335
- - u, v: Input vectors between which the distance is to be calculated.
336
-
337
- Returns
338
- -------
339
- - The Hellinger distance between the two vectors.
340
-
341
- Notes
342
- -----
343
- This implementation produces values two times larger than values
344
- obtained by Hellinger distance described in Wikipedia and also
345
- in https://gist.github.com/larsmans/3116927.
346
-
347
- References
348
- ----------
349
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
350
- Measures between Probability Density Functions. International
351
- Journal of Mathematical Models and Methods in Applied Sciences.
352
- 1(4), 300-307.
353
- """
354
- u, v = np.asarray(u), np.asarray(v)
290
+ return d
291
+
292
+
293
+ def cosine(u, v, w=None):
294
+ """Calculate the cosine distance between two vectors.
295
+
296
+ Parameters
297
+ ----------
298
+ - u, v: Input vectors between which the distance is to be calculated.
299
+
300
+ Returns
301
+ -------
302
+ - The cosine distance between the two vectors.
303
+
304
+ References
305
+ ----------
306
+ 1. SciPy.
307
+ """
308
+ u, v = np.asarray(u), np.asarray(v)
309
+ return scipy.spatial.distance.cosine(u, v, w)
310
+
311
+
312
+ def clark(u, v):
313
+ """Calculate the Clark distance between two vectors.
314
+
315
+ The Clark distance equals the square root of half of the divergence.
316
+
317
+ Notes
318
+ -----
319
+ When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
320
+ is used in the calculation.
321
+
322
+ Parameters
323
+ ----------
324
+ - u, v: Input vectors between which the distance is to be calculated.
325
+
326
+ Returns
327
+ -------
328
+ - The Clark distance between the two vectors.
329
+
330
+ References
331
+ ----------
332
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
333
+ Measures between Probability Density Functions. International
334
+ Journal of Mathematical Models and Methods in Applied Sciences.
335
+ 1(4), 300-307.
336
+ """
337
+ u, v = np.asarray(u), np.asarray(v)
338
+ with np.errstate(divide="ignore", invalid="ignore"):
339
+ return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
340
+
341
+
342
+ def hellinger(u, v):
343
+ """Calculate the Hellinger distance between two vectors.
344
+
345
+ The Hellinger distance is a measure of similarity between two probability
346
+ distributions.
347
+
348
+ Parameters
349
+ ----------
350
+ - u, v: Input vectors between which the distance is to be calculated.
351
+
352
+ Returns
353
+ -------
354
+ - The Hellinger distance between the two vectors.
355
+
356
+ Notes
357
+ -----
358
+ This implementation produces values two times larger than values
359
+ obtained by Hellinger distance described in Wikipedia and also
360
+ in https://gist.github.com/larsmans/3116927.
361
+
362
+ References
363
+ ----------
364
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
365
+ Measures between Probability Density Functions. International
366
+ Journal of Mathematical Models and Methods in Applied Sciences.
367
+ 1(4), 300-307.
368
+ """
369
+ u, v = np.asarray(u), np.asarray(v)
370
+ # Clip negative values to zero for valid sqrt
371
+ with np.errstate(divide="ignore", invalid="ignore"):
372
+ u = np.clip(u, a_min=0, a_max=None)
373
+ v = np.clip(v, a_min=0, a_max=None)
355
374
  return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
356
375
 
357
- def jaccard(self, u, v):
358
- """Calculate the Jaccard distance between two vectors.
359
-
360
- The Jaccard distance measures dissimilarity between sample sets.
361
-
362
- Parameters
363
- ----------
364
- - u, v: Input vectors between which the distance is to be calculated.
365
-
366
- Returns
367
- -------
368
- - The Jaccard distance between the two vectors.
369
-
370
- References
371
- ----------
372
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
373
- Measures between Probability Density Functions. International
374
- Journal of Mathematical Models and Methods in Applied Sciences.
375
- 1(4), 300-307.
376
- """
377
- u, v = np.asarray(u), np.asarray(v)
378
- uv = np.dot(u, v)
379
- return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
380
-
381
- def lorentzian(self, u, v):
382
- """Calculate the Lorentzian distance between two vectors.
383
-
384
- Parameters
385
- ----------
386
- - u, v: Input vectors between which the distance is to be calculated.
387
-
388
- Returns
389
- -------
390
- - The Lorentzian distance between the two vectors.
391
-
392
- References
393
- ----------
394
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
395
- Measures between Probability Density Functions. International
396
- Journal of Mathematical Models and Methods in Applied Sciences.
397
- 1(4):300-307.
398
-
399
- Notes
400
- -----
401
- One (1) is added to guarantee the non-negativity property and to
402
- eschew the log of zero.
403
- """
404
- u, v = np.asarray(u), np.asarray(v)
376
+
377
+ def jaccard(u, v):
378
+ """Calculate the Jaccard distance between two vectors.
379
+
380
+ The Jaccard distance measures dissimilarity between sample sets.
381
+
382
+ Parameters
383
+ ----------
384
+ - u, v: Input vectors between which the distance is to be calculated.
385
+
386
+ Returns
387
+ -------
388
+ - The Jaccard distance between the two vectors.
389
+
390
+ References
391
+ ----------
392
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
393
+ Measures between Probability Density Functions. International
394
+ Journal of Mathematical Models and Methods in Applied Sciences.
395
+ 1(4), 300-307.
396
+ """
397
+ u, v = np.asarray(u), np.asarray(v)
398
+ uv = np.dot(u, v)
399
+ return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
400
+
401
+
402
+ def lorentzian(u, v):
403
+ """Calculate the Lorentzian distance between two vectors.
404
+
405
+ Parameters
406
+ ----------
407
+ - u, v: Input vectors between which the distance is to be calculated.
408
+
409
+ Returns
410
+ -------
411
+ - The Lorentzian distance between the two vectors.
412
+
413
+ References
414
+ ----------
415
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
416
+ Measures between Probability Density Functions. International
417
+ Journal of Mathematical Models and Methods in Applied Sciences.
418
+ 1(4):300-307.
419
+
420
+ Notes
421
+ -----
422
+ One (1) is added to guarantee the non-negativity property and to
423
+ eschew the log of zero.
424
+ """
425
+ u, v = np.asarray(u), np.asarray(v)
426
+ with np.errstate(divide="ignore", invalid="ignore"):
405
427
  return np.sum(np.log(np.abs(u - v) + 1))
406
428
 
407
- def marylandbridge(self, u, v):
408
- """Calculate the Maryland Bridge distance between two vectors.
409
-
410
- Parameters
411
- ----------
412
- - u, v: Input vectors between which the distance is to be calculated.
413
-
414
- Returns
415
- -------
416
- - The Maryland Bridge distance between the two vectors.
417
-
418
- References
419
- ----------
420
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
421
- Springer-Verlag Berlin Heidelberg. 1-590.
422
- """
423
- u, v = np.asarray(u), np.asarray(v)
424
- uvdot = np.dot(u, v)
425
- return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
426
-
427
- def meehl(self, u, v):
428
- """Calculate the Meehl distance between two vectors.
429
-
430
- Parameters
431
- ----------
432
- - u, v: Input vectors between which the distance is to be calculated.
433
-
434
- Returns
435
- -------
436
- - The Meehl distance between the two vectors.
437
-
438
- Notes
439
- -----
440
- Added by SC.
441
-
442
- References
443
- ----------
444
- 1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
445
- Berlin, Heidelberg: Springer Berlin Heidelberg.
446
- https://doi.org/10.1007/978-3-642-30958-8.
447
- """
448
- u, v = np.asarray(u), np.asarray(v)
449
-
450
- xi = u[:-1]
451
- yi = v[:-1]
452
- xiplus1 = np.roll(u, 1)[:-1]
453
- yiplus1 = np.roll(v, 1)[:-1]
454
-
455
- with np.errstate(divide="ignore", invalid="ignore"):
456
- return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
457
-
458
- def motyka(self, u, v):
459
- """Calculate the Motyka distance between two vectors.
460
-
461
- Parameters
462
- ----------
463
- - u, v: Input vectors between which the distance is to be calculated.
464
-
465
- Returns
466
- -------
467
- - The Motyka distance between the two vectors.
468
-
469
- Notes
470
- -----
471
- The distance between identical vectors is not equal to 0 but 0.5.
472
-
473
- References
474
- ----------
475
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
476
- Measures between Probability Density Functions. International
477
- Journal of Mathematical Models and Methods in Applied Sciences.
478
- 1(4), 300-307.
479
- """
480
- u, v = np.asarray(u), np.asarray(v)
481
- return np.sum(np.maximum(u, v)) / np.sum(u + v)
482
-
483
- def soergel(self, u, v):
484
- """Calculate the Soergel distance between two vectors.
485
-
486
- Parameters
487
- ----------
488
- - u, v: Input vectors between which the distance is to be calculated.
489
-
490
- Returns
491
- -------
492
- - The Soergel distance between the two vectors.
493
-
494
- Notes
495
- -----
496
- Equals Tanimoto distance.
497
-
498
- References
499
- ----------
500
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
501
- Measures between Probability Density Functions. International
502
- Journal of Mathematical Models and Methods in Applied Sciences.
503
- 1(4), 300-307.
504
- """
505
- u, v = np.asarray(u), np.asarray(v)
506
- return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
507
-
508
- def wave_hedges(self, u, v):
509
- """Calculate the Wave Hedges distance between two vectors.
510
-
511
- Parameters
512
- ----------
513
- - u, v: Input vectors between which the distance is to be calculated.
514
-
515
- Returns
516
- -------
517
- - The Wave Hedges distance between the two vectors.
518
-
519
- References
520
- ----------
521
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
522
- Measures between Probability Density Functions. International
523
- Journal of Mathematical Models and Methods in Applied Sciences.
524
- 1(4), 300-307
525
- """
526
- u, v = np.asarray(u), np.asarray(v)
527
- with np.errstate(divide="ignore", invalid="ignore"):
528
- u_v = abs(u - v)
529
- uvmax = np.maximum(u, v)
530
- return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
531
-
532
- def kulczynski(self, u, v):
533
- """Calculate the Kulczynski distance between two vectors.
534
-
535
- Parameters
536
- ----------
537
- - u, v: Input vectors between which the distance is to be calculated.
538
-
539
- Returns
540
- -------
541
- - The Kulczynski distance between the two vectors.
542
-
543
- References
544
- ----------
545
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
546
- Measures between Probability Density Functions. International
547
- Journal of Mathematical Models and Methods in Applied Sciences.
548
- 1(4):300-307.
549
- """
550
- u, v = np.asarray(u), np.asarray(v)
551
- return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
552
-
553
- def add_chisq(self, u, v):
554
- """Compute the Additive Symmetric Chi-square distance between two vectors.
555
-
556
- The Additive Symmetric Chi-square distance is a measure that
557
- can be used to compare two vectors. This function calculates it based
558
- on the input vectors u and v.
559
-
560
- Parameters
561
- ----------
562
- - u, v: Input vectors between which the distance is to be calculated.
563
-
564
- Returns
565
- -------
566
- - The Additive Symmetric Chi-square distance between the two vectors.
567
-
568
- References
569
- ----------
570
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
571
- Measures between Probability Density Functions.
572
- International Journal of Mathematical Models and Methods in
573
- Applied Sciences.
574
- vol. 1(4), pp. 300-307.
575
- """
576
- u, v = np.asarray(u), np.asarray(v)
577
- uvmult = u * v
578
- with np.errstate(divide="ignore", invalid="ignore"):
579
- return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
580
-
581
- # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
582
- # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
583
-
584
- def acc(self, u, v):
585
- """Calculate the average of Cityblock and Chebyshev distance.
586
-
587
- This function computes the ACC distance, also known as the
588
- Average distance, between two vectors u and v. It is the average of the
589
- Cityblock (or Manhattan) and Chebyshev distances.
590
-
591
- Parameters
592
- ----------
593
- - u, v: Input vectors between which the distance is to be calculated.
594
-
595
- Returns
596
- -------
597
- - The ACC distance between the two vectors.
598
-
599
- References
600
- ----------
601
- 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
602
- Geometry. Dover Publications.
603
- 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
604
- Measures between Probability Density Functions. International
605
- Journal of Mathematical Models and Methods in Applied Sciences.
606
- vol. 1(4), pp. 300-307.
607
- """
608
- return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
609
-
610
- # def bhattacharyya(self, u, v):
611
- # """
612
- # Calculate the Bhattacharyya distance between two vectors.
613
-
614
- # Returns a distance value between 0 and 1.
615
-
616
- # Parameters
617
- # ----------
618
- # - u, v: Input vectors between which the distance is to be calculated.
619
-
620
- # Returns
621
- # -------
622
- # - The Bhattacharyya distance between the two vectors.
623
-
624
- # References
625
- # ----------
626
- # 1. Bhattacharyya A (1947) On a measure of divergence between two
627
- # statistical populations defined by probability distributions,
628
- # Bull. Calcutta Math. Soc., 35, 99–109.
629
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
630
- # Measures between Probability Density Functions. International
631
- # Journal of Mathematical Models and Methods in Applied Sciences.
632
- # 1(4), 300-307.
633
- # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
634
- # """
635
- # u, v = np.asarray(u), np.asarray(v)
636
- # return -np.log(np.sum(np.sqrt(u * v)))
637
-
638
- def chebyshev_min(self, u, v):
639
- """Calculate the minimum value distance between two vectors.
640
-
641
- This measure represents a custom approach by Zielezinski to distance
642
- measurement, focusing on the minimum absolute difference.
643
-
644
- Parameters
645
- ----------
646
- - u, v: Input vectors between which the distance is to be calculated.
647
-
648
- Returns
649
- -------
650
- - The minimum value distance between the two vectors.
651
- """
652
- u, v = np.asarray(u), np.asarray(v)
653
- return np.amin(np.abs(u - v))
654
-
655
- def czekanowski(self, u, v):
656
- """Calculate the Czekanowski distance between two vectors.
657
-
658
- Parameters
659
- ----------
660
- - u, v: Input vectors between which the distance is to be calculated.
661
-
662
- Returns
663
- -------
664
- - The Czekanowski distance between the two vectors.
665
-
666
- References
667
- ----------
668
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
669
- Measures between Probability Density Functions. International
670
- Journal of Mathematical Models and Methods in Applied Sciences.
671
- 1(4), 300-307.
672
- """
673
- u, v = np.asarray(u), np.asarray(v)
674
- return np.sum(np.abs(u - v)) / np.sum(u + v)
675
-
676
- def dice(self, u, v):
677
- """Calculate the Dice dissimilarity between two vectors.
678
-
679
- Synonyms:
680
- Sorensen distance
681
-
682
- Parameters
683
- ----------
684
- - u, v: Input vectors between which the distance is to be calculated.
685
-
686
- Returns
687
- -------
688
- - The Dice dissimilarity between the two vectors.
689
-
690
- References
691
- ----------
692
- 1. Dice LR (1945) Measures of the amount of ecologic association
693
- between species. Ecology. 26, 297-302.
694
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
695
- Measures between Probability Density Functions. International
696
- Journal of Mathematical Models and Methods in Applied Sciences.
697
- 1(4), 300-307.
698
- """
699
- u, v = np.asarray(u), np.asarray(v)
700
- u_v = u - v
701
- return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
702
-
703
- def divergence(self, u, v):
704
- """Calculate the divergence between two vectors.
705
-
706
- Divergence equals squared Clark distance multiplied by 2.
707
-
708
- Parameters
709
- ----------
710
- - u, v: Input vectors between which the distance is to be calculated.
711
-
712
- Returns
713
- -------
714
- - The divergence between the two vectors.
715
-
716
- References
717
- ----------
718
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
719
- Measures between Probability Density Functions. International
720
- Journal of Mathematical Models and Methods in Applied Sciences.
721
- 1(4), 300-307.
722
- """
723
- u, v = np.asarray(u), np.asarray(v)
724
- with np.errstate(invalid="ignore"):
725
- return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
726
-
727
- def google(self, u, v):
728
- """Calculate the Normalized Google Distance (NGD) between two vectors.
729
-
730
- NGD is a measure of similarity derived from the number of hits returned by the
731
- Google search engine for a given set of keywords.
732
-
733
- Parameters
734
- ----------
735
- - u, v: Input vectors between which the distance is to be calculated.
736
-
737
- Returns
738
- -------
739
- - The Normalized Google Distance between the two vectors.
740
-
741
- Notes
742
- -----
743
- When used for comparing two probability density functions (pdfs),
744
- Google distance equals half of Cityblock distance.
745
-
746
- References
747
- ----------
748
- 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
749
- doi:10.1109/ITSIM.2008.4631601.
750
- """
751
- u, v = np.asarray(u), np.asarray(v)
752
- x = float(np.sum(u))
753
- y = float(np.sum(v))
754
- summin = float(np.sum(np.minimum(u, v)))
755
- return (max([x, y]) - summin) / ((x + y) - min([x, y]))
756
-
757
- def gower(self, u, v):
758
- """Calculate the Gower distance between two vectors.
759
-
760
- The Gower distance equals the Cityblock distance divided by the vector length.
761
-
762
- Parameters
763
- ----------
764
- - u, v: Input vectors between which the distance is to be calculated.
765
-
766
- Returns
767
- -------
768
- - The Gower distance between the two vectors.
769
-
770
- References
771
- ----------
772
- 1. Gower JC. (1971) General Coefficient of Similarity
773
- and Some of Its Properties, Biometrics 27, 857-874.
774
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
775
- Measures between Probability Density Functions. International
776
- Journal of Mathematical Models and Methods in Applied Sciences.
777
- 1(4), 300-307.
778
- """
779
- u, v = np.asarray(u), np.asarray(v)
780
- return np.sum(np.abs(u - v)) / u.size
781
-
782
- def jeffreys(self, u, v):
783
- """Calculate the Jeffreys divergence between two vectors.
784
-
785
- The Jeffreys divergence is a symmetric version of the Kullback-Leibler
786
- divergence.
787
-
788
- Parameters
789
- ----------
790
- - u, v: Input vectors between which the divergence is to be calculated.
791
-
792
- Returns
793
- -------
794
- - The Jeffreys divergence between the two vectors.
795
-
796
- References
797
- ----------
798
- 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
799
- in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
800
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
801
- Measures between Probability Density Functions. International
802
- Journal of Mathematical Models and Methods in Applied Sciences.
803
- 1(4), 300-307.
804
- """
805
- u, v = np.asarray(u), np.asarray(v)
806
- # Add epsilon to zeros in vectors to avoid division
807
- # by 0 and/or log of 0. Alternatively, zeros in the
808
- # vectors could be ignored or masked (see below).
809
- # u = ma.masked_where(u == 0, u)
810
- # v = ma.masked_where(v == 0, u)
811
- u = np.where(u == 0, self.epsilon, u)
812
- v = np.where(v == 0, self.epsilon, v)
813
- return np.sum((u - v) * np.log(u / v))
814
-
815
- def jensenshannon_divergence(self, u, v):
816
- """Calculate the Jensen-Shannon divergence between two vectors.
817
-
818
- The Jensen-Shannon divergence is a symmetric and finite measure of similarity
819
- between two probability distributions.
820
-
821
- Parameters
822
- ----------
823
- - u, v: Input vectors between which the divergence is to be calculated.
824
-
825
- Returns
826
- -------
827
- - The Jensen-Shannon divergence between the two vectors.
828
-
829
- References
830
- ----------
831
- 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
832
- IEEE Transactions on Information Theory, 37(1):145–151.
833
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
834
- Measures between Probability Density Functions. International
835
- Journal of Mathematical Models and Methods in Applied Sciences.
836
- 1(4), 300-307.
837
- Comments:
838
- Equals Jensen difference in Sung-Hyuk (2007):
839
- u = np.where(u==0, self.epsilon, u)
840
- v = np.where(v==0, self.epsilon, v)
841
- el1 = (u * np.log(u) + v * np.log(v)) / 2
842
- el2 = (u + v)/2
843
- el3 = np.log(el2)
844
- return np.sum(el1 - el2 * el3)
845
- """
846
- u, v = np.asarray(u), np.asarray(v)
847
- u = np.where(u == 0, self.epsilon, u)
848
- v = np.where(v == 0, self.epsilon, v)
849
- dl = u * np.log(2 * u / (u + v))
850
- dr = v * np.log(2 * v / (u + v))
429
+
430
+ def marylandbridge(u, v):
431
+ """Calculate the Maryland Bridge distance between two vectors.
432
+
433
+ Parameters
434
+ ----------
435
+ - u, v: Input vectors between which the distance is to be calculated.
436
+
437
+ Returns
438
+ -------
439
+ - The Maryland Bridge distance between the two vectors.
440
+
441
+ References
442
+ ----------
443
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
444
+ Springer-Verlag Berlin Heidelberg. 1-590.
445
+ """
446
+ u, v = np.asarray(u), np.asarray(v)
447
+ uvdot = np.dot(u, v)
448
+ return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
449
+
450
+
451
+ def meehl(u, v):
452
+ """Calculate the Meehl distance between two vectors.
453
+
454
+ Parameters
455
+ ----------
456
+ - u, v: Input vectors between which the distance is to be calculated.
457
+
458
+ Returns
459
+ -------
460
+ - The Meehl distance between the two vectors.
461
+
462
+ Notes
463
+ -----
464
+ Added by SC.
465
+
466
+ References
467
+ ----------
468
+ 1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
469
+ Berlin, Heidelberg: Springer Berlin Heidelberg.
470
+ https://doi.org/10.1007/978-3-642-30958-8.
471
+ """
472
+ u, v = np.asarray(u), np.asarray(v)
473
+
474
+ xi = u[:-1]
475
+ yi = v[:-1]
476
+ xiplus1 = np.roll(u, 1)[:-1]
477
+ yiplus1 = np.roll(v, 1)[:-1]
478
+
479
+ with np.errstate(divide="ignore", invalid="ignore"):
480
+ return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
481
+
482
+
483
+ def motyka(u, v):
484
+ """Calculate the Motyka distance between two vectors.
485
+
486
+ Parameters
487
+ ----------
488
+ - u, v: Input vectors between which the distance is to be calculated.
489
+
490
+ Returns
491
+ -------
492
+ - The Motyka distance between the two vectors.
493
+
494
+ Notes
495
+ -----
496
+ The distance between identical vectors is not equal to 0 but 0.5.
497
+
498
+ References
499
+ ----------
500
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
501
+ Measures between Probability Density Functions. International
502
+ Journal of Mathematical Models and Methods in Applied Sciences.
503
+ 1(4), 300-307.
504
+ """
505
+ u, v = np.asarray(u), np.asarray(v)
506
+ return np.sum(np.maximum(u, v)) / np.sum(u + v)
507
+
508
+
509
+ def soergel(u, v):
510
+ """Calculate the Soergel distance between two vectors.
511
+
512
+ Parameters
513
+ ----------
514
+ - u, v: Input vectors between which the distance is to be calculated.
515
+
516
+ Returns
517
+ -------
518
+ - The Soergel distance between the two vectors.
519
+
520
+ Notes
521
+ -----
522
+ Equals Tanimoto distance.
523
+
524
+ References
525
+ ----------
526
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
527
+ Measures between Probability Density Functions. International
528
+ Journal of Mathematical Models and Methods in Applied Sciences.
529
+ 1(4), 300-307.
530
+ """
531
+ u, v = np.asarray(u), np.asarray(v)
532
+ return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
533
+
534
+
535
+ def wave_hedges(u, v):
536
+ """Calculate the Wave Hedges distance between two vectors.
537
+
538
+ Parameters
539
+ ----------
540
+ - u, v: Input vectors between which the distance is to be calculated.
541
+
542
+ Returns
543
+ -------
544
+ - The Wave Hedges distance between the two vectors.
545
+
546
+ References
547
+ ----------
548
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
549
+ Measures between Probability Density Functions. International
550
+ Journal of Mathematical Models and Methods in Applied Sciences.
551
+ 1(4), 300-307
552
+ """
553
+ u, v = np.asarray(u), np.asarray(v)
554
+ with np.errstate(divide="ignore", invalid="ignore"):
555
+ u_v = abs(u - v)
556
+ uvmax = np.maximum(u, v)
557
+ return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
558
+
559
+
560
+ def kulczynski(u, v):
561
+ """Calculate the Kulczynski distance between two vectors.
562
+
563
+ Parameters
564
+ ----------
565
+ - u, v: Input vectors between which the distance is to be calculated.
566
+
567
+ Returns
568
+ -------
569
+ - The Kulczynski distance between the two vectors.
570
+
571
+ References
572
+ ----------
573
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
574
+ Measures between Probability Density Functions. International
575
+ Journal of Mathematical Models and Methods in Applied Sciences.
576
+ 1(4):300-307.
577
+ """
578
+ u, v = np.asarray(u), np.asarray(v)
579
+ return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
580
+
581
+
582
+ def add_chisq(u, v):
583
+ """Compute the Additive Symmetric Chi-square distance between two vectors.
584
+
585
+ The Additive Symmetric Chi-square distance is a measure that
586
+ can be used to compare two vectors. This function calculates it based
587
+ on the input vectors u and v.
588
+
589
+ Parameters
590
+ ----------
591
+ - u, v: Input vectors between which the distance is to be calculated.
592
+
593
+ Returns
594
+ -------
595
+ - The Additive Symmetric Chi-square distance between the two vectors.
596
+
597
+ References
598
+ ----------
599
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
600
+ Measures between Probability Density Functions.
601
+ International Journal of Mathematical Models and Methods in
602
+ Applied Sciences.
603
+ vol. 1(4), pp. 300-307.
604
+ """
605
+ u, v = np.asarray(u), np.asarray(v)
606
+ uvmult = u * v
607
+ with np.errstate(divide="ignore", invalid="ignore"):
608
+ return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
609
+
610
+
611
+ # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
612
+ # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
613
+
614
+
615
+ def acc(u, v):
616
+ """Calculate the average of Cityblock and Chebyshev distance.
617
+
618
+ This function computes the ACC distance, also known as the
619
+ Average distance, between two vectors u and v. It is the average of the
620
+ Cityblock (or Manhattan) and Chebyshev distances.
621
+
622
+ Parameters
623
+ ----------
624
+ - u, v: Input vectors between which the distance is to be calculated.
625
+
626
+ Returns
627
+ -------
628
+ - The ACC distance between the two vectors.
629
+
630
+ References
631
+ ----------
632
+ 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
633
+ Geometry. Dover Publications.
634
+ 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
635
+ Measures between Probability Density Functions. International
636
+ Journal of Mathematical Models and Methods in Applied Sciences.
637
+ vol. 1(4), pp. 300-307.
638
+ """
639
+ return (cityblock(u, v) + chebyshev(u, v)) / 2
640
+
641
+
642
+ # def bhattacharyya(u, v):
643
+ # """
644
+ # Calculate the Bhattacharyya distance between two vectors.
645
+
646
+ # Returns a distance value between 0 and 1.
647
+
648
+ # Parameters
649
+ # ----------
650
+ # - u, v: Input vectors between which the distance is to be calculated.
651
+
652
+ # Returns
653
+ # -------
654
+ # - The Bhattacharyya distance between the two vectors.
655
+
656
+ # References
657
+ # ----------
658
+ # 1. Bhattacharyya A (1947) On a measure of divergence between two
659
+ # statistical populations defined by probability distributions,
660
+ # Bull. Calcutta Math. Soc., 35, 99–109.
661
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
662
+ # Measures between Probability Density Functions. International
663
+ # Journal of Mathematical Models and Methods in Applied Sciences.
664
+ # 1(4), 300-307.
665
+ # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
666
+ # """
667
+ # u, v = np.asarray(u), np.asarray(v)
668
+ # with np.errstate(divide="ignore", invalid="ignore"):
669
+ # return -np.log(np.sum(np.sqrt(u * v)))
670
+
671
+
672
+ def chebyshev_min(u, v):
673
+ """Calculate the minimum value distance between two vectors.
674
+
675
+ This measure represents a custom approach by Zielezinski to distance
676
+ measurement, focusing on the minimum absolute difference.
677
+
678
+ Parameters
679
+ ----------
680
+ - u, v: Input vectors between which the distance is to be calculated.
681
+
682
+ Returns
683
+ -------
684
+ - The minimum value distance between the two vectors.
685
+ """
686
+ u, v = np.asarray(u), np.asarray(v)
687
+ return np.amin(np.abs(u - v))
688
+
689
+
690
+ def czekanowski(u, v):
691
+ """Calculate the Czekanowski distance between two vectors.
692
+
693
+ Parameters
694
+ ----------
695
+ - u, v: Input vectors between which the distance is to be calculated.
696
+
697
+ Returns
698
+ -------
699
+ - The Czekanowski distance between the two vectors.
700
+
701
+ References
702
+ ----------
703
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
704
+ Measures between Probability Density Functions. International
705
+ Journal of Mathematical Models and Methods in Applied Sciences.
706
+ 1(4), 300-307.
707
+ """
708
+ u, v = np.asarray(u), np.asarray(v)
709
+ return np.sum(np.abs(u - v)) / np.sum(u + v)
710
+
711
+
712
+ def dice(u, v):
713
+ """Calculate the Dice dissimilarity between two vectors.
714
+
715
+ Synonyms:
716
+ Sorensen distance
717
+
718
+ Parameters
719
+ ----------
720
+ - u, v: Input vectors between which the distance is to be calculated.
721
+
722
+ Returns
723
+ -------
724
+ - The Dice dissimilarity between the two vectors.
725
+
726
+ References
727
+ ----------
728
+ 1. Dice LR (1945) Measures of the amount of ecologic association
729
+ between species. Ecology. 26, 297-302.
730
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
731
+ Measures between Probability Density Functions. International
732
+ Journal of Mathematical Models and Methods in Applied Sciences.
733
+ 1(4), 300-307.
734
+ """
735
+ u, v = np.asarray(u), np.asarray(v)
736
+ u_v = u - v
737
+ return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
738
+
739
+
740
+ def divergence(u, v):
741
+ """Calculate the divergence between two vectors.
742
+
743
+ Divergence equals squared Clark distance multiplied by 2.
744
+
745
+ Parameters
746
+ ----------
747
+ - u, v: Input vectors between which the distance is to be calculated.
748
+
749
+ Returns
750
+ -------
751
+ - The divergence between the two vectors.
752
+
753
+ References
754
+ ----------
755
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
756
+ Measures between Probability Density Functions. International
757
+ Journal of Mathematical Models and Methods in Applied Sciences.
758
+ 1(4), 300-307.
759
+ """
760
+ u, v = np.asarray(u), np.asarray(v)
761
+ with np.errstate(invalid="ignore"):
762
+ return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
763
+
764
+
765
+ def google(u, v):
766
+ """Calculate the Normalized Google Distance (NGD) between two vectors.
767
+
768
+ NGD is a measure of similarity derived from the number of hits returned by the
769
+ Google search engine for a given set of keywords.
770
+
771
+ Parameters
772
+ ----------
773
+ - u, v: Input vectors between which the distance is to be calculated.
774
+
775
+ Returns
776
+ -------
777
+ - The Normalized Google Distance between the two vectors.
778
+
779
+ Notes
780
+ -----
781
+ When used for comparing two probability density functions (pdfs),
782
+ Google distance equals half of Cityblock distance.
783
+
784
+ References
785
+ ----------
786
+ 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
787
+ doi:10.1109/ITSIM.2008.4631601.
788
+ """
789
+ u, v = np.asarray(u), np.asarray(v)
790
+ x = float(np.sum(u))
791
+ y = float(np.sum(v))
792
+ summin = float(np.sum(np.minimum(u, v)))
793
+ return (max([x, y]) - summin) / ((x + y) - min([x, y]))
794
+
795
+
796
+ def gower(u, v):
797
+ """Calculate the Gower distance between two vectors.
798
+
799
+ The Gower distance equals the Cityblock distance divided by the vector length.
800
+
801
+ Parameters
802
+ ----------
803
+ - u, v: Input vectors between which the distance is to be calculated.
804
+
805
+ Returns
806
+ -------
807
+ - The Gower distance between the two vectors.
808
+
809
+ References
810
+ ----------
811
+ 1. Gower JC. (1971) General Coefficient of Similarity
812
+ and Some of Its Properties, Biometrics 27, 857-874.
813
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
814
+ Measures between Probability Density Functions. International
815
+ Journal of Mathematical Models and Methods in Applied Sciences.
816
+ 1(4), 300-307.
817
+ """
818
+ u, v = np.asarray(u), np.asarray(v)
819
+ return np.sum(np.abs(u - v)) / u.size
820
+
821
+
822
+ def jeffreys(u, v):
823
+ """Calculate the Jeffreys divergence between two vectors.
824
+
825
+ The Jeffreys divergence is a symmetric version of the Kullback-Leibler
826
+ divergence.
827
+
828
+ Parameters
829
+ ----------
830
+ - u, v: Input vectors between which the divergence is to be calculated.
831
+
832
+ Returns
833
+ -------
834
+ - The Jeffreys divergence between the two vectors.
835
+
836
+ References
837
+ ----------
838
+ 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
839
+ in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
840
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
841
+ Measures between Probability Density Functions. International
842
+ Journal of Mathematical Models and Methods in Applied Sciences.
843
+ 1(4), 300-307.
844
+ """
845
+ u, v = np.asarray(u), np.asarray(v)
846
+ # Add epsilon to zeros in vectors to avoid division
847
+ # by 0 and/or log of 0. Alternatively, zeros in the
848
+ # vectors could be ignored or masked (see below).
849
+ # u = ma.masked_where(u == 0, u)
850
+ # v = ma.masked_where(v == 0, u)
851
+ with np.errstate(divide="ignore", invalid="ignore"):
852
+ u[u == 0] = EPSILON
853
+ v[v == 0] = EPSILON
854
+ # Clip negative values to zero for valid log
855
+ udivv = np.clip(u / v, a_min=EPSILON, a_max=None)
856
+ return np.sum((u - v) * np.log(udivv))
857
+
858
+
859
+ def jensenshannon_divergence(u, v):
860
+ """Calculate the Jensen-Shannon divergence between two vectors.
861
+
862
+ The Jensen-Shannon divergence is a symmetric and finite measure of similarity
863
+ between two probability distributions.
864
+
865
+ Parameters
866
+ ----------
867
+ - u, v: Input vectors between which the divergence is to be calculated.
868
+
869
+ Returns
870
+ -------
871
+ - The Jensen-Shannon divergence between the two vectors.
872
+
873
+ References
874
+ ----------
875
+ 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
876
+ IEEE Transactions on Information Theory, 37(1):145–151.
877
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
878
+ Measures between Probability Density Functions. International
879
+ Journal of Mathematical Models and Methods in Applied Sciences.
880
+ 1(4), 300-307.
881
+ Comments:
882
+ Equals Jensen difference in Sung-Hyuk (2007):
883
+ u = np.where(u==0, EPSILON, u)
884
+ v = np.where(v==0, EPSILON, v)
885
+ el1 = (u * np.log(u) + v * np.log(v)) / 2
886
+ el2 = (u + v)/2
887
+ el3 = np.log(el2)
888
+ return np.sum(el1 - el2 * el3)
889
+ """
890
+ u, v = np.asarray(u), np.asarray(v)
891
+ with np.errstate(divide="ignore", invalid="ignore"):
892
+ # Clip negative values to zero for valid log
893
+ u[u == 0] = EPSILON
894
+ v[v == 0] = EPSILON
895
+
896
+ term1 = np.clip(2 * u / (u + v), a_min=EPSILON, a_max=None)
897
+ term2 = np.clip(2 * v / (u + v), a_min=EPSILON, a_max=None)
898
+
899
+ dl = u * np.log(term1)
900
+ dr = v * np.log(term2)
851
901
  return (np.sum(dl) + np.sum(dr)) / 2
852
902
 
853
- def jensen_difference(self, u, v):
854
- """Calculate the Jensen difference between two vectors.
855
903
 
856
- The Jensen difference is considered similar to the Jensen-Shannon divergence.
904
+ def jensen_difference(u, v):
905
+ """Calculate the Jensen difference between two vectors.
857
906
 
858
- Parameters
859
- ----------
860
- - u, v: Input vectors between which the distance is to be calculated.
907
+ The Jensen difference is considered similar to the Jensen-Shannon divergence.
861
908
 
862
- Returns
863
- -------
864
- - The Jensen difference between the two vectors.
909
+ Parameters
910
+ ----------
911
+ - u, v: Input vectors between which the distance is to be calculated.
912
+
913
+ Returns
914
+ -------
915
+ - The Jensen difference between the two vectors.
916
+
917
+ Notes
918
+ -----
919
+ 1. Equals half of Topsøe distance
920
+ 2. Equals squared jensenshannon_distance.
865
921
 
866
- Notes
867
- -----
868
- 1. Equals half of Topsøe distance
869
- 2. Equals squared jensenshannon_distance.
870
922
 
923
+ References
924
+ ----------
925
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
926
+ Measures between Probability Density Functions. International
927
+ Journal of Mathematical Models and Methods in Applied Sciences.
928
+ 1(4), 300-307.
929
+ """
930
+ u, v = np.asarray(u), np.asarray(v)
871
931
 
872
- References
873
- ----------
874
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
875
- Measures between Probability Density Functions. International
876
- Journal of Mathematical Models and Methods in Applied Sciences.
877
- 1(4), 300-307.
878
- """
879
- u, v = np.asarray(u), np.asarray(v)
880
- u = np.where(u == 0, self.epsilon, u)
881
- v = np.where(v == 0, self.epsilon, v)
932
+ with np.errstate(divide="ignore", invalid="ignore"):
933
+ # Clip negative values to eps for valid log
934
+ u = np.clip(u, EPSILON, None)
935
+ v = np.clip(v, EPSILON, None)
882
936
  el1 = (u * np.log(u) + v * np.log(v)) / 2
883
- el2 = (u + v) / 2
937
+ el2 = np.clip((u + v) / 2, a_min=EPSILON, a_max=None)
884
938
  return np.sum(el1 - el2 * np.log(el2))
885
939
 
886
- def kumarjohnson(self, u, v):
887
- """Calculate the Kumar-Johnson distance between two vectors.
888
-
889
- Parameters
890
- ----------
891
- - u, v: Input vectors between which the distance is to be calculated.
892
-
893
- Returns
894
- -------
895
- - The Kumar-Johnson distance between the two vectors.
896
-
897
- References
898
- ----------
899
- 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
900
- and information inequalities, Journal of Inequalities in pure
901
- and applied Mathematics. 6(3).
902
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
903
- Measures between Probability Density Functions. International
904
- Journal of Mathematical Models and Methods in Applied Sciences.
905
- 1(4):300-307.
906
- """
907
- u, v = np.asarray(u), np.asarray(v)
908
- uvmult = u * v
909
- with np.errstate(divide="ignore", invalid="ignore"):
910
- numer = np.power(u**2 - v**2, 2)
911
- denom = 2 * np.power(uvmult, 3 / 2)
912
- return np.sum(np.where(uvmult != 0, numer / denom, 0))
913
-
914
- def matusita(self, u, v):
915
- """Calculate the Matusita distance between two vectors.
916
-
917
- Parameters
918
- ----------
919
- - u, v: Input vectors between which the distance is to be calculated.
920
-
921
- Returns
922
- -------
923
- - The Matusita distance between the two vectors.
924
-
925
- References
926
- ----------
927
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
928
- Measures between Probability Density Functions. International
929
- Journal of Mathematical Models and Methods in Applied Sciences.
930
- 1(4):300-307.
931
-
932
- Notes
933
- -----
934
- Equals square root of Squared-chord distance.
935
- """
936
- u, v = np.asarray(u), np.asarray(v)
940
+
941
+ def kumarjohnson(u, v):
942
+ """Calculate the Kumar-Johnson distance between two vectors.
943
+
944
+ Parameters
945
+ ----------
946
+ - u, v: Input vectors between which the distance is to be calculated.
947
+
948
+ Returns
949
+ -------
950
+ - The Kumar-Johnson distance between the two vectors.
951
+
952
+ References
953
+ ----------
954
+ 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
955
+ and information inequalities, Journal of Inequalities in pure
956
+ and applied Mathematics. 6(3).
957
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
958
+ Measures between Probability Density Functions. International
959
+ Journal of Mathematical Models and Methods in Applied Sciences.
960
+ 1(4):300-307.
961
+ """
962
+ u, v = np.asarray(u), np.asarray(v)
963
+ uvmult = u * v
964
+ with np.errstate(divide="ignore", invalid="ignore"):
965
+ numer = np.power(u**2 - v**2, 2)
966
+ denom = 2 * np.power(uvmult, 3 / 2)
967
+ return np.sum(np.where(uvmult != 0, numer / denom, 0))
968
+
969
+
970
+ def matusita(u, v):
971
+ """Calculate the Matusita distance between two vectors.
972
+
973
+ Parameters
974
+ ----------
975
+ - u, v: Input vectors between which the distance is to be calculated.
976
+
977
+ Returns
978
+ -------
979
+ - The Matusita distance between the two vectors.
980
+
981
+ References
982
+ ----------
983
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
984
+ Measures between Probability Density Functions. International
985
+ Journal of Mathematical Models and Methods in Applied Sciences.
986
+ 1(4):300-307.
987
+
988
+ Notes
989
+ -----
990
+ Equals square root of Squared-chord distance.
991
+ """
992
+ u, v = np.asarray(u), np.asarray(v)
993
+ with np.errstate(divide="ignore", invalid="ignore"):
937
994
  return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
938
995
 
939
- def minkowski(self, u, v, p=2):
940
- """Calculate the Minkowski distance between two vectors.
941
-
942
- Parameters
943
- ----------
944
- - u, v: Input vectors between which the distance is to be calculated.
945
- - p: The order of the norm of the difference.
946
-
947
- Returns
948
- -------
949
- - The Minkowski distance between the two vectors.
950
-
951
- Notes
952
- -----
953
- When p goes to infinite, the Chebyshev distance is derived.
954
-
955
- References
956
- ----------
957
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
958
- Measures between Probability Density Functions. International
959
- Journal of Mathematical Models and Methods in Applied Sciences.
960
- 1(4):300-307.
961
- """
962
- u, v = np.asarray(u), np.asarray(v)
963
- return np.linalg.norm(u - v, ord=p)
964
-
965
- def penroseshape(self, u, v):
966
- """Calculate the Penrose shape distance between two vectors.
967
-
968
- Parameters
969
- ----------
970
- - u, v: Input vectors between which the distance is to be calculated.
971
-
972
- Returns
973
- -------
974
- - The Penrose shape distance between the two vectors.
975
-
976
- References
977
- ----------
978
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
979
- Springer-Verlag Berlin Heidelberg. 1-590.
980
- """
981
- u, v = np.asarray(u), np.asarray(v)
982
- umu = np.mean(u)
983
- vmu = np.mean(v)
996
+
997
+ def minkowski(u, v, p=2):
998
+ """Calculate the Minkowski distance between two vectors.
999
+
1000
+ Parameters
1001
+ ----------
1002
+ - u, v: Input vectors between which the distance is to be calculated.
1003
+ - p: The order of the norm of the difference.
1004
+
1005
+ Returns
1006
+ -------
1007
+ - The Minkowski distance between the two vectors.
1008
+
1009
+ Notes
1010
+ -----
1011
+ When p goes to infinite, the Chebyshev distance is derived.
1012
+
1013
+ References
1014
+ ----------
1015
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1016
+ Measures between Probability Density Functions. International
1017
+ Journal of Mathematical Models and Methods in Applied Sciences.
1018
+ 1(4):300-307.
1019
+ """
1020
+ u, v = np.asarray(u), np.asarray(v)
1021
+ return np.linalg.norm(u - v, ord=p)
1022
+
1023
+
1024
+ def penroseshape(u, v):
1025
+ """Calculate the Penrose shape distance between two vectors.
1026
+
1027
+ Parameters
1028
+ ----------
1029
+ - u, v: Input vectors between which the distance is to be calculated.
1030
+
1031
+ Returns
1032
+ -------
1033
+ - The Penrose shape distance between the two vectors.
1034
+
1035
+ References
1036
+ ----------
1037
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1038
+ Springer-Verlag Berlin Heidelberg. 1-590.
1039
+ """
1040
+ u, v = np.asarray(u), np.asarray(v)
1041
+ umu = np.mean(u)
1042
+ vmu = np.mean(v)
1043
+ with np.errstate(divide="ignore", invalid="ignore"):
984
1044
  return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
985
1045
 
986
def prob_chisq(u, v):
    """Calculate the Probabilistic chi-square distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Probabilistic chi-square distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    u, v = np.asarray(u), np.asarray(v)
    total = u + v
    # Positions where u + v == 0 contribute nothing; guard the division.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(total != 0, (u - v) ** 2 / total, 0)
    return 2 * np.sum(terms)
1066
+
1067
+
1068
def ruzicka(u, v):
    """Calculate the Ruzicka distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Ruzicka distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    u, v = np.asarray(u), np.asarray(v)
    # One minus the ratio of elementwise-min mass to elementwise-max mass.
    shared = np.sum(np.minimum(u, v))
    total = np.sum(np.maximum(u, v))
    return 1 - shared / total
1087
+
1088
+
1089
def sorensen(u, v):
    """Calculate the Sorensen distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Sorensen distance between the two vectors.

    Notes
    -----
    The Sorensen distance equals the Manhattan distance divided by the sum of
    the two vectors.

    Added by SC.
    """
    u, v = np.asarray(u), np.asarray(v)
    numerator = np.abs(u - v).sum()
    denominator = (u + v).sum()
    return numerator / denominator
1109
+
1110
+
1111
def squared_chisq(u, v):
    """Calculate the Squared chi-square distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    total = u + v
    # Zero-sum coordinates contribute 0; mask them before dividing.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(total != 0, (u - v) ** 2 / total, 0)
    return np.sum(terms)
1133
+
1134
+
1135
def squaredchord(u, v):
    """Calculate the Squared-chord distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared-chord distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals to squared Matusita distance.
    """
    u, v = np.asarray(u), np.asarray(v)
    # errstate silences invalid-value warnings if a negative component
    # slips under the square roots.
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = np.sqrt(u) - np.sqrt(v)
        return np.sum(diff * diff)
1097
1163
 
1098
def squared_euclidean(u, v):
    """Calculate the Squared Euclidean distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared Euclidean distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals to squared Euclidean distance.
    """
    u, v = np.asarray(u), np.asarray(v)
    # dot(d, d) == sum of squared component differences.
    diff = u - v
    return np.dot(diff, diff)
1192
+
1193
+
1194
def taneja(u, v):
    """Calculate the Taneja distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Taneja distance between the two vectors.

    References
    ----------
    1. Taneja IJ. (1995), New Developments in Generalized Information
       Measures, Chapter in: Advances in Imaging and Electron Physics,
       Ed. P.W. Hawkes, 91, 37-135.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # Same value as the module-level EPSILON constant.
    eps = np.finfo(float).eps
    # Work on float copies: the previous in-place `u[u == 0] = EPSILON`
    # mutated the caller's array (np.asarray does not copy), raised on
    # read-only arrays, and truncated EPSILON to 0 on integer dtypes.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    uvsum = u + v
    with np.errstate(divide="ignore", invalid="ignore"):
        # Clip keeps the log argument strictly positive.
        logarg = np.clip(uvsum / (2 * np.sqrt(u * v)), a_min=eps, a_max=None)
        return np.sum((uvsum / 2) * np.log(logarg))
1222
+
1223
+
1224
def tanimoto(u, v):
    """Calculate the Tanimoto distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Tanimoto distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Soergel distance.
    """
    u, v = np.asarray(u), np.asarray(v)
    # Equivalent closed form: sum|u-v| / sum(max(u, v)).
    total = np.sum(u) + np.sum(v)
    overlap = np.sum(np.minimum(u, v))
    return (total - 2 * overlap) / (total - overlap)
1252
+
1253
+
1254
def topsoe(u, v):
    """Calculate the Topsøe distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Topsøe distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals two times Jensen-Shannon divergence.
    """
    # Same value as the module-level EPSILON constant.
    eps = np.finfo(float).eps
    # Work on float copies: the previous in-place `u[u == 0] = EPSILON`
    # mutated the caller's array (np.asarray does not copy), raised on
    # read-only arrays, and truncated EPSILON to 0 on integer dtypes.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    uvsum = u + v
    with np.errstate(divide="ignore", invalid="ignore"):
        # Clip keeps the log arguments strictly positive.
        dl = u * np.log(np.clip(2 * u / uvsum, a_min=eps, a_max=None))
        dr = v * np.log(np.clip(2 * v / uvsum, a_min=eps, a_max=None))
    return np.sum(dl + dr)
1210
1285
 
1211
def vicis_symmetric_chisq(u, v):
    """Calculate the Vicis Symmetric chi-square distance.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis Symmetric chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    u, v = np.asarray(u), np.asarray(v)
    gap = (u - v) ** 2
    floor = np.minimum(u, v) ** 2
    # Coordinates where the squared minimum is zero contribute nothing.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(floor != 0, gap / floor, 0)
    return terms.sum()
1310
+
1311
+
1312
def vicis_wave_hedges(u, v):
    """Calculate the Vicis-Wave Hedges distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis-Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    gap = np.abs(u - v)
    floor = np.minimum(u, v)
    # Coordinates where the minimum is zero contribute nothing.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(floor != 0, gap / floor, 0)
    return terms.sum()
1335
+
1336
+
1337
+ # def fidelity(u, v):
1338
+ # """
1339
+ # Calculate the fidelity distance between two vectors.
1340
+
1341
+ # The fidelity distance measures the similarity between two probability
1342
+ # distributions.
1343
+
1344
+ # Parameters
1345
+ # ----------
1346
+ # - u, v: Input vectors between which the distance is to be calculated.
1347
+
1348
+ # Returns
1349
+ # -------
1350
+ # - The fidelity distance between the two vectors.
1351
+
1352
+ # Notes
1353
+ # -----
1354
+ # Added by SC.
1355
+ # """
1356
+ # u, v = np.asarray(u), np.asarray(v)
1357
+ # return 1 - (np.sum(np.sqrt(u * v)))
1358
+
1359
+ # # NEEDS CHECKING
1360
+ # # def harmonicmean(u, v):
1361
+ # # """
1362
+ # # Harmonic mean distance.
1363
+ # # Notes:
1364
+ # # Added by SC.
1365
+ # # """
1366
+ # # u, v = np.asarray(u), np.asarray(v)
1367
+ # # return 1 - 2.0 * np.sum(u * v / (u + v))
1368
+
1369
+ # # def inner(u, v):
1370
+ # # """
1371
+ # # Calculate the inner product distance between two vectors.
1372
+
1373
+ # # The inner product distance is a measure of
1374
+ # # similarity between two vectors,
1375
+ # # based on their inner product.
1376
+
1377
+ # # Parameters
1378
+ # # ----------
1379
+ # # - u, v: Input vectors between which the distance is to be calculated.
1380
+
1381
+ # # Returns
1382
+ # # -------
1383
+ # # - The inner product distance between the two vectors.
1384
+
1385
+ # # Notes
1386
+ # # -----
1387
+ # # Added by SC.
1388
+ # # """
1389
+ # # u, v = np.asarray(u), np.asarray(v)
1390
+ # # return 1 - np.dot(u, v)
1391
+
1392
+ # def k_divergence(u, v):
1393
+ # """Calculate the K divergence between two vectors.
1394
+
1395
+ # Parameters
1396
+ # ----------
1397
+ # - u, v: Input vectors between which the divergence is to be calculated.
1398
+
1399
+ # Returns
1400
+ # -------
1401
+ # - The K divergence between the two vectors.
1402
+
1403
+ # References
1404
+ # ----------
1405
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1406
+ # Measures between Probability Density Functions. International
1407
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1408
+ # 1(4), 300-307.
1409
+ # """
1410
+ # u, v = np.asarray(u), np.asarray(v)
1411
+ # u[u == 0] = EPSILON
1412
+ # v[v == 0] = EPSILON
1413
+ # with np.errstate(divide="ignore", invalid="ignore"):
1414
+ # return np.sum(u * np.log(2 * u / (u + v)))
1415
+
1416
+ # def kl_divergence(u, v):
1417
+ # """Calculate the Kullback-Leibler divergence between two vectors.
1418
+
1419
+ # The Kullback-Leibler divergence measures the difference between two
1420
+ # probability distributions.
1421
+
1422
+ # Parameters
1423
+ # ----------
1424
+ # - u, v: Input vectors between which the divergence is to be calculated.
1425
+
1426
+ # Returns
1427
+ # -------
1428
+ # - The Kullback-Leibler divergence between the two vectors.
1429
+
1430
+ # References
1431
+ # ----------
1432
+ # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
1433
+ # Ann. Math. Statist. 22:79–86
1434
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1435
+ # Measures between Probability Density Functions. International
1436
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1437
+ # 1(4):300-307.
1438
+ # """
1439
+ # u, v = np.asarray(u), np.asarray(v)
1440
+ # u[u == 0] = EPSILON
1441
+ # v[v == 0] = EPSILON
1442
+ # with np.errstate(divide="ignore", invalid="ignore"):
1443
+ # return np.sum(u * np.log(u / v))
1444
+
1445
+ # def max_symmetric_chisq(u, v):
1446
+ # """Calculate the maximum symmetric chi-square distance.
1447
+
1448
+ # Parameters
1449
+ # ----------
1450
+ # - u, v: Input vectors between which the distance is to be calculated.
1451
+
1452
+ # Returns
1453
+ # -------
1454
+ # - The maximum symmetric chi-square distance between the two vectors.
1455
+
1456
+ # References
1457
+ # ----------
1458
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1459
+ # Measures between Probability Density Functions. International
1460
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1461
+ # 1(4):300-307.
1462
+ # """
1463
+ # u, v = np.asarray(u), np.asarray(v)
1464
+ # return max(neyman_chisq(u, v), pearson_chisq(u, v))
1465
+
1466
+ # def min_symmetric_chisq(u, v):
1467
+ # """Calculate the minimum symmetric chi-square distance.
1468
+
1469
+ # Parameters
1470
+ # ----------
1471
+ # - u, v: Input vectors between which the distance is to be calculated.
1472
+
1473
+ # Returns
1474
+ # -------
1475
+ # - The minimum symmetric chi-square distance between the two vectors.
1476
+
1477
+ # Notes
1478
+ # -----
1479
+ # Added by SC.
1480
+ # """
1481
+ # u, v = np.asarray(u), np.asarray(v)
1482
+ # return min(neyman_chisq(u, v), pearson_chisq(u, v))
1483
+
1484
+ # def neyman_chisq(u, v):
1485
+ # """Calculate the Neyman chi-square distance between two vectors.
1486
+
1487
+ # Parameters
1488
+ # ----------
1489
+ # - u, v: Input vectors between which the distance is to be calculated.
1490
+
1491
+ # Returns
1492
+ # -------
1493
+ # - The Neyman chi-square distance between the two vectors.
1494
+
1495
+ # References
1496
+ # ----------
1497
+ # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1498
+ # In Proceedings of the First Berkley Symposium on Mathematical
1499
+ # Statistics and Probability.
1500
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1501
+ # Measures between Probability Density Functions. International
1502
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1503
+ # 1(4), 300-307.
1504
+ # """
1505
+ # u, v = np.asarray(u), np.asarray(v)
1506
+ # with np.errstate(divide="ignore", invalid="ignore"):
1507
+ # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1508
+
1509
+ # def pearson_chisq(u, v):
1510
+ # """Calculate the Pearson chi-square divergence between two vectors.
1511
+
1512
+ # Parameters
1513
+ # ----------
1514
+ # - u, v: Input vectors between which the divergence is to be calculated.
1515
+
1516
+ # Returns
1517
+ # -------
1518
+ # - The Pearson chi-square divergence between the two vectors.
1519
+
1520
+ # References
1521
+ # ----------
1522
+ # 1. Pearson K. (1900) On the Criterion that a given system of
1523
+ # deviations from the probable in the case of correlated system
1524
+ # of variables is such that it can be reasonable supposed to have
1525
+ # arisen from random sampling, Phil. Mag. 50, 157-172.
1526
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1527
+ # Measures between Probability Density Functions. International
1528
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1529
+ # 1(4), 300-307.
1530
+
1531
+ # Notes
1532
+ # -----
1533
+ # Pearson chi-square divergence is asymmetric.
1534
+ # """
1535
+ # u, v = np.asarray(u), np.asarray(v)
1536
+ # with np.errstate(divide="ignore", invalid="ignore"):
1537
+ # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1538
+
1539
+ # def nonintersection(u, v):
1540
+ # """
1541
+ # Calculate the Nonintersection distance between two vectors.
1542
+
1543
+ # Parameters
1544
+ # ----------
1545
+ # - u, v: Input vectors between which the distance is to be calculated.
1546
+
1547
+ # Returns
1548
+ # -------
1549
+ # - The Nonintersection distance between the two vectors.
1550
+
1551
+ # References
1552
+ # ----------
1553
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1554
+ # Measures between Probability Density Functions. International
1555
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1556
+ # 1(4), 300-307.
1557
+
1558
+ # Notes
1559
+ # -----
1560
+ # When used for comparing two probability density functions (pdfs),
1561
+ # Nonintersection distance equals half of Cityblock distance.
1562
+ # """
1563
+ # u, v = np.asarray(u), np.asarray(v)
1564
+ # return 1 - np.sum(np.minimum(u, v))