distclassipy 0.2.1__py3-none-any.whl → 0.2.2a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -48,6 +48,8 @@ import numpy as np
48
48
 
49
49
  import scipy
50
50
 
51
+ # Default epsilon value to avoid division by zero
52
+ EPSILON = np.finfo(float).eps
51
53
  _ALL_METRICS = [
52
54
  "euclidean",
53
55
  "braycurtis",
@@ -95,1466 +97,1468 @@ _ALL_METRICS = [
95
97
  ]
96
98
 
97
99
 
98
- class Distance:
99
- """A class to calculate various distance metrics between vectors.
100
+ def euclidean(u, v, w=None):
101
+ """Calculate the Euclidean distance between two vectors.
100
102
 
101
- This class provides methods to compute different types of distances between
102
- two vectors, such as Euclidean, Manhattan, Canberra, and other statistical
103
- distances. Each method takes two vectors as input and returns the calculated
104
- distance. The class can handle both numpy arrays and lists, converting them
105
- internally to numpy arrays for computation.
103
+ The Euclidean distance is the "ordinary" straight-line distance between two
104
+ points in Euclidean space.
106
105
 
107
- Attributes
106
+ Parameters
108
107
  ----------
109
- epsilon : float, optional
110
- A small value to avoid division by zero errors in certain distance
111
- calculations. Default is the machine precision for float data type.
108
+ - u, v: Input vectors between which the distance is to be calculated.
112
109
 
113
- Methods
110
+ Returns
114
111
  -------
115
- acc(u, v)
116
- Returns the average of Cityblock/Manhattan and Chebyshev distances.
117
- add_chisq(u, v)
118
- Returns the Additive Symmetric Chi-square distance.
119
- (Other methods are not listed here for brevity)
120
-
121
- Examples
122
- --------
123
- >>> dist = Distance()
124
- >>> u = [1, 2, 3]
125
- >>> v = [4, 5, 6]
126
- >>> print(dist.acc(u, v))
127
- 5.0
112
+ - The Euclidean distance between the two vectors.
113
+
114
+ References
115
+ ----------
116
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
117
+ Measures between Probability Density Functions. International
118
+ Journal of Mathematical Models and Methods in Applied Sciences.
119
+ 1(4), 300-307.
120
+ """
121
+ u, v = np.asarray(u), np.asarray(v)
122
+ return scipy.spatial.distance.euclidean(u, v, w)
123
+
124
+
125
+ def braycurtis(u, v, w=None):
126
+ """Calculate the Bray-Curtis distance between two vectors.
127
+
128
+ The Bray-Curtis distance is a measure of dissimilarity between two non-negative
129
+ vectors, often used in ecology to measure the compositional dissimilarity
130
+ between two sites based on counts of species at both sites. It is closely
131
+ related to the Sørensen distance and is also known as Bray-Curtis
132
+ dissimilarity.
133
+
134
+ Notes
135
+ -----
136
+ When used for comparing two probability density functions (pdfs),
137
+ the Bray-Curtis distance equals the Cityblock distance divided by 2.
138
+
139
+ Parameters
140
+ ----------
141
+ - u, v: Input vectors between which the distance is to be calculated.
142
+
143
+ Returns
144
+ -------
145
+ - The Bray-Curtis distance between the two vectors.
146
+
147
+ References
148
+ ----------
149
+ 1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
150
+ southern Wisconsin. Ecological Monographs, 27, 325-349.
151
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
152
+ Measures between Probability Density Functions. International
153
+ Journal of Mathematical Models and Methods in Applied Sciences.
154
+ 1(4), 300-307.
155
+ 3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
156
+ """
157
+ u, v = np.asarray(u), np.asarray(v)
158
+ return scipy.spatial.distance.braycurtis(u, v, w)
159
+
160
+
161
+ def canberra(u, v, w=None):
162
+ """Calculate the Canberra distance between two vectors.
163
+
164
+ The Canberra distance is a weighted version of the Manhattan distance, used
165
+ in numerical analysis.
166
+
167
+ Notes
168
+ -----
169
+ When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
170
+ is used in the calculation.
171
+
172
+ Parameters
173
+ ----------
174
+ - u, v: Input vectors between which the distance is to be calculated.
175
+
176
+ Returns
177
+ -------
178
+ - The Canberra distance between the two vectors.
179
+
180
+ References
181
+ ----------
182
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
183
+ Measures between Probability Density Functions. International
184
+ Journal of Mathematical Models and Methods in Applied Sciences.
185
+ 1(4), 300-307.
186
+ """
187
+ u, v = np.asarray(u), np.asarray(v)
188
+ return scipy.spatial.distance.canberra(u, v, w)
189
+
190
+
191
+ def cityblock(u, v, w=None):
192
+ """Calculate the Cityblock (Manhattan) distance between two vectors.
193
+
194
+ Parameters
195
+ ----------
196
+ - u, v: Input vectors between which the distance is to be calculated.
197
+
198
+ Returns
199
+ -------
200
+ - The Cityblock distance between the two vectors.
201
+
202
+ References
203
+ ----------
204
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
205
+ Measures between Probability Density Functions. International
206
+ Journal of Mathematical Models and Methods in Applied Sciences.
207
+ 1(4):300-307.
208
+
209
+ Synonyms:
210
+ City block distance
211
+ Manhattan distance
212
+ Rectilinear distance
213
+ Taxicab norm
214
+
215
+ Notes
216
+ -----
217
+ Cityblock distance between two probability density functions
218
+ (pdfs) equals:
219
+ 1. Non-intersection distance multiplied by 2.
220
+ 2. Gower distance multiplied by vector length.
221
+ 3. Bray-Curtis distance multiplied by 2.
222
+ 4. Google distance multiplied by 2.
223
+ """
224
+ u, v = np.asarray(u), np.asarray(v)
225
+ return scipy.spatial.distance.cityblock(u, v, w)
226
+
227
+
228
+ def chebyshev(u, v, w=None):
229
+ """Calculate the Chebyshev distance between two vectors.
230
+
231
+ The Chebyshev distance is a metric defined on a vector space where the distance
232
+ between two vectors
233
+ is the greatest of their differences along any coordinate dimension.
234
+
235
+ Synonyms:
236
+ Chessboard distance
237
+ King-move metric
238
+ Maximum value distance
239
+ Minimax approximation
240
+
241
+ Parameters
242
+ ----------
243
+ - u, v: Input vectors between which the distance is to be calculated.
244
+
245
+ Returns
246
+ -------
247
+ - The Chebyshev distance between the two vectors.
248
+
249
+ References
250
+ ----------
251
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
252
+ Measures between Probability Density Functions. International
253
+ Journal of Mathematical Models and Methods in Applied Sciences.
254
+ 1(4), 300-307.
128
255
  """
256
+ u, v = np.asarray(u), np.asarray(v)
257
+ return scipy.spatial.distance.chebyshev(u, v, w)
258
+
259
+
260
+ def correlation(u, v, w=None, centered=True):
261
+ """Calculate the Pearson correlation distance between two vectors.
129
262
 
130
- def __init__(self, epsilon=None):
131
- """Initialize the Distance class with an optional epsilon value.
132
-
133
- Parameters
134
- ----------
135
- - epsilon: A small value to avoid division by zero errors.
136
- """
137
- self.epsilon = np.finfo(float).eps if not epsilon else epsilon
138
-
139
- def euclidean(self, u, v, w=None):
140
- """Calculate the Euclidean distance between two vectors.
141
-
142
- The Euclidean distance is the "ordinary" straight-line distance between two
143
- points in Euclidean space.
144
-
145
- Parameters
146
- ----------
147
- - u, v: Input vectors between which the distance is to be calculated.
148
-
149
- Returns
150
- -------
151
- - The Euclidean distance between the two vectors.
152
-
153
- References
154
- ----------
155
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
156
- Measures between Probability Density Functions. International
157
- Journal of Mathematical Models and Methods in Applied Sciences.
158
- 1(4), 300-307.
159
- """
160
- u, v = np.asarray(u), np.asarray(v)
161
- return scipy.spatial.distance.euclidean(u, v, w)
162
-
163
- def braycurtis(self, u, v, w=None):
164
- """Calculate the Bray-Curtis distance between two vectors.
165
-
166
- The Bray-Curtis distance is a measure of dissimilarity between two non-negative
167
- vectors, often used in ecology to measure the compositional dissimilarity
168
- between two sites based on counts of species at both sites. It is closely
169
- related to the Sørensen distance and is also known as Bray-Curtis
170
- dissimilarity.
171
-
172
- Notes
173
- -----
174
- When used for comparing two probability density functions (pdfs),
175
- the Bray-Curtis distance equals the Cityblock distance divided by 2.
176
-
177
- Parameters
178
- ----------
179
- - u, v: Input vectors between which the distance is to be calculated.
180
-
181
- Returns
182
- -------
183
- - The Bray-Curtis distance between the two vectors.
184
-
185
- References
186
- ----------
187
- 1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
188
- southern Wisconsin. Ecological Monographs, 27, 325-349.
189
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
190
- Measures between Probability Density Functions. International
191
- Journal of Mathematical Models and Methods in Applied Sciences.
192
- 1(4), 300-307.
193
- 3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
194
- """
195
- u, v = np.asarray(u), np.asarray(v)
196
- return scipy.spatial.distance.braycurtis(u, v, w)
197
-
198
- def canberra(self, u, v, w=None):
199
- """Calculate the Canberra distance between two vectors.
200
-
201
- The Canberra distance is a weighted version of the Manhattan distance, used
202
- in numerical analysis.
203
-
204
- Notes
205
- -----
206
- When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
207
- is used in the calculation.
208
-
209
- Parameters
210
- ----------
211
- - u, v: Input vectors between which the distance is to be calculated.
212
-
213
- Returns
214
- -------
215
- - The Canberra distance between the two vectors.
216
-
217
- References
218
- ----------
219
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
220
- Measures between Probability Density Functions. International
221
- Journal of Mathematical Models and Methods in Applied Sciences.
222
- 1(4), 300-307.
223
- """
224
- u, v = np.asarray(u), np.asarray(v)
225
- return scipy.spatial.distance.canberra(u, v, w)
226
-
227
- def cityblock(self, u, v, w=None):
228
- """Calculate the Cityblock (Manhattan) distance between two vectors.
229
-
230
- Parameters
231
- ----------
232
- - u, v: Input vectors between which the distance is to be calculated.
233
-
234
- Returns
235
- -------
236
- - The Cityblock distance between the two vectors.
237
-
238
- References
239
- ----------
240
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
241
- Measures between Probability Density Functions. International
242
- Journal of Mathematical Models and Methods in Applied Sciences.
243
- 1(4):300-307.
244
-
245
- Synonyms:
246
- City block distance
247
- Manhattan distance
248
- Rectilinear distance
249
- Taxicab norm
250
-
251
- Notes
252
- -----
253
- Cityblock distance between two probability density functions
254
- (pdfs) equals:
255
- 1. Non-intersection distance multiplied by 2.
256
- 2. Gower distance multiplied by vector length.
257
- 3. Bray-Curtis distance multiplied by 2.
258
- 4. Google distance multiplied by 2.
259
- """
260
- u, v = np.asarray(u), np.asarray(v)
261
- return scipy.spatial.distance.cityblock(u, v, w)
262
-
263
- def chebyshev(self, u, v, w=None):
264
- """Calculate the Chebyshev distance between two vectors.
265
-
266
- The Chebyshev distance is a metric defined on a vector space where the distance
267
- between two vectors
268
- is the greatest of their differences along any coordinate dimension.
269
-
270
- Synonyms:
271
- Chessboard distance
272
- King-move metric
273
- Maximum value distance
274
- Minimax approximation
275
-
276
- Parameters
277
- ----------
278
- - u, v: Input vectors between which the distance is to be calculated.
279
-
280
- Returns
281
- -------
282
- - The Chebyshev distance between the two vectors.
283
-
284
- References
285
- ----------
286
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
287
- Measures between Probability Density Functions. International
288
- Journal of Mathematical Models and Methods in Applied Sciences.
289
- 1(4), 300-307.
290
- """
291
- u, v = np.asarray(u), np.asarray(v)
292
- return scipy.spatial.distance.chebyshev(u, v, w)
293
-
294
- def correlation(self, u, v, w=None, centered=True):
295
- """Calculate the Pearson correlation distance between two vectors.
296
-
297
- Returns a distance value between 0 and 2.
298
-
299
- Parameters
300
- ----------
301
- - u, v: Input vectors between which the distance is to be calculated.
302
-
303
- Returns
304
- -------
305
- - The Pearson correlation distance between the two vectors.
306
- """
307
- u, v = np.asarray(u), np.asarray(v)
308
- if len(u) < 2 or len(v) < 2:
263
+ Returns a distance value between 0 and 2.
264
+
265
+ Parameters
266
+ ----------
267
+ - u, v: Input vectors between which the distance is to be calculated.
268
+
269
+ Returns
270
+ -------
271
+ - The Pearson correlation distance between the two vectors.
272
+ """
273
+ u, v = np.asarray(u), np.asarray(v)
274
+ if len(u) < 2 or len(v) < 2:
275
+ warnings.warn(
276
+ "Pearson correlation requires vectors of length at least 2.",
277
+ RuntimeWarning,
278
+ )
279
+ d = 0
280
+ else:
281
+ d = scipy.spatial.distance.correlation(u, v, w, centered)
282
+ if np.isnan(d) and (
283
+ np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
284
+ ):
309
285
  warnings.warn(
310
- "Pearson correlation requires vectors of length at least 2.",
286
+ "One of the vectors is constant; correlation is set to 0",
311
287
  RuntimeWarning,
312
288
  )
313
289
  d = 0
314
- else:
315
- d = scipy.spatial.distance.correlation(u, v, w, centered)
316
- if np.isnan(d) and (
317
- np.allclose(u - np.mean(u), 0) or np.allclose(v - np.mean(v), 0)
318
- ):
319
- warnings.warn(
320
- "One of the vectors is constant; correlation is set to 0",
321
- RuntimeWarning,
322
- )
323
- d = 0
324
- return d
325
-
326
- def cosine(self, u, v, w=None):
327
- """Calculate the cosine distance between two vectors.
328
-
329
- Parameters
330
- ----------
331
- - u, v: Input vectors between which the distance is to be calculated.
332
-
333
- Returns
334
- -------
335
- - The cosine distance between the two vectors.
336
-
337
- References
338
- ----------
339
- 1. SciPy.
340
- """
341
- u, v = np.asarray(u), np.asarray(v)
342
- return scipy.spatial.distance.cosine(u, v, w)
343
-
344
- def clark(self, u, v):
345
- """Calculate the Clark distance between two vectors.
346
-
347
- The Clark distance equals the square root of half of the divergence.
348
-
349
- Notes
350
- -----
351
- When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
352
- is used in the calculation.
353
-
354
- Parameters
355
- ----------
356
- - u, v: Input vectors between which the distance is to be calculated.
357
-
358
- Returns
359
- -------
360
- - The Clark distance between the two vectors.
361
-
362
- References
363
- ----------
364
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
365
- Measures between Probability Density Functions. International
366
- Journal of Mathematical Models and Methods in Applied Sciences.
367
- 1(4), 300-307.
368
- """
369
- u, v = np.asarray(u), np.asarray(v)
370
- with np.errstate(divide="ignore", invalid="ignore"):
371
- return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
372
-
373
- def hellinger(self, u, v):
374
- """Calculate the Hellinger distance between two vectors.
375
-
376
- The Hellinger distance is a measure of similarity between two probability
377
- distributions.
378
-
379
- Parameters
380
- ----------
381
- - u, v: Input vectors between which the distance is to be calculated.
382
-
383
- Returns
384
- -------
385
- - The Hellinger distance between the two vectors.
386
-
387
- Notes
388
- -----
389
- This implementation produces values two times larger than values
390
- obtained by Hellinger distance described in Wikipedia and also
391
- in https://gist.github.com/larsmans/3116927.
392
-
393
- References
394
- ----------
395
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
396
- Measures between Probability Density Functions. International
397
- Journal of Mathematical Models and Methods in Applied Sciences.
398
- 1(4), 300-307.
399
- """
400
- u, v = np.asarray(u), np.asarray(v)
401
- # Clip negative values to zero for valid sqrt
402
- with np.errstate(divide="ignore", invalid="ignore"):
403
- u = np.clip(u, a_min=0, a_max=None)
404
- v = np.clip(v, a_min=0, a_max=None)
405
- return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
406
-
407
- def jaccard(self, u, v):
408
- """Calculate the Jaccard distance between two vectors.
409
-
410
- The Jaccard distance measures dissimilarity between sample sets.
411
-
412
- Parameters
413
- ----------
414
- - u, v: Input vectors between which the distance is to be calculated.
415
-
416
- Returns
417
- -------
418
- - The Jaccard distance between the two vectors.
419
-
420
- References
421
- ----------
422
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
423
- Measures between Probability Density Functions. International
424
- Journal of Mathematical Models and Methods in Applied Sciences.
425
- 1(4), 300-307.
426
- """
427
- u, v = np.asarray(u), np.asarray(v)
428
- uv = np.dot(u, v)
429
- return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
430
-
431
- def lorentzian(self, u, v):
432
- """Calculate the Lorentzian distance between two vectors.
433
-
434
- Parameters
435
- ----------
436
- - u, v: Input vectors between which the distance is to be calculated.
437
-
438
- Returns
439
- -------
440
- - The Lorentzian distance between the two vectors.
441
-
442
- References
443
- ----------
444
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
445
- Measures between Probability Density Functions. International
446
- Journal of Mathematical Models and Methods in Applied Sciences.
447
- 1(4):300-307.
448
-
449
- Notes
450
- -----
451
- One (1) is added to guarantee the non-negativity property and to
452
- eschew the log of zero.
453
- """
454
- u, v = np.asarray(u), np.asarray(v)
455
- with np.errstate(divide="ignore", invalid="ignore"):
456
- return np.sum(np.log(np.abs(u - v) + 1))
457
-
458
- def marylandbridge(self, u, v):
459
- """Calculate the Maryland Bridge distance between two vectors.
460
-
461
- Parameters
462
- ----------
463
- - u, v: Input vectors between which the distance is to be calculated.
464
-
465
- Returns
466
- -------
467
- - The Maryland Bridge distance between the two vectors.
468
-
469
- References
470
- ----------
471
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
472
- Springer-Verlag Berlin Heidelberg. 1-590.
473
- """
474
- u, v = np.asarray(u), np.asarray(v)
475
- uvdot = np.dot(u, v)
476
- return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
477
-
478
- def meehl(self, u, v):
479
- """Calculate the Meehl distance between two vectors.
480
-
481
- Parameters
482
- ----------
483
- - u, v: Input vectors between which the distance is to be calculated.
484
-
485
- Returns
486
- -------
487
- - The Meehl distance between the two vectors.
488
-
489
- Notes
490
- -----
491
- Added by SC.
492
-
493
- References
494
- ----------
495
- 1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
496
- Berlin, Heidelberg: Springer Berlin Heidelberg.
497
- https://doi.org/10.1007/978-3-642-30958-8.
498
- """
499
- u, v = np.asarray(u), np.asarray(v)
500
-
501
- xi = u[:-1]
502
- yi = v[:-1]
503
- xiplus1 = np.roll(u, 1)[:-1]
504
- yiplus1 = np.roll(v, 1)[:-1]
505
-
506
- with np.errstate(divide="ignore", invalid="ignore"):
507
- return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
508
-
509
- def motyka(self, u, v):
510
- """Calculate the Motyka distance between two vectors.
511
-
512
- Parameters
513
- ----------
514
- - u, v: Input vectors between which the distance is to be calculated.
515
-
516
- Returns
517
- -------
518
- - The Motyka distance between the two vectors.
519
-
520
- Notes
521
- -----
522
- The distance between identical vectors is not equal to 0 but 0.5.
523
-
524
- References
525
- ----------
526
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
527
- Measures between Probability Density Functions. International
528
- Journal of Mathematical Models and Methods in Applied Sciences.
529
- 1(4), 300-307.
530
- """
531
- u, v = np.asarray(u), np.asarray(v)
532
- return np.sum(np.maximum(u, v)) / np.sum(u + v)
533
-
534
- def soergel(self, u, v):
535
- """Calculate the Soergel distance between two vectors.
536
-
537
- Parameters
538
- ----------
539
- - u, v: Input vectors between which the distance is to be calculated.
540
-
541
- Returns
542
- -------
543
- - The Soergel distance between the two vectors.
544
-
545
- Notes
546
- -----
547
- Equals Tanimoto distance.
548
-
549
- References
550
- ----------
551
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
552
- Measures between Probability Density Functions. International
553
- Journal of Mathematical Models and Methods in Applied Sciences.
554
- 1(4), 300-307.
555
- """
556
- u, v = np.asarray(u), np.asarray(v)
557
- return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
558
-
559
- def wave_hedges(self, u, v):
560
- """Calculate the Wave Hedges distance between two vectors.
561
-
562
- Parameters
563
- ----------
564
- - u, v: Input vectors between which the distance is to be calculated.
565
-
566
- Returns
567
- -------
568
- - The Wave Hedges distance between the two vectors.
569
-
570
- References
571
- ----------
572
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
573
- Measures between Probability Density Functions. International
574
- Journal of Mathematical Models and Methods in Applied Sciences.
575
- 1(4), 300-307
576
- """
577
- u, v = np.asarray(u), np.asarray(v)
578
- with np.errstate(divide="ignore", invalid="ignore"):
579
- u_v = abs(u - v)
580
- uvmax = np.maximum(u, v)
581
- return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))
582
-
583
- def kulczynski(self, u, v):
584
- """Calculate the Kulczynski distance between two vectors.
585
-
586
- Parameters
587
- ----------
588
- - u, v: Input vectors between which the distance is to be calculated.
589
-
590
- Returns
591
- -------
592
- - The Kulczynski distance between the two vectors.
593
-
594
- References
595
- ----------
596
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
597
- Measures between Probability Density Functions. International
598
- Journal of Mathematical Models and Methods in Applied Sciences.
599
- 1(4):300-307.
600
- """
601
- u, v = np.asarray(u), np.asarray(v)
602
- return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
603
-
604
- def add_chisq(self, u, v):
605
- """Compute the Additive Symmetric Chi-square distance between two vectors.
606
-
607
- The Additive Symmetric Chi-square distance is a measure that
608
- can be used to compare two vectors. This function calculates it based
609
- on the input vectors u and v.
610
-
611
- Parameters
612
- ----------
613
- - u, v: Input vectors between which the distance is to be calculated.
614
-
615
- Returns
616
- -------
617
- - The Additive Symmetric Chi-square distance between the two vectors.
618
-
619
- References
620
- ----------
621
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
622
- Measures between Probability Density Functions.
623
- International Journal of Mathematical Models and Methods in
624
- Applied Sciences.
625
- vol. 1(4), pp. 300-307.
626
- """
627
- u, v = np.asarray(u), np.asarray(v)
628
- uvmult = u * v
629
- with np.errstate(divide="ignore", invalid="ignore"):
630
- return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
631
-
632
- # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
633
- # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
634
-
635
- def acc(self, u, v):
636
- """Calculate the average of Cityblock and Chebyshev distance.
637
-
638
- This function computes the ACC distance, also known as the
639
- Average distance, between two vectors u and v. It is the average of the
640
- Cityblock (or Manhattan) and Chebyshev distances.
641
-
642
- Parameters
643
- ----------
644
- - u, v: Input vectors between which the distance is to be calculated.
645
-
646
- Returns
647
- -------
648
- - The ACC distance between the two vectors.
649
-
650
- References
651
- ----------
652
- 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
653
- Geometry. Dover Publications.
654
- 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
655
- Measures between Probability Density Functions. International
656
- Journal of Mathematical Models and Methods in Applied Sciences.
657
- vol. 1(4), pp. 300-307.
658
- """
659
- return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
660
-
661
- # def bhattacharyya(self, u, v):
662
- # """
663
- # Calculate the Bhattacharyya distance between two vectors.
664
-
665
- # Returns a distance value between 0 and 1.
666
-
667
- # Parameters
668
- # ----------
669
- # - u, v: Input vectors between which the distance is to be calculated.
670
-
671
- # Returns
672
- # -------
673
- # - The Bhattacharyya distance between the two vectors.
674
-
675
- # References
676
- # ----------
677
- # 1. Bhattacharyya A (1947) On a measure of divergence between two
678
- # statistical populations defined by probability distributions,
679
- # Bull. Calcutta Math. Soc., 35, 99–109.
680
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
681
- # Measures between Probability Density Functions. International
682
- # Journal of Mathematical Models and Methods in Applied Sciences.
683
- # 1(4), 300-307.
684
- # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
685
- # """
686
- # u, v = np.asarray(u), np.asarray(v)
687
- # with np.errstate(divide="ignore", invalid="ignore"):
688
- # return -np.log(np.sum(np.sqrt(u * v)))
689
-
690
- def chebyshev_min(self, u, v):
691
- """Calculate the minimum value distance between two vectors.
692
-
693
- This measure represents a custom approach by Zielezinski to distance
694
- measurement, focusing on the minimum absolute difference.
695
-
696
- Parameters
697
- ----------
698
- - u, v: Input vectors between which the distance is to be calculated.
699
-
700
- Returns
701
- -------
702
- - The minimum value distance between the two vectors.
703
- """
704
- u, v = np.asarray(u), np.asarray(v)
705
- return np.amin(np.abs(u - v))
706
-
707
- def czekanowski(self, u, v):
708
- """Calculate the Czekanowski distance between two vectors.
709
-
710
- Parameters
711
- ----------
712
- - u, v: Input vectors between which the distance is to be calculated.
713
-
714
- Returns
715
- -------
716
- - The Czekanowski distance between the two vectors.
717
-
718
- References
719
- ----------
720
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
721
- Measures between Probability Density Functions. International
722
- Journal of Mathematical Models and Methods in Applied Sciences.
723
- 1(4), 300-307.
724
- """
725
- u, v = np.asarray(u), np.asarray(v)
726
- return np.sum(np.abs(u - v)) / np.sum(u + v)
727
-
728
- def dice(self, u, v):
729
- """Calculate the Dice dissimilarity between two vectors.
730
-
731
- Synonyms:
732
- Sorensen distance
733
-
734
- Parameters
735
- ----------
736
- - u, v: Input vectors between which the distance is to be calculated.
737
-
738
- Returns
739
- -------
740
- - The Dice dissimilarity between the two vectors.
741
-
742
- References
743
- ----------
744
- 1. Dice LR (1945) Measures of the amount of ecologic association
745
- between species. Ecology. 26, 297-302.
746
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
747
- Measures between Probability Density Functions. International
748
- Journal of Mathematical Models and Methods in Applied Sciences.
749
- 1(4), 300-307.
750
- """
751
- u, v = np.asarray(u), np.asarray(v)
752
- u_v = u - v
753
- return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
754
-
755
- def divergence(self, u, v):
756
- """Calculate the divergence between two vectors.
757
-
758
- Divergence equals squared Clark distance multiplied by 2.
759
-
760
- Parameters
761
- ----------
762
- - u, v: Input vectors between which the distance is to be calculated.
763
-
764
- Returns
765
- -------
766
- - The divergence between the two vectors.
767
-
768
- References
769
- ----------
770
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
771
- Measures between Probability Density Functions. International
772
- Journal of Mathematical Models and Methods in Applied Sciences.
773
- 1(4), 300-307.
774
- """
775
- u, v = np.asarray(u), np.asarray(v)
776
- with np.errstate(invalid="ignore"):
777
- return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
778
-
779
- def google(self, u, v):
780
- """Calculate the Normalized Google Distance (NGD) between two vectors.
781
-
782
- NGD is a measure of similarity derived from the number of hits returned by the
783
- Google search engine for a given set of keywords.
784
-
785
- Parameters
786
- ----------
787
- - u, v: Input vectors between which the distance is to be calculated.
788
-
789
- Returns
790
- -------
791
- - The Normalized Google Distance between the two vectors.
792
-
793
- Notes
794
- -----
795
- When used for comparing two probability density functions (pdfs),
796
- Google distance equals half of Cityblock distance.
797
-
798
- References
799
- ----------
800
- 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
801
- doi:10.1109/ITSIM.2008.4631601.
802
- """
803
- u, v = np.asarray(u), np.asarray(v)
804
- x = float(np.sum(u))
805
- y = float(np.sum(v))
806
- summin = float(np.sum(np.minimum(u, v)))
807
- return (max([x, y]) - summin) / ((x + y) - min([x, y]))
808
-
809
- def gower(self, u, v):
810
- """Calculate the Gower distance between two vectors.
811
-
812
- The Gower distance equals the Cityblock distance divided by the vector length.
813
-
814
- Parameters
815
- ----------
816
- - u, v: Input vectors between which the distance is to be calculated.
817
-
818
- Returns
819
- -------
820
- - The Gower distance between the two vectors.
821
-
822
- References
823
- ----------
824
- 1. Gower JC. (1971) General Coefficient of Similarity
825
- and Some of Its Properties, Biometrics 27, 857-874.
826
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
827
- Measures between Probability Density Functions. International
828
- Journal of Mathematical Models and Methods in Applied Sciences.
829
- 1(4), 300-307.
830
- """
831
- u, v = np.asarray(u), np.asarray(v)
832
- return np.sum(np.abs(u - v)) / u.size
833
-
834
- def jeffreys(self, u, v):
835
- """Calculate the Jeffreys divergence between two vectors.
836
-
837
- The Jeffreys divergence is a symmetric version of the Kullback-Leibler
838
- divergence.
839
-
840
- Parameters
841
- ----------
842
- - u, v: Input vectors between which the divergence is to be calculated.
843
-
844
- Returns
845
- -------
846
- - The Jeffreys divergence between the two vectors.
847
-
848
- References
849
- ----------
850
- 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
851
- in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
852
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
853
- Measures between Probability Density Functions. International
854
- Journal of Mathematical Models and Methods in Applied Sciences.
855
- 1(4), 300-307.
856
- """
857
- u, v = np.asarray(u), np.asarray(v)
858
- # Add epsilon to zeros in vectors to avoid division
859
- # by 0 and/or log of 0. Alternatively, zeros in the
860
- # vectors could be ignored or masked (see below).
861
- # u = ma.masked_where(u == 0, u)
862
- # v = ma.masked_where(v == 0, u)
863
- with np.errstate(divide="ignore", invalid="ignore"):
864
- u[u == 0] = self.epsilon
865
- v[v == 0] = self.epsilon
866
- # Clip negative values to zero for valid log
867
- udivv = np.clip(u / v, a_min=self.epsilon, a_max=None)
868
- return np.sum((u - v) * np.log(udivv))
869
-
870
- def jensenshannon_divergence(self, u, v):
871
- """Calculate the Jensen-Shannon divergence between two vectors.
872
-
873
- The Jensen-Shannon divergence is a symmetric and finite measure of similarity
874
- between two probability distributions.
875
-
876
- Parameters
877
- ----------
878
- - u, v: Input vectors between which the divergence is to be calculated.
879
-
880
- Returns
881
- -------
882
- - The Jensen-Shannon divergence between the two vectors.
883
-
884
- References
885
- ----------
886
- 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
887
- IEEE Transactions on Information Theory, 37(1):145–151.
888
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
889
- Measures between Probability Density Functions. International
890
- Journal of Mathematical Models and Methods in Applied Sciences.
891
- 1(4), 300-307.
892
- Comments:
893
- Equals Jensen difference in Sung-Hyuk (2007):
894
- u = np.where(u==0, self.epsilon, u)
895
- v = np.where(v==0, self.epsilon, v)
896
- el1 = (u * np.log(u) + v * np.log(v)) / 2
897
- el2 = (u + v)/2
898
- el3 = np.log(el2)
899
- return np.sum(el1 - el2 * el3)
900
- """
901
- u, v = np.asarray(u), np.asarray(v)
902
- with np.errstate(divide="ignore", invalid="ignore"):
903
- # Clip negative values to zero for valid log
904
- u[u == 0] = self.epsilon
905
- v[v == 0] = self.epsilon
906
-
907
- term1 = np.clip(2 * u / (u + v), a_min=self.epsilon, a_max=None)
908
- term2 = np.clip(2 * v / (u + v), a_min=self.epsilon, a_max=None)
909
-
910
- dl = u * np.log(term1)
911
- dr = v * np.log(term2)
912
- return (np.sum(dl) + np.sum(dr)) / 2
913
-
914
- def jensen_difference(self, u, v):
915
- """Calculate the Jensen difference between two vectors.
916
-
917
- The Jensen difference is considered similar to the Jensen-Shannon divergence.
918
-
919
- Parameters
920
- ----------
921
- - u, v: Input vectors between which the distance is to be calculated.
922
-
923
- Returns
924
- -------
925
- - The Jensen difference between the two vectors.
926
-
927
- Notes
928
- -----
929
- 1. Equals half of Topsøe distance
930
- 2. Equals squared jensenshannon_distance.
931
-
932
-
933
- References
934
- ----------
935
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
936
- Measures between Probability Density Functions. International
937
- Journal of Mathematical Models and Methods in Applied Sciences.
938
- 1(4), 300-307.
939
- """
940
- u, v = np.asarray(u), np.asarray(v)
941
-
942
- with np.errstate(divide="ignore", invalid="ignore"):
943
- # Clip negative values to eps for valid log
944
- u = np.clip(u, self.epsilon, None)
945
- v = np.clip(v, self.epsilon, None)
946
- el1 = (u * np.log(u) + v * np.log(v)) / 2
947
- el2 = np.clip((u + v) / 2, a_min=self.epsilon, a_max=None)
948
- return np.sum(el1 - el2 * np.log(el2))
949
-
950
- def kumarjohnson(self, u, v):
951
- """Calculate the Kumar-Johnson distance between two vectors.
952
-
953
- Parameters
954
- ----------
955
- - u, v: Input vectors between which the distance is to be calculated.
956
-
957
- Returns
958
- -------
959
- - The Kumar-Johnson distance between the two vectors.
960
-
961
- References
962
- ----------
963
- 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
964
- and information inequalities, Journal of Inequalities in pure
965
- and applied Mathematics. 6(3).
966
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
967
- Measures between Probability Density Functions. International
968
- Journal of Mathematical Models and Methods in Applied Sciences.
969
- 1(4):300-307.
970
- """
971
- u, v = np.asarray(u), np.asarray(v)
972
- uvmult = u * v
973
- with np.errstate(divide="ignore", invalid="ignore"):
974
- numer = np.power(u**2 - v**2, 2)
975
- denom = 2 * np.power(uvmult, 3 / 2)
976
- return np.sum(np.where(uvmult != 0, numer / denom, 0))
977
-
978
- def matusita(self, u, v):
979
- """Calculate the Matusita distance between two vectors.
980
-
981
- Parameters
982
- ----------
983
- - u, v: Input vectors between which the distance is to be calculated.
984
-
985
- Returns
986
- -------
987
- - The Matusita distance between the two vectors.
988
-
989
- References
990
- ----------
991
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
992
- Measures between Probability Density Functions. International
993
- Journal of Mathematical Models and Methods in Applied Sciences.
994
- 1(4):300-307.
995
-
996
- Notes
997
- -----
998
- Equals square root of Squared-chord distance.
999
- """
1000
- u, v = np.asarray(u), np.asarray(v)
1001
- with np.errstate(divide="ignore", invalid="ignore"):
1002
- return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
1003
-
1004
- def minkowski(self, u, v, p=2):
1005
- """Calculate the Minkowski distance between two vectors.
1006
-
1007
- Parameters
1008
- ----------
1009
- - u, v: Input vectors between which the distance is to be calculated.
1010
- - p: The order of the norm of the difference.
1011
-
1012
- Returns
1013
- -------
1014
- - The Minkowski distance between the two vectors.
1015
-
1016
- Notes
1017
- -----
1018
- When p goes to infinite, the Chebyshev distance is derived.
1019
-
1020
- References
1021
- ----------
1022
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1023
- Measures between Probability Density Functions. International
1024
- Journal of Mathematical Models and Methods in Applied Sciences.
1025
- 1(4):300-307.
1026
- """
1027
- u, v = np.asarray(u), np.asarray(v)
1028
- return np.linalg.norm(u - v, ord=p)
1029
-
1030
- def penroseshape(self, u, v):
1031
- """Calculate the Penrose shape distance between two vectors.
1032
-
1033
- Parameters
1034
- ----------
1035
- - u, v: Input vectors between which the distance is to be calculated.
1036
-
1037
- Returns
1038
- -------
1039
- - The Penrose shape distance between the two vectors.
1040
-
1041
- References
1042
- ----------
1043
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1044
- Springer-Verlag Berlin Heidelberg. 1-590.
1045
- """
1046
- u, v = np.asarray(u), np.asarray(v)
1047
- umu = np.mean(u)
1048
- vmu = np.mean(v)
1049
- with np.errstate(divide="ignore", invalid="ignore"):
1050
- return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1051
-
1052
- def prob_chisq(self, u, v):
1053
- """Calculate the Probabilistic chi-square distance between two vectors.
1054
-
1055
- Parameters
1056
- ----------
1057
- - u, v: Input vectors between which the distance is to be calculated.
1058
-
1059
- Returns
1060
- -------
1061
- - The Probabilistic chi-square distance between the two vectors.
1062
-
1063
- Notes
1064
- -----
1065
- Added by SC.
1066
- """
1067
- u, v = np.asarray(u), np.asarray(v)
1068
- uvsum = u + v
1069
- with np.errstate(divide="ignore", invalid="ignore"):
1070
- return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1071
-
1072
- def ruzicka(self, u, v):
1073
- """Calculate the Ruzicka distance between two vectors.
1074
-
1075
- Parameters
1076
- ----------
1077
- - u, v: Input vectors between which the distance is to be calculated.
1078
-
1079
- Returns
1080
- -------
1081
- - The Ruzicka distance between the two vectors.
1082
-
1083
- Notes
1084
- -----
1085
- Added by SC.
1086
- """
1087
- u, v = np.asarray(u), np.asarray(v)
1088
- den = np.sum(np.maximum(u, v))
1089
-
1090
- return 1 - np.sum(np.minimum(u, v)) / den
1091
-
1092
- def sorensen(self, u, v):
1093
- """Calculate the Sorensen distance between two vectors.
1094
-
1095
- Parameters
1096
- ----------
1097
- - u, v: Input vectors between which the distance is to be calculated.
1098
-
1099
- Returns
1100
- -------
1101
- - The Sorensen distance between the two vectors.
1102
-
1103
- Notes
1104
- -----
1105
- The Sorensen distance equals the Manhattan distance divided by the sum of
1106
- the two vectors.
1107
-
1108
- Added by SC.
1109
- """
1110
- u, v = np.asarray(u), np.asarray(v)
1111
- return np.sum(np.abs(u - v)) / np.sum(u + v)
1112
-
1113
- def squared_chisq(self, u, v):
1114
- """Calculate the Squared chi-square distance between two vectors.
1115
-
1116
- Parameters
1117
- ----------
1118
- - u, v: Input vectors between which the distance is to be calculated.
1119
-
1120
- Returns
1121
- -------
1122
- - The Squared chi-square distance between the two vectors.
1123
-
1124
- References
1125
- ----------
1126
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1127
- Measures between Probability Density Functions. International
1128
- Journal of Mathematical Models and Methods in Applied Sciences.
1129
- 1(4), 300-307.
1130
- """
1131
- u, v = np.asarray(u), np.asarray(v)
290
+ return d
291
+
292
+
293
def cosine(u, v, w=None):
    """Return the cosine distance between two vectors.

    Thin wrapper around ``scipy.spatial.distance.cosine``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.
    - w: Optional per-element weights, forwarded unchanged to SciPy.

    Returns
    -------
    - The cosine distance between the two vectors.

    References
    ----------
    1. SciPy.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    return scipy.spatial.distance.cosine(u, v, w)
310
+
311
+
312
def clark(u, v):
    """Return the Clark distance between two vectors.

    The Clark distance equals the square root of half of the divergence
    measure.

    Notes
    -----
    Coordinates where both ``u[i]`` and ``v[i]`` are 0 produce 0/0 = NaN,
    which ``nansum`` treats as a zero contribution.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Clark distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = np.abs(u - v) / (u + v)
        return np.sqrt(np.nansum(ratio * ratio))
340
+
341
+
342
def hellinger(u, v):
    """Return the Hellinger distance between two vectors.

    A measure of similarity between two probability distributions.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Hellinger distance between the two vectors.

    Notes
    -----
    This implementation produces values two times larger than the
    Hellinger distance described in Wikipedia and in
    https://gist.github.com/larsmans/3116927.
    Negative entries are clamped to zero so the square roots stay real.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        # clamp negatives to zero before taking square roots
        root_diff = np.sqrt(np.maximum(u, 0)) - np.sqrt(np.maximum(v, 0))
        return np.sqrt(2 * np.sum(root_diff**2))
375
+
376
+
377
def jaccard(u, v):
    """Return the Jaccard distance between two vectors.

    Measures dissimilarity between sample sets, computed here from
    inner products of the real-valued inputs.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Jaccard distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    inner = np.dot(u, v)
    denom = np.dot(u, u) + np.dot(v, v) - inner
    return 1 - inner / denom
400
+
401
+
402
def lorentzian(u, v):
    """Return the Lorentzian distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Lorentzian distance between the two vectors.

    Notes
    -----
    One (1) is added inside the logarithm to guarantee non-negativity
    and to avoid log of zero.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        shifted = np.abs(u - v) + 1
        return np.sum(np.log(shifted))
428
+
429
+
430
def marylandbridge(u, v):
    """Return the Maryland Bridge distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Maryland Bridge distance between the two vectors.

    References
    ----------
    1. Deza M, Deza E (2009) Encyclopedia of Distances.
       Springer-Verlag Berlin Heidelberg. 1-590.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    inner = np.dot(u, v)
    # average of the two normalized projections
    avg = (inner / np.dot(u, u) + inner / np.dot(v, v)) / 2
    return 1 - avg
449
+
450
+
451
def meehl(u, v):
    """Return the Meehl distance between two vectors.

    Sums the squared second-order term ``(x_i - y_i - x_{i+1} + y_{i+1})``
    over consecutive coordinate pairs.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Meehl distance between the two vectors.

    Notes
    -----
    Added by SC.
    Bug fix: the previous implementation used ``np.roll(u, 1)[:-1]`` for
    the "next" element, which actually yields the *previous* element with
    wraparound (``[u[-1], u[0], ...]``); ``u[1:]`` is the correct
    ``x_{i+1}`` term.

    References
    ----------
    1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
       Berlin, Heidelberg: Springer Berlin Heidelberg.
       https://doi.org/10.1007/978-3-642-30958-8.
    """
    u = np.asarray(u)
    v = np.asarray(v)

    xi = u[:-1]
    yi = v[:-1]
    xnext = u[1:]
    ynext = v[1:]

    with np.errstate(divide="ignore", invalid="ignore"):
        return np.nansum((xi - yi - xnext + ynext) ** 2)
481
+
482
+
483
def motyka(u, v):
    """Return the Motyka distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Motyka distance between the two vectors.

    Notes
    -----
    The distance between identical vectors is not 0 but 0.5.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    peak = np.maximum(u, v)
    return peak.sum() / (u + v).sum()
507
+
508
+
509
def soergel(u, v):
    """Return the Soergel distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Soergel distance between the two vectors.

    Notes
    -----
    Equals the Tanimoto distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    total_diff = np.abs(u - v).sum()
    return total_diff / np.maximum(u, v).sum()
533
+
534
+
535
def wave_hedges(u, v):
    """Return the Wave Hedges distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = abs(u - v)
        peak = np.maximum(u, v)
        # 0/0 and x/0 coordinates contribute zero
        valid = (diff != 0) & (peak != 0)
        return np.sum(np.where(valid, diff / peak, 0))
558
+
559
+
560
def kulczynski(u, v):
    """Return the Kulczynski distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Kulczynski distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    return np.abs(u - v).sum() / np.minimum(u, v).sum()
580
+
581
+
582
def add_chisq(u, v):
    """Compute the Additive Symmetric Chi-square distance between two vectors.

    Coordinates where the elementwise product ``u * v`` is zero contribute
    nothing to the sum.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Additive Symmetric Chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions.
       International Journal of Mathematical Models and Methods in
       Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    prod = u * v
    with np.errstate(divide="ignore", invalid="ignore"):
        term = ((u - v) ** 2 * (u + v)) / prod
        return np.sum(np.where(prod != 0, term, 0))
609
+
610
+
611
+ # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
612
+ # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
613
+
614
+
615
def acc(u, v):
    """Return the average of the Cityblock and Chebyshev distances.

    Also known as the Average distance: the arithmetic mean of the
    Cityblock (Manhattan) and Chebyshev distances between u and v.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The ACC distance between the two vectors.

    References
    ----------
    1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
       Geometry. Dover Publications.
    2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    manhattan = cityblock(u, v)
    cheb = chebyshev(u, v)
    return (manhattan + cheb) / 2
640
+
641
+
642
+ # def bhattacharyya(u, v):
643
+ # """
644
+ # Calculate the Bhattacharyya distance between two vectors.
645
+
646
+ # Returns a distance value between 0 and 1.
647
+
648
+ # Parameters
649
+ # ----------
650
+ # - u, v: Input vectors between which the distance is to be calculated.
651
+
652
+ # Returns
653
+ # -------
654
+ # - The Bhattacharyya distance between the two vectors.
655
+
656
+ # References
657
+ # ----------
658
+ # 1. Bhattacharyya A (1947) On a measure of divergence between two
659
+ # statistical populations defined by probability distributions,
660
+ # Bull. Calcutta Math. Soc., 35, 99–109.
661
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
662
+ # Measures between Probability Density Functions. International
663
+ # Journal of Mathematical Models and Methods in Applied Sciences.
664
+ # 1(4), 300-307.
665
+ # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
666
+ # """
667
+ # u, v = np.asarray(u), np.asarray(v)
668
+ # with np.errstate(divide="ignore", invalid="ignore"):
669
+ # return -np.log(np.sum(np.sqrt(u * v)))
670
+
671
+
672
def chebyshev_min(u, v):
    """Return the minimum absolute difference between two vectors.

    A custom measure by Zielezinski that takes the smallest, rather than
    the largest, coordinatewise absolute difference.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The minimum value distance between the two vectors.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    return np.abs(u - v).min()
688
+
689
+
690
def czekanowski(u, v):
    """Return the Czekanowski distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Czekanowski distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    return np.abs(u - v).sum() / (u + v).sum()
710
+
711
+
712
def dice(u, v):
    """Return the Dice dissimilarity between two vectors.

    Synonyms:
        Sorensen distance

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Dice dissimilarity between the two vectors.

    References
    ----------
    1. Dice LR (1945) Measures of the amount of ecologic association
       between species. Ecology. 26, 297-302.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    diff = u - v
    return np.dot(diff, diff) / (np.dot(u, u) + np.dot(v, v))
738
+
739
+
740
def divergence(u, v):
    """Return the divergence between two vectors.

    Equals the squared Clark distance multiplied by 2. Coordinates where
    both inputs are zero yield NaN, which ``nansum`` drops.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The divergence between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(invalid="ignore"):
        return 2 * np.nansum((u - v) ** 2 / (u + v) ** 2)
763
+
764
+
765
def google(u, v):
    """Return the Normalized Google Distance (NGD) between two vectors.

    NGD is a similarity measure originally derived from search-engine hit
    counts for sets of keywords.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Normalized Google Distance between the two vectors.

    Notes
    -----
    When used for comparing two probability density functions (pdfs),
    Google distance equals half of Cityblock distance.

    References
    ----------
    1. Lee & Rashid (2008) Information Technology, ITSim 2008.
       doi:10.1109/ITSIM.2008.4631601.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    total_u = float(np.sum(u))
    total_v = float(np.sum(v))
    overlap = float(np.sum(np.minimum(u, v)))
    numer = max(total_u, total_v) - overlap
    denom = (total_u + total_v) - min(total_u, total_v)
    return numer / denom
794
+
795
+
796
def gower(u, v):
    """Return the Gower distance between two vectors.

    Equals the Cityblock distance divided by the vector length.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Gower distance between the two vectors.

    References
    ----------
    1. Gower JC. (1971) General Coefficient of Similarity
       and Some of Its Properties, Biometrics 27, 857-874.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    return np.abs(u - v).sum() / u.size
820
+
821
+
822
def jeffreys(u, v):
    """Calculate the Jeffreys divergence between two vectors.

    The Jeffreys divergence is a symmetric version of the Kullback-Leibler
    divergence.

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.

    Returns
    -------
    - The Jeffreys divergence between the two vectors.

    Notes
    -----
    Zeros are replaced by a tiny epsilon to avoid division by zero and
    log(0). The inputs are copied to float arrays first, so the caller's
    data is never mutated (the previous in-place assignment modified
    ndarray arguments and silently no-opped on integer dtypes).

    References
    ----------
    1. Jeffreys H (1946) An Invariant Form for the Prior Probability
       in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # float copies: never mutate the caller's arrays, and epsilon
    # substitution works even for integer inputs
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    eps = np.finfo(float).eps
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Clip the ratio from below so the log stays finite and real
        ratio = np.clip(u / v, a_min=eps, a_max=None)
        return np.sum((u - v) * np.log(ratio))
857
+
858
+
859
def jensenshannon_divergence(u, v):
    """Calculate the Jensen-Shannon divergence between two vectors.

    The Jensen-Shannon divergence is a symmetric and finite measure of
    similarity between two probability distributions.

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.

    Returns
    -------
    - The Jensen-Shannon divergence between the two vectors.

    Notes
    -----
    Zeros are replaced by a tiny epsilon to keep the logs finite. The
    inputs are copied to float arrays first, so the caller's data is never
    mutated (the previous in-place assignment modified ndarray arguments
    and silently no-opped on integer dtypes).

    References
    ----------
    1. Lin J. (1991) Divergence measures based on the Shannon entropy.
       IEEE Transactions on Information Theory, 37(1):145-151.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # float copies: never mutate the caller's arrays, and epsilon
    # substitution works even for integer inputs
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    eps = np.finfo(float).eps
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        total = u + v
        # clip from below so log arguments stay positive
        left = u * np.log(np.clip(2 * u / total, a_min=eps, a_max=None))
        right = v * np.log(np.clip(2 * v / total, a_min=eps, a_max=None))
        return (np.sum(left) + np.sum(right)) / 2
902
+
903
+
904
def jensen_difference(u, v):
    """Return the Jensen difference between two vectors.

    Considered similar to the Jensen-Shannon divergence.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Jensen difference between the two vectors.

    Notes
    -----
    1. Equals half of the Topsøe distance.
    2. Equals the squared jensenshannon_distance.
    Entries are clipped from below to EPSILON so every log is finite.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)

    with np.errstate(divide="ignore", invalid="ignore"):
        u = np.clip(u, EPSILON, None)
        v = np.clip(v, EPSILON, None)
        entropy_pair = (u * np.log(u) + v * np.log(v)) / 2
        midpoint = np.clip((u + v) / 2, a_min=EPSILON, a_max=None)
        return np.sum(entropy_pair - midpoint * np.log(midpoint))
939
+
940
+
941
def kumarjohnson(u, v):
    """Return the Kumar-Johnson distance between two vectors.

    Coordinates where the elementwise product ``u * v`` is zero contribute
    nothing to the sum.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Kumar-Johnson distance between the two vectors.

    References
    ----------
    1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
       and information inequalities, Journal of Inequalities in pure
       and applied Mathematics. 6(3).
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    prod = u * v
    with np.errstate(divide="ignore", invalid="ignore"):
        numer = (u**2 - v**2) ** 2
        denom = 2 * np.power(prod, 3 / 2)
        return np.sum(np.where(prod != 0, numer / denom, 0))
968
+
969
+
970
def matusita(u, v):
    """Return the Matusita distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Matusita distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.

    Notes
    -----
    Equals the square root of the Squared-chord distance.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        root_diff = np.sqrt(u) - np.sqrt(v)
        return np.sqrt(np.sum(root_diff**2))
995
+
996
+
997
def minkowski(u, v, p=2):
    """Calculate the Minkowski distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.
    - p: The order of the norm of the difference.

    Returns
    -------
    - The Minkowski distance between the two vectors.

    Notes
    -----
    When p goes to infinite, the Chebyshev distance is derived.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    diff = np.asarray(u) - np.asarray(v)
    # p-norm of the difference vector.
    return np.linalg.norm(diff, ord=p)
1022
+
1023
+
1024
def penroseshape(u, v):
    """Calculate the Penrose shape distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Penrose shape distance between the two vectors.

    References
    ----------
    1. Deza M, Deza E (2009) Encyclopedia of Distances.
       Springer-Verlag Berlin Heidelberg. 1-590.
    """
    p, q = np.asarray(u), np.asarray(v)
    # Euclidean distance between the mean-centered vectors.
    centered = (p - np.mean(p)) - (q - np.mean(q))
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.sqrt(np.sum(np.square(centered)))
1045
+
1046
+
1047
def prob_chisq(u, v):
    """Calculate the Probabilistic chi-square distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Probabilistic chi-square distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    p, q = np.asarray(u), np.asarray(v)
    total = p + q
    with np.errstate(divide="ignore", invalid="ignore"):
        # (p - q)^2 / (p + q) per element, treated as 0 when p + q == 0.
        terms = np.where(total != 0, np.square(p - q) / total, 0)
        return 2 * np.sum(terms)
1066
+
1067
+
1068
def ruzicka(u, v):
    """Calculate the Ruzicka distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Ruzicka distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    p, q = np.asarray(u), np.asarray(v)
    overlap = np.sum(np.minimum(p, q))
    # One minus the ratio of elementwise minima to elementwise maxima.
    return 1 - overlap / np.sum(np.maximum(p, q))
1087
+
1088
+
1089
def sorensen(u, v):
    """Calculate the Sorensen distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Sorensen distance between the two vectors.

    Notes
    -----
    The Sorensen distance equals the Manhattan distance divided by the sum of
    the two vectors.

    Added by SC.
    """
    p, q = np.asarray(u), np.asarray(v)
    manhattan = np.sum(np.abs(p - q))
    total = np.sum(p + q)
    return manhattan / total
1109
+
1110
+
1111
def squared_chisq(u, v):
    """Calculate the Squared chi-square distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    p, q = np.asarray(u), np.asarray(v)
    total = p + q
    with np.errstate(divide="ignore", invalid="ignore"):
        # Elementwise (p - q)^2 / (p + q), zero wherever p + q == 0.
        terms = np.where(total != 0, np.square(p - q) / total, 0)
        return np.sum(terms)
1133
+
1134
+
1135
def squaredchord(u, v):
    """Calculate the Squared-chord distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared-chord distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356-367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals to squared Matusita distance.
    """
    p, q = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        root_diff = np.sqrt(p) - np.sqrt(q)
        return np.sum(np.square(root_diff))
1163
+
1164
+
1165
def squared_euclidean(u, v):
    """Calculate the Squared Euclidean distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared Euclidean distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356-367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    diff = np.asarray(u) - np.asarray(v)
    # Inner product of the difference with itself.
    return np.dot(diff, diff)
1192
+
1193
+
1194
def taneja(u, v):
    """Calculate the Taneja distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Taneja distance between the two vectors.

    References
    ----------
    1. Taneja IJ. (1995), New Developments in Generalized Information
       Measures, Chapter in: Advances in Imaging and Electron Physics,
       Ed. P.W. Hawkes, 91, 37-135.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    # Same value as np.finfo(float).eps used module-wide (EPSILON).
    eps = np.finfo(float).eps
    # Convert to float copies instead of mutating in place:
    # the previous `u[u == 0] = eps` wrote through to the caller's
    # array (np.asarray makes no copy) and silently stored 0 for
    # integer-dtype inputs, producing inf in the log below.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        uvsum = u + v
        # Clip the log argument away from 0 for numerical safety.
        logarg = np.clip(uvsum / (2 * np.sqrt(u * v)), a_min=eps, a_max=None)
        return np.sum((uvsum / 2) * np.log(logarg))
1222
+
1223
+
1224
def tanimoto(u, v):
    """Calculate the Tanimoto distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Tanimoto distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Soergel distance.
    """
    p, q = np.asarray(u), np.asarray(v)
    # Equivalent formulation: sum(|p - q|) / sum(max(p, q)).
    overlap = np.sum(np.minimum(p, q))
    total = np.sum(p) + np.sum(q)
    return (total - 2 * overlap) / (total - overlap)
1252
+
1253
+
1254
def topsoe(u, v):
    """Calculate the Topsøe distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Topsøe distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals two times Jensen-Shannon divergence.
    """
    # Same value as np.finfo(float).eps used module-wide (EPSILON).
    eps = np.finfo(float).eps
    # Convert to float copies instead of mutating in place:
    # the previous `u[u == 0] = eps` wrote through to the caller's
    # array (np.asarray makes no copy) and silently stored 0 for
    # integer-dtype inputs.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    u = np.where(u == 0, eps, u)
    v = np.where(v == 0, eps, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Clip the log arguments away from 0 for numerical safety.
        logarg1 = np.clip(2 * u / (u + v), a_min=eps, a_max=None)
        logarg2 = np.clip(2 * v / (u + v), a_min=eps, a_max=None)
        return np.sum(u * np.log(logarg1) + v * np.log(logarg2))
1285
+
1286
+
1287
def vicis_symmetric_chisq(u, v):
    """Calculate the Vicis Symmetric chi-square distance.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis Symmetric chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    p, q = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        sq_diff = np.square(p - q)
        sq_floor = np.square(np.minimum(p, q))
        # Zero contribution wherever min(p, q) == 0.
        return np.sum(np.where(sq_floor != 0, sq_diff / sq_floor, 0))
1310
+
1311
+
1312
def vicis_wave_hedges(u, v):
    """Calculate the Vicis-Wave Hedges distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis-Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    p, q = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        abs_diff = np.abs(p - q)
        floor = np.minimum(p, q)
        # Zero contribution wherever min(p, q) == 0.
        return np.sum(np.where(floor != 0, abs_diff / floor, 0))
1335
+
1336
+
1337
+ # def fidelity(u, v):
1338
+ # """
1339
+ # Calculate the fidelity distance between two vectors.
1340
+
1341
+ # The fidelity distance measures the similarity between two probability
1342
+ # distributions.
1343
+
1344
+ # Parameters
1345
+ # ----------
1346
+ # - u, v: Input vectors between which the distance is to be calculated.
1347
+
1348
+ # Returns
1349
+ # -------
1350
+ # - The fidelity distance between the two vectors.
1351
+
1352
+ # Notes
1353
+ # -----
1354
+ # Added by SC.
1355
+ # """
1356
+ # u, v = np.asarray(u), np.asarray(v)
1357
+ # return 1 - (np.sum(np.sqrt(u * v)))
1358
+
1359
+ # # NEEDS CHECKING
1360
+ # # def harmonicmean(u, v):
1361
+ # # """
1362
+ # # Harmonic mean distance.
1363
+ # # Notes:
1364
+ # # Added by SC.
1365
+ # # """
1366
+ # # u, v = np.asarray(u), np.asarray(v)
1367
+ # # return 1 - 2.0 * np.sum(u * v / (u + v))
1368
+
1369
+ # # def inner(u, v):
1370
+ # # """
1371
+ # # Calculate the inner product distance between two vectors.
1372
+
1373
+ # # The inner product distance is a measure of
1374
+ # # similarity between two vectors,
1375
+ # # based on their inner product.
1376
+
1377
+ # # Parameters
1378
+ # # ----------
1379
+ # # - u, v: Input vectors between which the distance is to be calculated.
1380
+
1381
+ # # Returns
1382
+ # # -------
1383
+ # # - The inner product distance between the two vectors.
1384
+
1385
+ # # Notes
1386
+ # # -----
1387
+ # # Added by SC.
1388
+ # # """
1389
+ # # u, v = np.asarray(u), np.asarray(v)
1390
+ # # return 1 - np.dot(u, v)
1391
+
1392
+ # def k_divergence(u, v):
1393
+ # """Calculate the K divergence between two vectors.
1394
+
1395
+ # Parameters
1396
+ # ----------
1397
+ # - u, v: Input vectors between which the divergence is to be calculated.
1398
+
1399
+ # Returns
1400
+ # -------
1401
+ # - The K divergence between the two vectors.
1402
+
1403
+ # References
1404
+ # ----------
1405
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1406
+ # Measures between Probability Density Functions. International
1407
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1408
+ # 1(4), 300-307.
1409
+ # """
1410
+ # u, v = np.asarray(u), np.asarray(v)
1411
+ # u[u == 0] = EPSILON
1412
+ # v[v == 0] = EPSILON
1413
+ # with np.errstate(divide="ignore", invalid="ignore"):
1414
+ # return np.sum(u * np.log(2 * u / (u + v)))
1415
+
1416
+ # def kl_divergence(u, v):
1417
+ # """Calculate the Kullback-Leibler divergence between two vectors.
1418
+
1419
+ # The Kullback-Leibler divergence measures the difference between two
1420
+ # probability distributions.
1421
+
1422
+ # Parameters
1423
+ # ----------
1424
+ # - u, v: Input vectors between which the divergence is to be calculated.
1425
+
1426
+ # Returns
1427
+ # -------
1428
+ # - The Kullback-Leibler divergence between the two vectors.
1429
+
1430
+ # References
1431
+ # ----------
1432
+ # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
1433
+ # Ann. Math. Statist. 22:79–86
1434
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1435
+ # Measures between Probability Density Functions. International
1436
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1437
+ # 1(4):300-307.
1438
+ # """
1439
+ # u, v = np.asarray(u), np.asarray(v)
1440
+ # u[u == 0] = EPSILON
1441
+ # v[v == 0] = EPSILON
1442
+ # with np.errstate(divide="ignore", invalid="ignore"):
1443
+ # return np.sum(u * np.log(u / v))
1444
+
1445
+ # def max_symmetric_chisq(u, v):
1446
+ # """Calculate the maximum symmetric chi-square distance.
1447
+
1448
+ # Parameters
1449
+ # ----------
1450
+ # - u, v: Input vectors between which the distance is to be calculated.
1451
+
1452
+ # Returns
1453
+ # -------
1454
+ # - The maximum symmetric chi-square distance between the two vectors.
1455
+
1456
+ # References
1457
+ # ----------
1458
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1459
+ # Measures between Probability Density Functions. International
1460
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1461
+ # 1(4):300-307.
1462
+ # """
1463
+ # u, v = np.asarray(u), np.asarray(v)
1464
+ # return max(neyman_chisq(u, v), pearson_chisq(u, v))
1465
+
1466
+ # def min_symmetric_chisq(u, v):
1467
+ # """Calculate the minimum symmetric chi-square distance.
1468
+
1469
+ # Parameters
1470
+ # ----------
1471
+ # - u, v: Input vectors between which the distance is to be calculated.
1472
+
1473
+ # Returns
1474
+ # -------
1475
+ # - The minimum symmetric chi-square distance between the two vectors.
1476
+
1477
+ # Notes
1478
+ # -----
1479
+ # Added by SC.
1480
+ # """
1481
+ # u, v = np.asarray(u), np.asarray(v)
1482
+ # return min(neyman_chisq(u, v), pearson_chisq(u, v))
1483
+
1484
+ # def neyman_chisq(u, v):
1485
+ # """Calculate the Neyman chi-square distance between two vectors.
1486
+
1487
+ # Parameters
1488
+ # ----------
1489
+ # - u, v: Input vectors between which the distance is to be calculated.
1490
+
1491
+ # Returns
1492
+ # -------
1493
+ # - The Neyman chi-square distance between the two vectors.
1494
+
1495
+ # References
1496
+ # ----------
1497
+ # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1498
+ # In Proceedings of the First Berkeley Symposium on Mathematical
1499
+ # Statistics and Probability.
1500
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1501
+ # Measures between Probability Density Functions. International
1502
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1503
+ # 1(4), 300-307.
1504
+ # """
1505
+ # u, v = np.asarray(u), np.asarray(v)
1506
+ # with np.errstate(divide="ignore", invalid="ignore"):
1507
+ # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1508
+
1509
+ # def pearson_chisq(u, v):
1510
+ # """Calculate the Pearson chi-square divergence between two vectors.
1511
+
1512
+ # Parameters
1513
+ # ----------
1514
+ # - u, v: Input vectors between which the divergence is to be calculated.
1515
+
1516
+ # Returns
1517
+ # -------
1518
+ # - The Pearson chi-square divergence between the two vectors.
1519
+
1520
+ # References
1521
+ # ----------
1522
+ # 1. Pearson K. (1900) On the Criterion that a given system of
1523
+ # deviations from the probable in the case of correlated system
1524
+ # of variables is such that it can be reasonable supposed to have
1525
+ # arisen from random sampling, Phil. Mag. 50, 157-172.
1526
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1527
+ # Measures between Probability Density Functions. International
1528
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1529
+ # 1(4), 300-307.
1530
+
1531
+ # Notes
1532
+ # -----
1533
+ # Pearson chi-square divergence is asymmetric.
1534
+ # """
1535
+ # u, v = np.asarray(u), np.asarray(v)
1536
+ # with np.errstate(divide="ignore", invalid="ignore"):
1537
+ # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1538
+
1539
+ # def nonintersection(u, v):
1540
+ # """
1541
+ # Calculate the Nonintersection distance between two vectors.
1542
+
1543
+ # Parameters
1544
+ # ----------
1545
+ # - u, v: Input vectors between which the distance is to be calculated.
1546
+
1547
+ # Returns
1548
+ # -------
1549
+ # - The Nonintersection distance between the two vectors.
1550
+
1551
+ # References
1552
+ # ----------
1553
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1554
+ # Measures between Probability Density Functions. International
1555
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1556
+ # 1(4), 300-307.
1557
+
1558
+ # Notes
1559
+ # -----
1560
+ # When used for comparing two probability density functions (pdfs),
1561
+ # Nonintersection distance equals half of Cityblock distance.
1562
+ # """
1563
+ # u, v = np.asarray(u), np.asarray(v)
1564
+ # return 1 - np.sum(np.minimum(u, v))