distclassipy 0.0.2__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -1,310 +1,439 @@
1
- # This code is based on the work of Andrzej Zielezinski, originally retrieved on 20 November 2022 from
2
- # https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py
3
- #
4
- # It has been modified by Siddharth Chaini on 27 November 2022.
5
- #
6
- # Licensed GNU General Public License v3.0;
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # https://www.gnu.org/licenses/gpl-3.0.en.html
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # Modifications by Siddharth Chaini include the addition of the following distance measures:
19
- # 1. Meehl distance
20
- # 2. Sorensen distance
21
- # 3. Ruzicka distance
22
- # 4. Inner product distance
23
- # 5. Harmonic mean distance
24
- # 6. Fidelity
25
- # 7. Minimimum Symmetric Chi Squared
26
- # 8. Probabilistic Symmetric Chi Squared
27
- #
28
- # In addition, the following code was added to all functions for array conversion:
29
- # u,v = np.asarray(u), np.asarray(v)
30
- #
31
- # Todos:
32
- # ALSO COMPARE RUNTIME OF THIS v/s custom v/s Tschopp
33
-
34
-
35
1
  """
36
- A variety of distance metrics to calculate the distance between two points.
2
+ A module providing a variety of distance metrics to calculate the distance between two points.
3
+
4
+ This module includes implementations of various distance metrics, including both common and less
5
+ common measures. It allows for the calculation of distances between data points in a vectorized
6
+ manner using numpy arrays.
7
+ This code is based on the work of Andrzej Zielezinski, originally retrieved on 20 November 2022 from
8
+ https://github.com/aziele/statistical-distances/blob/04412b3155c59fc7238b3d8ecf6f3723ac5befff/distance.py
9
+
10
+ It has been modified by Siddharth Chaini on 27 November 2022.
11
+
12
+ Licensed GNU General Public License v3.0;
13
+ you may not use this file except in compliance with the License.
14
+ You may obtain a copy of the License at
15
+
16
+ https://www.gnu.org/licenses/gpl-3.0.en.html
17
+
18
+ Unless required by applicable law or agreed to in writing, software
19
+ distributed under the License is distributed on an "AS IS" BASIS,
20
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ See the License for the specific language governing permissions and
22
+ limitations under the License.
23
+
24
+ Modifications by Siddharth Chaini include the addition of the following distance measures:
25
+ 1. Meehl distance
26
+ 2. Sorensen distance
27
+ 3. Ruzicka distance
28
+ 4. Inner product distance
29
+ 5. Harmonic mean distance
30
+ 6. Fidelity
31
+ 7. Minimum Symmetric Chi Squared
32
+ 8. Probabilistic Symmetric Chi Squared
33
+
34
+ In addition, the following code was added to all functions for array conversion:
35
+ u,v = np.asarray(u), np.asarray(v)
36
+
37
+ Todos:
38
+ ALSO COMPARE RUNTIME OF THIS v/s custom v/s Tschopp
37
39
  """
38
40
 
39
41
  import numpy as np
40
42
 
43
+
41
44
  class Distance:
42
45
 
43
46
  def __init__(self, epsilon=None):
44
- self.epsilon = np.finfo(float).eps if not epsilon else epsilon
47
+ """
48
+ Initialize the Distance class with an optional epsilon value.
45
49
 
50
+ Parameters:
51
+ - epsilon: A small value substituted for zeros to avoid division by zero and log(0); defaults to machine epsilon (np.finfo(float).eps) when not provided.
52
+ """
53
+ self.epsilon = np.finfo(float).eps if not epsilon else epsilon
46
54
 
47
55
  def acc(self, u, v):
48
56
  """
49
- The average of Cityblock/Manhattan and Chebyshev distances.
50
- Synonyms:
51
- ACC distance
52
- Average distance
57
+ Calculate the average of Cityblock/Manhattan and Chebyshev distances.
58
+
59
+ This function computes the ACC distance, also known as the Average distance, between two
60
+ vectors u and v. It is the average of the Cityblock (or Manhattan) and Chebyshev distances.
61
+
62
+ Parameters:
63
+ - u, v: Input vectors between which the distance is to be calculated.
64
+
65
+ Returns:
66
+ - The ACC distance between the two vectors.
67
+
53
68
  References:
54
- 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
55
- Geometry. Dover Publications.
56
- 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
57
- Measures between Probability Density Functions. International
58
- Journal of Mathematical Models and Methods in Applied Sciences.
59
- vol. 1(4), pp. 300-307.
69
+ 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean Geometry. Dover Publications.
70
+ 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
71
+ Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
72
+ vol. 1(4), pp. 300-307.
60
73
  """
61
- return (self.cityblock(u, v) + self.chebyshev(u, v))/2
62
-
74
+ return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
63
75
 
64
76
  def add_chisq(self, u, v):
65
77
  """
66
- Additive Symmetric Chi-square distance.
78
+ Compute the Additive Symmetric Chi-square distance between two vectors.
79
+
80
+ The Additive Symmetric Chi-square distance is a measure that can be used to compare two vectors.
81
+ This function calculates it based on the input vectors u and v.
82
+
83
+ Parameters:
84
+ - u, v: Input vectors between which the distance is to be calculated.
85
+
86
+ Returns:
87
+ - The Additive Symmetric Chi-square distance between the two vectors.
88
+
67
89
  References:
68
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
69
- Measures between Probability Density Functions. International
70
- Journal of Mathematical Models and Methods in Applied Sciences.
71
- vol. 1(4), pp. 300-307.
90
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity Measures between Probability
91
+ Density Functions. International Journal of Mathematical Models and Methods in Applied Sciences.
92
+ vol. 1(4), pp. 300-307.
72
93
  """
73
- u,v = np.asarray(u), np.asarray(v)
94
+ u, v = np.asarray(u), np.asarray(v)
74
95
  uvmult = u * v
75
- with np.errstate(divide='ignore', invalid="ignore"):
76
- return np.sum(np.where(uvmult != 0, ((u-v)**2 * (u+v))/uvmult, 0))
77
-
96
+ with np.errstate(divide="ignore", invalid="ignore"):
97
+ return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
78
98
 
79
99
  def bhattacharyya(self, u, v):
80
100
  """
81
- Bhattacharyya distance.
101
+ Calculate the Bhattacharyya distance between two vectors.
102
+
82
103
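  Returns a non-negative distance value for probability distributions: 0 when they are identical, growing without bound as their overlap vanishes.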
83
- References:
84
- 1. Bhattacharyya A (1947) On a measure of divergence between two
85
- statistical populations defined by probability distributions,
86
- Bull. Calcutta Math. Soc., 35, 99–109.
87
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
88
- Measures between Probability Density Functions. International
89
- Journal of Mathematical Models and Methods in Applied Sciences.
90
- 1(4), 300-307.
91
- 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
92
- """
93
- u,v = np.asarray(u), np.asarray(v)
94
- return -np.log(np.sum(np.sqrt(u*v)))
95
104
 
105
+ Parameters:
106
+ - u, v: Input vectors between which the distance is to be calculated.
107
+
108
+ Returns:
109
+ - The Bhattacharyya distance between the two vectors.
110
+
111
+ References:
112
+ 1. Bhattacharyya A (1947) On a measure of divergence between two
113
+ statistical populations defined by probability distributions,
114
+ Bull. Calcutta Math. Soc., 35, 99–109.
115
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
116
+ Measures between Probability Density Functions. International
117
+ Journal of Mathematical Models and Methods in Applied Sciences.
118
+ 1(4), 300-307.
119
+ 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
120
+ """
121
+ u, v = np.asarray(u), np.asarray(v)
122
+ return -np.log(np.sum(np.sqrt(u * v)))
96
123
 
97
124
  def braycurtis(self, u, v):
98
125
  """
99
- Bray-Curtis distance.
100
- Synonyms:
101
- Sørensen distance
102
- Bray-Curtis dissimilarity
103
-
126
+ Calculate the Bray-Curtis distance between two vectors.
127
+
128
+ The Bray-Curtis distance is a measure of dissimilarity between two non-negative vectors,
129
+ often used in ecology to measure the compositional dissimilarity between two sites based on counts
130
+ of species at both sites. It is closely related to the Sørensen distance and is also known as
131
+ Bray-Curtis dissimilarity.
132
+
104
133
  Notes:
105
134
  When used for comparing two probability density functions (pdfs),
106
- Bray-Curtis distance equals Cityblock distance divided by 2.
135
+ the Bray-Curtis distance equals the Cityblock distance divided by 2.
136
+
137
+ Parameters:
138
+ - u, v: Input vectors between which the distance is to be calculated.
139
+
140
+ Returns:
141
+ - The Bray-Curtis distance between the two vectors.
142
+
107
143
  References:
108
144
  1. Bray JR, Curtis JT (1957) An ordination of the upland forest of
109
- the southern Winsconsin. Ecological Monographies, 27, 325-349.
110
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
145
+ southern Wisconsin. Ecological Monographs, 27, 325-349.
146
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
111
147
  Measures between Probability Density Functions. International
112
148
  Journal of Mathematical Models and Methods in Applied Sciences.
113
149
  1(4), 300-307.
114
150
  3. https://en.wikipedia.org/wiki/Bray–Curtis_dissimilarity
115
151
  """
116
- u,v = np.asarray(u), np.asarray(v)
152
+ u, v = np.asarray(u), np.asarray(v)
117
153
  return np.sum(np.abs(u - v)) / np.sum(np.abs(u + v))
118
154
 
119
-
120
155
  def canberra(self, u, v):
121
156
  """
122
- Canberra distance.
157
+ Calculate the Canberra distance between two vectors.
158
+
159
+ The Canberra distance is a weighted version of the Manhattan distance, used in numerical analysis.
160
+
123
161
  Notes:
124
162
  When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
125
163
  is used in the calculation.
164
+
165
+ Parameters:
166
+ - u, v: Input vectors between which the distance is to be calculated.
167
+
168
+ Returns:
169
+ - The Canberra distance between the two vectors.
170
+
126
171
  References:
127
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
172
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
128
173
  Measures between Probability Density Functions. International
129
174
  Journal of Mathematical Models and Methods in Applied Sciences.
130
175
  1(4), 300-307.
131
176
  """
132
- u,v = np.asarray(u), np.asarray(v)
133
- with np.errstate(invalid='ignore'):
177
+ u, v = np.asarray(u), np.asarray(v)
178
+ with np.errstate(invalid="ignore"):
134
179
  return np.nansum(np.abs(u - v) / (np.abs(u) + np.abs(v)))
135
180
 
136
-
137
181
  def chebyshev(self, u, v):
138
182
  """
139
- Chebyshev distance.
183
+ Calculate the Chebyshev distance between two vectors.
184
+
185
+ The Chebyshev distance is a metric defined on a vector space where the distance between two vectors
186
+ is the greatest of their differences along any coordinate dimension.
187
+
140
188
  Synonyms:
141
189
  Chessboard distance
142
190
  King-move metric
143
191
  Maximum value distance
144
192
  Minimax approximation
193
+
194
+ Parameters:
195
+ - u, v: Input vectors between which the distance is to be calculated.
196
+
197
+ Returns:
198
+ - The Chebyshev distance between the two vectors.
199
+
145
200
  References:
146
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
201
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
147
202
  Measures between Probability Density Functions. International
148
203
  Journal of Mathematical Models and Methods in Applied Sciences.
149
- 1(4), 300-307.
204
+ 1(4), 300-307.
150
205
  """
151
- u,v = np.asarray(u), np.asarray(v)
206
+ u, v = np.asarray(u), np.asarray(v)
152
207
  return np.amax(np.abs(u - v))
153
208
 
154
-
155
209
  def chebyshev_min(self, u, v):
156
210
  """
157
- Minimum value distance (my measure).
211
+ Calculate the minimum value distance between two vectors.
212
+
213
+ This measure represents a custom approach by Zielezinski to distance measurement, focusing on the minimum absolute difference.
214
+
215
+ Parameters:
216
+ - u, v: Input vectors between which the distance is to be calculated.
217
+
218
+ Returns:
219
+ - The minimum value distance between the two vectors.
158
220
  """
159
- u,v = np.asarray(u), np.asarray(v)
221
+ u, v = np.asarray(u), np.asarray(v)
160
222
  return np.amin(np.abs(u - v))
161
223
 
162
-
163
224
  def clark(self, u, v):
164
225
  """
165
- Clark distance.
166
- Clark distance equals the squared root of half of the divergence.
226
+ Calculate the Clark distance between two vectors.
227
+
228
+ The Clark distance equals the square root of half of the divergence.
229
+
167
230
  Notes:
168
231
  When `u[i]` and `v[i]` are 0 for given i, then the fraction 0/0 = 0
169
232
  is used in the calculation.
233
+
234
+ Parameters:
235
+ - u, v: Input vectors between which the distance is to be calculated.
236
+
237
+ Returns:
238
+ - The Clark distance between the two vectors.
239
+
170
240
  References:
171
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
241
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
172
242
  Measures between Probability Density Functions. International
173
243
  Journal of Mathematical Models and Methods in Applied Sciences.
174
244
  1(4), 300-307.
175
245
  """
176
- u,v = np.asarray(u), np.asarray(v)
177
- with np.errstate(divide='ignore', invalid="ignore"):
178
- return np.sqrt(np.nansum(np.power(np.abs(u-v)/(u+v),2)))
179
-
246
+ u, v = np.asarray(u), np.asarray(v)
247
+ with np.errstate(divide="ignore", invalid="ignore"):
248
+ return np.sqrt(np.nansum(np.power(np.abs(u - v) / (u + v), 2)))
180
249
 
181
250
  def cosine(self, u, v):
182
251
  """
183
- Cosine distance.
252
+ Calculate the cosine distance between two vectors.
253
+
254
+ Parameters:
255
+ - u, v: Input vectors between which the distance is to be calculated.
256
+
257
+ Returns:
258
+ - The cosine distance between the two vectors.
259
+
184
260
  References:
185
261
  1. SciPy.
186
262
  """
187
- u,v = np.asarray(u), np.asarray(v)
188
- return 1 - np.dot(u, v)/(np.sqrt(np.dot(u, u))*np.sqrt(np.dot(v, v)))
189
-
263
+ u, v = np.asarray(u), np.asarray(v)
264
+ return 1 - np.dot(u, v) / (np.sqrt(np.dot(u, u)) * np.sqrt(np.dot(v, v)))
190
265
 
191
266
  def correlation_pearson(self, u, v):
192
267
  """
193
- Pearson correlation distance.
268
+ Calculate the Pearson correlation distance between two vectors.
269
+
194
270
  Returns a distance value between 0 and 2.
271
+
272
+ Parameters:
273
+ - u, v: Input vectors between which the distance is to be calculated.
274
+
275
+ Returns:
276
+ - The Pearson correlation distance between the two vectors.
195
277
  """
196
- u,v = np.asarray(u), np.asarray(v)
278
+
279
+ u, v = np.asarray(u), np.asarray(v)
197
280
  r = np.ma.corrcoef(u, v)[0, 1]
198
281
  return 1.0 - r
199
282
 
200
-
201
283
  def czekanowski(self, u, v):
202
284
  """
203
- Czekanowski distance.
285
+ Calculate the Czekanowski distance between two vectors.
286
+
287
+ Parameters:
288
+ - u, v: Input vectors between which the distance is to be calculated.
289
+
290
+ Returns:
291
+ - The Czekanowski distance between the two vectors.
292
+
204
293
  References:
205
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
294
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
206
295
  Measures between Probability Density Functions. International
207
296
  Journal of Mathematical Models and Methods in Applied Sciences.
208
297
  1(4), 300-307.
209
298
  """
210
- u,v = np.asarray(u), np.asarray(v)
299
+ u, v = np.asarray(u), np.asarray(v)
211
300
  return np.sum(np.abs(u - v)) / np.sum(u + v)
212
301
 
213
-
214
302
  def dice(self, u, v):
215
303
  """
216
- Dice dissimilarity.
304
+ Calculate the Dice dissimilarity between two vectors.
305
+
217
306
  Synonyms:
218
307
  Sorensen distance
219
- Referemces:
308
+
309
+ Parameters:
310
+ - u, v: Input vectors between which the distance is to be calculated.
311
+
312
+ Returns:
313
+ - The Dice dissimilarity between the two vectors.
314
+
315
+ References:
220
316
  1. Dice LR (1945) Measures of the amount of ecologic association
221
317
  between species. Ecology. 26, 297-302.
222
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
318
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
223
319
  Measures between Probability Density Functions. International
224
320
  Journal of Mathematical Models and Methods in Applied Sciences.
225
321
  1(4), 300-307.
226
322
  """
227
- u,v = np.asarray(u), np.asarray(v)
323
+ u, v = np.asarray(u), np.asarray(v)
228
324
  u_v = u - v
229
325
  return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
230
326
 
231
-
232
327
  def divergence(self, u, v):
233
328
  """
234
- Divergence.
329
+ Calculate the divergence between two vectors.
330
+
235
331
  Divergence equals squared Clark distance multiplied by 2.
332
+
333
+ Parameters:
334
+ - u, v: Input vectors between which the distance is to be calculated.
335
+
336
+ Returns:
337
+ - The divergence between the two vectors.
338
+
236
339
  References:
237
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
340
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
238
341
  Measures between Probability Density Functions. International
239
342
  Journal of Mathematical Models and Methods in Applied Sciences.
240
343
  1(4), 300-307.
241
344
  """
242
- u,v = np.asarray(u), np.asarray(v)
345
+ u, v = np.asarray(u), np.asarray(v)
243
346
  with np.errstate(invalid="ignore"):
244
- return 2 * np.nansum(np.power(u-v,2) / np.power(u+v,2))
245
-
347
+ return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
246
348
 
247
349
  def euclidean(self, u, v):
248
350
  """
249
- Euclidean distance.
250
- Synonyms:
251
- Pythagorean metric
351
+ Calculate the Euclidean distance between two vectors.
352
+
353
+ The Euclidean distance is the "ordinary" straight-line distance between two points in Euclidean space.
354
+
355
+ Parameters:
356
+ - u, v: Input vectors between which the distance is to be calculated.
357
+
358
+ Returns:
359
+ - The Euclidean distance between the two vectors.
360
+
252
361
  References:
253
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
362
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
254
363
  Measures between Probability Density Functions. International
255
364
  Journal of Mathematical Models and Methods in Applied Sciences.
256
- 1(4), 300-307.
365
+ 1(4), 300-307.
257
366
  """
258
- u,v = np.asarray(u), np.asarray(v)
259
- return np.linalg.norm(u-v)
260
-
367
+ u, v = np.asarray(u), np.asarray(v)
368
+ return np.linalg.norm(u - v)
261
369
 
262
370
  def fidelity(self, u, v):
263
371
  """
264
- Fidelity distance.
372
+ Calculate the fidelity distance between two vectors.
373
+
374
+ The fidelity distance is one minus the fidelity, sum(sqrt(u * v)), so it measures the dissimilarity between two probability distributions.
375
+
376
+ Parameters:
377
+ - u, v: Input vectors between which the distance is to be calculated.
378
+
379
+ Returns:
380
+ - The fidelity distance between the two vectors.
381
+
265
382
  Notes:
266
383
  Added by SC.
267
384
  """
268
- u,v = np.asarray(u), np.asarray(v)
269
- return 1 - (np.sum(np.sqrt(u*v)))
270
-
385
+ u, v = np.asarray(u), np.asarray(v)
386
+ return 1 - (np.sum(np.sqrt(u * v)))
271
387
 
272
388
  def google(self, u, v):
273
389
  """
274
- Normalized Google Distance (NGD).
275
- Returns a distance value between 0 and 1. Two sequences are treated
276
- as two different web pages and the each word frequency represents
277
- terms found in each webpage.
390
+ Calculate the Normalized Google Distance (NGD) between two vectors.
391
+
392
+ NGD is a measure of similarity derived from the number of hits returned by the Google search engine for a given set of keywords.
393
+
394
+ Parameters:
395
+ - u, v: Input vectors between which the distance is to be calculated.
396
+
397
+ Returns:
398
+ - The Normalized Google Distance between the two vectors.
399
+
278
400
  Notes:
279
401
  When used for comparing two probability density functions (pdfs),
280
402
  Google distance equals half of Cityblock distance.
403
+
281
404
  References:
282
405
  1. Lee & Rashid (2008) Information Technology, ITSim 2008.
283
406
  doi:10.1109/ITSIM.2008.4631601.
284
407
  """
285
- u,v = np.asarray(u), np.asarray(v)
408
+ u, v = np.asarray(u), np.asarray(v)
286
409
  x = float(np.sum(u))
287
410
  y = float(np.sum(v))
288
411
  summin = float(np.sum(np.minimum(u, v)))
289
412
  return (max([x, y]) - summin) / ((x + y) - min([x, y]))
290
413
 
291
-
292
414
  def gower(self, u, v):
293
415
  """
294
- Gower distance.
295
- Gower distance equals Cityblock distance divided by vector length.
416
+ Calculate the Gower distance between two vectors.
417
+
418
+ The Gower distance equals the Cityblock distance divided by the vector length.
419
+
420
+ Parameters:
421
+ - u, v: Input vectors between which the distance is to be calculated.
422
+
423
+ Returns:
424
+ - The Gower distance between the two vectors.
425
+
296
426
  References:
297
427
  1. Gower JC. (1971) General Coefficient of Similarity
298
428
  and Some of Its Properties, Biometrics 27, 857-874.
299
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
429
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
300
430
  Measures between Probability Density Functions. International
301
431
  Journal of Mathematical Models and Methods in Applied Sciences.
302
432
  1(4), 300-307.
303
433
  """
304
- u,v = np.asarray(u), np.asarray(v)
434
+ u, v = np.asarray(u), np.asarray(v)
305
435
  return np.sum(np.abs(u - v)) / u.size
306
436
 
307
-
308
437
  #### NEEDS CHECKING ####
309
438
  # def harmonicmean(self, u, v):
310
439
  # """
@@ -316,84 +445,118 @@ class Distance:
316
445
  # return 1 - 2.*np.sum(u*v/(u+v))
317
446
  #########
318
447
 
319
-
320
448
  def hellinger(self, u, v):
321
449
  """
322
- Hellinger distance.
450
+ Calculate the Hellinger distance between two vectors.
451
+
452
+ The Hellinger distance is a measure of similarity between two probability distributions.
453
+
454
+ Parameters:
455
+ - u, v: Input vectors between which the distance is to be calculated.
456
+
457
+ Returns:
458
+ - The Hellinger distance between the two vectors.
459
+
323
460
  Notes:
324
461
  This implementation produces values two times larger than values
325
462
  obtained by Hellinger distance described in Wikipedia and also
326
463
  in https://gist.github.com/larsmans/3116927.
327
- Wikipedia:
328
- np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)) / np.sqrt(2)
464
+
329
465
  References:
330
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
331
- Measures between Probability Density Functions. International
332
- Journal of Mathematical Models and Methods in Applied Sciences.
333
- 1(4), 300-307.
466
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
467
+ Measures between Probability Density Functions. International
468
+ Journal of Mathematical Models and Methods in Applied Sciences.
469
+ 1(4), 300-307.
334
470
  """
335
- u,v = np.asarray(u), np.asarray(v)
336
- return np.sqrt(2*np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
337
-
471
+ u, v = np.asarray(u), np.asarray(v)
472
+ return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
338
473
 
339
474
  def inner(self, u, v):
340
475
  """
341
- Inner product distance.
476
+ Calculate the inner product distance between two vectors.
477
+
478
+ The inner product distance is a measure of similarity between two vectors, based on their inner product.
479
+
480
+ Parameters:
481
+ - u, v: Input vectors between which the distance is to be calculated.
482
+
483
+ Returns:
484
+ - The inner product distance between the two vectors.
485
+
342
486
  Notes:
343
487
  Added by SC.
344
488
  """
345
- u,v = np.asarray(u), np.asarray(v)
489
+ u, v = np.asarray(u), np.asarray(v)
346
490
  return 1 - np.dot(u, v)
347
491
 
348
-
349
492
  def jaccard(self, u, v):
350
493
  """
351
- Jaccard distance.
494
+ Calculate the Jaccard distance between two vectors.
495
+
496
+ The Jaccard distance measures dissimilarity between sample sets.
497
+
498
+ Parameters:
499
+ - u, v: Input vectors between which the distance is to be calculated.
500
+
501
+ Returns:
502
+ - The Jaccard distance between the two vectors.
503
+
352
504
  References:
353
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
354
- Measures between Probability Density Functions. International
355
- Journal of Mathematical Models and Methods in Applied Sciences.
356
- 1(4), 300-307.
505
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
506
+ Measures between Probability Density Functions. International
507
+ Journal of Mathematical Models and Methods in Applied Sciences.
508
+ 1(4), 300-307.
357
509
  """
358
- u,v = np.asarray(u), np.asarray(v)
510
+ u, v = np.asarray(u), np.asarray(v)
359
511
  uv = np.dot(u, v)
360
512
  return 1 - (uv / (np.dot(u, u) + np.dot(v, v) - uv))
361
513
 
362
-
363
514
  def jeffreys(self, u, v):
364
515
  """
365
- Jeffreys divergence.
366
- Synonyms:
367
- J divergence
516
+ Calculate the Jeffreys divergence between two vectors.
517
+
518
+ The Jeffreys divergence is a symmetric version of the Kullback-Leibler divergence.
519
+
520
+ Parameters:
521
+ - u, v: Input vectors between which the divergence is to be calculated.
522
+
523
+ Returns:
524
+ - The Jeffreys divergence between the two vectors.
525
+
368
526
  References:
369
527
  1. Jeffreys H (1946) An Invariant Form for the Prior Probability
370
528
  in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
371
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
529
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
372
530
  Measures between Probability Density Functions. International
373
531
  Journal of Mathematical Models and Methods in Applied Sciences.
374
532
  1(4), 300-307.
375
533
  """
376
- u,v = np.asarray(u), np.asarray(v)
534
+ u, v = np.asarray(u), np.asarray(v)
377
535
  # Add epsilon to zeros in vectors to avoid division
378
536
  # by 0 and/or log of 0. Alternatively, zeros in the
379
537
  # vectors could be ignored or masked (see below).
380
538
  # u = ma.masked_where(u == 0, u)
381
539
  # v = ma.masked_where(v == 0, v)
382
- u = np.where(u==0, self.epsilon, u)
383
- v = np.where(v==0, self.epsilon, v)
384
- return np.sum((u-v) * np.log(u / v))
385
-
540
+ u = np.where(u == 0, self.epsilon, u)
541
+ v = np.where(v == 0, self.epsilon, v)
542
+ return np.sum((u - v) * np.log(u / v))
386
543
 
387
544
  def jensenshannon_divergence(self, u, v):
388
545
  """
389
- Jensen-Shannon divergence.
390
- Notes:
391
- 1. Equals half of Topsøe distance
392
- 2. Equals squared jensenshannon_distance.
546
+ Calculate the Jensen-Shannon divergence between two vectors.
547
+
548
+ The Jensen-Shannon divergence is a symmetric and finite measure of similarity between two probability distributions.
549
+
550
+ Parameters:
551
+ - u, v: Input vectors between which the divergence is to be calculated.
552
+
553
+ Returns:
554
+ - The Jensen-Shannon divergence between the two vectors.
555
+
393
556
  References:
394
557
  1. Lin J. (1991) Divergence measures based on the Shannon entropy.
395
558
  IEEE Transactions on Information Theory, 37(1):145–151.
396
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
559
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
397
560
  Measures between Probability Density Functions. International
398
561
  Journal of Mathematical Models and Methods in Applied Sciences.
399
562
  1(4), 300-307.
@@ -404,523 +567,718 @@ class Distance:
404
567
  el1 = (u * np.log(u) + v * np.log(v)) / 2
405
568
  el2 = (u + v)/2
406
569
  el3 = np.log(el2)
407
- return np.sum(el1 - el2 * el3)
570
+ return np.sum(el1 - el2 * el3)
408
571
  """
409
- u,v = np.asarray(u), np.asarray(v)
410
- u = np.where(u==0, self.epsilon, u)
411
- v = np.where(v==0, self.epsilon, v)
412
- dl = u * np.log(2*u/(u+v))
413
- dr = v * np.log(2*v/(u+v))
572
+ u, v = np.asarray(u), np.asarray(v)
573
+ u = np.where(u == 0, self.epsilon, u)
574
+ v = np.where(v == 0, self.epsilon, v)
575
+ dl = u * np.log(2 * u / (u + v))
576
+ dr = v * np.log(2 * v / (u + v))
414
577
  return (np.sum(dl) + np.sum(dr)) / 2
415
578
 
416
-
417
579
  def jensen_difference(self, u, v):
418
580
  """
419
- Jensen difference
420
- Comments:
421
- Seems equal to Jensen-Shannon divergence.
581
+ Calculate the Jensen difference between two vectors.
582
+
583
+ The Jensen difference is considered similar to the Jensen-Shannon divergence.
584
+
585
+ Parameters:
586
+ - u, v: Input vectors between which the distance is to be calculated.
587
+
588
+ Returns:
589
+ - The Jensen difference between the two vectors.
590
+
591
+ Notes:
592
+ 1. Equals half of Topsøe distance
593
+ 2. Equals squared jensenshannon_distance.
594
+
595
+
422
596
  References:
423
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
597
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
424
598
  Measures between Probability Density Functions. International
425
599
  Journal of Mathematical Models and Methods in Applied Sciences.
426
- 1(4), 300-307.
600
+ 1(4), 300-307.
427
601
  """
428
- u,v = np.asarray(u), np.asarray(v)
429
- u = np.where(u==0, self.epsilon, u)
430
- v = np.where(v==0, self.epsilon, v)
602
+ u, v = np.asarray(u), np.asarray(v)
603
+ u = np.where(u == 0, self.epsilon, u)
604
+ v = np.where(v == 0, self.epsilon, v)
431
605
  el1 = (u * np.log(u) + v * np.log(v)) / 2
432
606
  el2 = (u + v) / 2
433
607
  return np.sum(el1 - el2 * np.log(el2))
434
608
 
435
-
436
609
  def k_divergence(self, u, v):
437
610
  """
438
- K divergence.
611
+ Calculate the K divergence between two vectors.
612
+
613
+ Parameters:
614
+ - u, v: Input vectors between which the divergence is to be calculated.
615
+
616
+ Returns:
617
+ - The K divergence between the two vectors.
618
+
439
619
  References:
440
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
620
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
441
621
  Measures between Probability Density Functions. International
442
622
  Journal of Mathematical Models and Methods in Applied Sciences.
443
623
  1(4), 300-307.
444
624
  """
445
- u,v = np.asarray(u), np.asarray(v)
446
- u = np.where(u==0, self.epsilon, u)
447
- v = np.where(v==0, self.epsilon, v)
448
- return np.sum(u*np.log(2*u/(u+v)))
449
-
625
+ u, v = np.asarray(u), np.asarray(v)
626
+ u = np.where(u == 0, self.epsilon, u)
627
+ v = np.where(v == 0, self.epsilon, v)
628
+ return np.sum(u * np.log(2 * u / (u + v)))
450
629
 
451
630
  def kl_divergence(self, u, v):
452
631
  """
453
- Kullback-Leibler divergence.
454
- Syonymes:
455
- KL divergence, relative entropy, information deviation
632
+ Calculate the Kullback-Leibler divergence between two vectors.
633
+
634
+ The Kullback-Leibler divergence measures the difference between two probability distributions.
635
+
636
+ Parameters:
637
+ - u, v: Input vectors between which the divergence is to be calculated.
638
+
639
+ Returns:
640
+ - The Kullback-Leibler divergence between the two vectors.
641
+
456
642
  References:
457
643
  1. Kullback S, Leibler RA (1951) On information and sufficiency.
458
644
  Ann. Math. Statist. 22:79–86
459
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
645
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
460
646
  Measures between Probability Density Functions. International
461
647
  Journal of Mathematical Models and Methods in Applied Sciences.
462
648
  1(4):300-307.
463
649
  """
464
- u,v = np.asarray(u), np.asarray(v)
465
- u = np.where(u==0, self.epsilon, u)
466
- v = np.where(v==0, self.epsilon, v)
650
+ u, v = np.asarray(u), np.asarray(v)
651
+ u = np.where(u == 0, self.epsilon, u)
652
+ v = np.where(v == 0, self.epsilon, v)
467
653
  return np.sum(u * np.log(u / v))
468
654
 
469
-
470
655
  def kulczynski(self, u, v):
471
656
  """
472
- Kulczynski distance.
657
+ Calculate the Kulczynski distance between two vectors.
658
+
659
+ Parameters:
660
+ - u, v: Input vectors between which the distance is to be calculated.
661
+
662
+ Returns:
663
+ - The Kulczynski distance between the two vectors.
664
+
473
665
  References:
474
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
666
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
475
667
  Measures between Probability Density Functions. International
476
668
  Journal of Mathematical Models and Methods in Applied Sciences.
477
669
  1(4):300-307.
478
670
  """
479
- u,v = np.asarray(u), np.asarray(v)
671
+ u, v = np.asarray(u), np.asarray(v)
480
672
  return np.sum(np.abs(u - v)) / np.sum(np.minimum(u, v))
481
673
 
482
-
483
674
  def kumarjohnson(self, u, v):
484
675
  """
485
- Kumar-Johnson distance.
676
+ Calculate the Kumar-Johnson distance between two vectors.
677
+
678
+ Parameters:
679
+ - u, v: Input vectors between which the distance is to be calculated.
680
+
681
+ Returns:
682
+ - The Kumar-Johnson distance between the two vectors.
683
+
486
684
  References:
487
685
  1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
488
686
  and information inequalities, Journal of Inequalities in pure
489
687
  and applied Mathematics. 6(3).
490
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
688
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
491
689
  Measures between Probability Density Functions. International
492
690
  Journal of Mathematical Models and Methods in Applied Sciences.
493
691
  1(4):300-307.
494
692
  """
495
- u,v = np.asarray(u), np.asarray(v)
496
- uvmult = u*v
497
- with np.errstate(divide='ignore', invalid='ignore'):
693
+ u, v = np.asarray(u), np.asarray(v)
694
+ uvmult = u * v
695
+ with np.errstate(divide="ignore", invalid="ignore"):
498
696
  numer = np.power(u**2 - v**2, 2)
499
- denom = 2 * np.power(uvmult, 3/2)
500
- return np.sum(np.where(uvmult != 0, numer/denom, 0))
501
-
697
+ denom = 2 * np.power(uvmult, 3 / 2)
698
+ return np.sum(np.where(uvmult != 0, numer / denom, 0))
502
699
 
503
700
  def lorentzian(self, u, v):
504
701
  """
505
- Lorentzian distance.
702
+ Calculate the Lorentzian distance between two vectors.
703
+
704
+ Parameters:
705
+ - u, v: Input vectors between which the distance is to be calculated.
706
+
707
+ Returns:
708
+ - The Lorentzian distance between the two vectors.
709
+
506
710
  References:
507
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
711
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
508
712
  Measures between Probability Density Functions. International
509
713
  Journal of Mathematical Models and Methods in Applied Sciences.
510
- 1(4):300-307.
714
+ 1(4):300-307.
715
+
511
716
  Notes:
512
- One (1) is added to guarantee the non-negativity property and to
513
- eschew the log of zero
717
+ One (1) is added to guarantee the non-negativity property and to
718
+ eschew the log of zero.
514
719
  """
515
- u,v = np.asarray(u), np.asarray(v)
516
- return np.sum(np.log(np.abs(u-v)+1))
517
-
720
+ u, v = np.asarray(u), np.asarray(v)
721
+ return np.sum(np.log(np.abs(u - v) + 1))
518
722
 
519
723
  def cityblock(self, u, v):
520
724
  """
521
- Manhattan distance.
725
+ Calculate the Cityblock (Manhattan) distance between two vectors.
726
+
727
+ Parameters:
728
+ - u, v: Input vectors between which the distance is to be calculated.
729
+
730
+ Returns:
731
+ - The Cityblock distance between the two vectors.
732
+
733
+ References:
734
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
735
+ Measures between Probability Density Functions. International
736
+ Journal of Mathematical Models and Methods in Applied Sciences.
737
+ 1(4):300-307.
738
+
522
739
  Synonyms:
523
740
  City block distance
524
741
  Manhattan distance
525
742
  Rectilinear distance
526
743
  Taxicab norm
744
+
527
745
  Notes:
528
- Cityblock distance between two probability density functions
746
+ Cityblock distance between two probability density functions
529
747
  (pdfs) equals:
530
748
  1. Non-intersection distance multiplied by 2.
531
749
  2. Gower distance multiplied by vector length.
532
750
  3. Bray-Curtis distance multiplied by 2.
533
751
  4. Google distance multiplied by 2.
534
- References:
535
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
536
- Measures between Probability Density Functions. International
537
- Journal of Mathematical Models and Methods in Applied Sciences.
538
- 1(4):300-307.
539
752
  """
540
- u,v = np.asarray(u), np.asarray(v)
753
+ u, v = np.asarray(u), np.asarray(v)
541
754
  return np.sum(np.abs(u - v))
542
755
 
543
-
544
756
  def marylandbridge(self, u, v):
545
757
  """
546
- Maryland Bridge distance.
758
+ Calculate the Maryland Bridge distance between two vectors.
759
+
760
+ Parameters:
761
+ - u, v: Input vectors between which the distance is to be calculated.
762
+
763
+ Returns:
764
+ - The Maryland Bridge distance between the two vectors.
765
+
547
766
  References:
548
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
767
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
549
768
  Springer-Verlag Berlin Heidelberg. 1-590.
550
769
  """
551
- u,v = np.asarray(u), np.asarray(v)
770
+ u, v = np.asarray(u), np.asarray(v)
552
771
  uvdot = np.dot(u, v)
553
- return 1 - (uvdot/np.dot(u, u) + uvdot/np.dot(v, v))/2
554
-
772
+ return 1 - (uvdot / np.dot(u, u) + uvdot / np.dot(v, v)) / 2
555
773
 
556
774
  def matusita(self, u, v):
557
775
  """
558
- Matusita distance.
559
- Notes:
560
- Equals square root of Squared-chord distance.
776
+ Calculate the Matusita distance between two vectors.
777
+
778
+ Parameters:
779
+ - u, v: Input vectors between which the distance is to be calculated.
780
+
781
+ Returns:
782
+ - The Matusita distance between the two vectors.
783
+
561
784
  References:
562
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
785
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
563
786
  Measures between Probability Density Functions. International
564
787
  Journal of Mathematical Models and Methods in Applied Sciences.
565
788
  1(4):300-307.
566
- """
567
- u,v = np.asarray(u), np.asarray(v)
568
- return np.sqrt(np.sum((np.sqrt(u)-np.sqrt(v))**2))
569
789
 
790
+ Notes:
791
+ Equals square root of Squared-chord distance.
792
+ """
793
+ u, v = np.asarray(u), np.asarray(v)
794
+ return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
570
795
 
571
796
  def max_symmetric_chisq(self, u, v):
572
797
  """
573
- Max-symmetric chisq.
798
+ Calculate the maximum symmetric chi-square distance between two vectors.
799
+
800
+ Parameters:
801
+ - u, v: Input vectors between which the distance is to be calculated.
802
+
803
+ Returns:
804
+ - The maximum symmetric chi-square distance between the two vectors.
805
+
574
806
  References:
575
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
807
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
576
808
  Measures between Probability Density Functions. International
577
809
  Journal of Mathematical Models and Methods in Applied Sciences.
578
810
  1(4):300-307.
579
811
  """
580
- u,v = np.asarray(u), np.asarray(v)
812
+ u, v = np.asarray(u), np.asarray(v)
581
813
  return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
582
814
 
583
-
584
815
  def min_symmetric_chisq(self, u, v):
585
816
  """
586
- Min-symmetric chisq.
817
+ Calculate the minimum symmetric chi-square distance between two vectors.
818
+
819
+ Parameters:
820
+ - u, v: Input vectors between which the distance is to be calculated.
821
+
822
+ Returns:
823
+ - The minimum symmetric chi-square distance between the two vectors.
824
+
587
825
  Notes:
588
826
  Added by SC.
589
827
  """
590
- u,v = np.asarray(u), np.asarray(v)
828
+ u, v = np.asarray(u), np.asarray(v)
591
829
  return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
592
830
 
593
-
594
831
  def meehl(self, u, v):
595
832
  """
596
- The Meehl distance.
833
+ Calculate the Meehl distance between two vectors.
834
+
835
+ Parameters:
836
+ - u, v: Input vectors between which the distance is to be calculated.
837
+
838
+ Returns:
839
+ - The Meehl distance between the two vectors.
840
+
597
841
  Notes:
598
842
  Added by SC.
843
+
599
844
  References:
600
845
  1. Deza M. and Deza E. (2013) Encyclopedia of Distances.
601
- Berlin, Heidelberg: Springer Berlin Heidelberg.
846
+ Berlin, Heidelberg: Springer Berlin Heidelberg.
602
847
  https://doi.org/10.1007/978-3-642-30958-8.
603
848
  """
604
- u,v = np.asarray(u), np.asarray(v)
849
+ u, v = np.asarray(u), np.asarray(v)
605
850
 
606
851
  xi = u[:-1]
607
852
  yi = v[:-1]
608
- xiplus1 = np.roll(u,1)[:-1]
609
- yiplus1 = np.roll(v,1)[:-1]
610
-
611
- with np.errstate(divide='ignore', invalid="ignore"):
612
- return np.nansum((xi - yi - xiplus1 + yiplus1)**2)
853
+ xiplus1 = np.roll(u, 1)[:-1]
854
+ yiplus1 = np.roll(v, 1)[:-1]
613
855
 
856
+ with np.errstate(divide="ignore", invalid="ignore"):
857
+ return np.nansum((xi - yi - xiplus1 + yiplus1) ** 2)
614
858
 
615
859
  def minkowski(self, u, v, p=2):
616
860
  """
617
- Minkowski distance.
861
+ Calculate the Minkowski distance between two vectors.
862
+
618
863
  Parameters:
619
- p : int
620
- The order of the norm of the difference.
864
+ - u, v: Input vectors between which the distance is to be calculated.
865
+ - p: The order of the norm of the difference.
866
+
867
+ Returns:
868
+ - The Minkowski distance between the two vectors.
869
+
621
870
  Notes:
622
871
  When p goes to infinity, the Chebyshev distance is derived.
872
+
623
873
  References:
624
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
874
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
625
875
  Measures between Probability Density Functions. International
626
876
  Journal of Mathematical Models and Methods in Applied Sciences.
627
877
  1(4):300-307.
628
878
  """
629
- u,v = np.asarray(u), np.asarray(v)
879
+ u, v = np.asarray(u), np.asarray(v)
630
880
  return np.linalg.norm(u - v, ord=p)
631
881
 
632
-
633
882
  def motyka(self, u, v):
634
883
  """
635
- Motyka distance.
884
+ Calculate the Motyka distance between two vectors.
885
+
886
+ Parameters:
887
+ - u, v: Input vectors between which the distance is to be calculated.
888
+
889
+ Returns:
890
+ - The Motyka distance between the two vectors.
891
+
636
892
  Notes:
637
893
  The distance between identical vectors is not equal to 0 but 0.5.
638
-
894
+
639
895
  References:
640
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
896
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
641
897
  Measures between Probability Density Functions. International
642
898
  Journal of Mathematical Models and Methods in Applied Sciences.
643
899
  1(4), 300-307.
644
900
  """
645
- u,v = np.asarray(u), np.asarray(v)
901
+ u, v = np.asarray(u), np.asarray(v)
646
902
  return np.sum(np.maximum(u, v)) / np.sum(u + v)
647
903
 
648
-
649
904
  def neyman_chisq(self, u, v):
650
905
  """
651
- Neyman chi-square distance.
906
+ Calculate the Neyman chi-square distance between two vectors.
907
+
908
+ Parameters:
909
+ - u, v: Input vectors between which the distance is to be calculated.
910
+
911
+ Returns:
912
+ - The Neyman chi-square distance between the two vectors.
913
+
652
914
  References:
653
- 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
915
+ 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
654
916
  In Proceedings of the First Berkeley Symposium on Mathematical
655
917
  Statistics and Probability.
656
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
918
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
657
919
  Measures between Probability Density Functions. International
658
920
  Journal of Mathematical Models and Methods in Applied Sciences.
659
921
  1(4), 300-307.
660
922
  """
661
- u,v = np.asarray(u), np.asarray(v)
662
- with np.errstate(divide='ignore', invalid='ignore'):
663
- return np.sum(np.where(u != 0, (u-v)**2/u, 0))
664
-
923
+ u, v = np.asarray(u), np.asarray(v)
924
+ with np.errstate(divide="ignore", invalid="ignore"):
925
+ return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
665
926
 
666
927
  def nonintersection(self, u, v):
667
928
  """
668
- Distance based on intersection.
669
- Synonyms:
670
- Non-overlaps
671
- Intersection distance
672
- Notes:
673
- When used for comparing two probability density functions (pdfs),
674
- Nonintersection distance equals half of Cityblock distance.
929
+ Calculate the Nonintersection distance between two vectors.
930
+
931
+ Parameters:
932
+ - u, v: Input vectors between which the distance is to be calculated.
933
+
934
+ Returns:
935
+ - The Nonintersection distance between the two vectors.
936
+
675
937
  References:
676
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
938
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
677
939
  Measures between Probability Density Functions. International
678
940
  Journal of Mathematical Models and Methods in Applied Sciences.
679
941
  1(4), 300-307.
942
+
943
+ Notes:
944
+ When used for comparing two probability density functions (pdfs),
945
+ Nonintersection distance equals half of Cityblock distance.
680
946
  """
681
- u,v = np.asarray(u), np.asarray(v)
947
+ u, v = np.asarray(u), np.asarray(v)
682
948
  return 1 - np.sum(np.minimum(u, v))
683
949
 
684
-
685
950
  def pearson_chisq(self, u, v):
686
951
  """
687
- Pearson chi-square divergence.
688
- Notes:
689
- Pearson chi-square divergence is asymmetric.
952
+ Calculate the Pearson chi-square divergence between two vectors.
953
+
954
+ Parameters:
955
+ - u, v: Input vectors between which the divergence is to be calculated.
956
+
957
+ Returns:
958
+ - The Pearson chi-square divergence between the two vectors.
959
+
690
960
  References:
691
- 1. Pearson K. (1900) On the Criterion that a given system of
961
+ 1. Pearson K. (1900) On the Criterion that a given system of
692
962
  deviations from the probable in the case of correlated system
693
963
  of variables is such that it can be reasonably supposed to have
694
964
  arisen from random sampling, Phil. Mag. 50, 157-172.
695
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
965
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
696
966
  Measures between Probability Density Functions. International
697
967
  Journal of Mathematical Models and Methods in Applied Sciences.
698
968
  1(4), 300-307.
699
- """
700
- u,v = np.asarray(u), np.asarray(v)
701
- with np.errstate(divide='ignore', invalid='ignore'):
702
- return np.sum(np.where(v != 0, (u-v)**2/v, 0))
703
969
 
970
+ Notes:
971
+ Pearson chi-square divergence is asymmetric.
972
+ """
973
+ u, v = np.asarray(u), np.asarray(v)
974
+ with np.errstate(divide="ignore", invalid="ignore"):
975
+ return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
704
976
 
705
977
  def penroseshape(self, u, v):
706
978
  """
707
- Penrose shape distance.
979
+ Calculate the Penrose shape distance between two vectors.
980
+
981
+ Parameters:
982
+ - u, v: Input vectors between which the distance is to be calculated.
983
+
984
+ Returns:
985
+ - The Penrose shape distance between the two vectors.
986
+
708
987
  References:
709
- 1. Deza M, Deza E (2009) Encyclopedia of Distances.
988
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
710
989
  Springer-Verlag Berlin Heidelberg. 1-590.
711
990
  """
712
- u,v = np.asarray(u), np.asarray(v)
991
+ u, v = np.asarray(u), np.asarray(v)
713
992
  umu = np.mean(u)
714
993
  vmu = np.mean(v)
715
- return np.sqrt(np.sum(((u-umu)-(v-vmu))**2))
716
-
994
+ return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
717
995
 
718
996
  def prob_chisq(self, u, v):
719
997
  """
720
- Probabilistic chi-square distance.
998
+ Calculate the Probabilistic chi-square distance between two vectors.
999
+
1000
+ Parameters:
1001
+ - u, v: Input vectors between which the distance is to be calculated.
1002
+
1003
+ Returns:
1004
+ - The Probabilistic chi-square distance between the two vectors.
1005
+
721
1006
  Notes:
722
1007
  Added by SC.
723
1008
  """
724
- u,v = np.asarray(u), np.asarray(v)
1009
+ u, v = np.asarray(u), np.asarray(v)
725
1010
  uvsum = u + v
726
- with np.errstate(divide='ignore', invalid='ignore'):
727
- return 2*np.sum(np.where(uvsum != 0, (u-v)**2/uvsum, 0))
728
-
1011
+ with np.errstate(divide="ignore", invalid="ignore"):
1012
+ return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
729
1013
 
730
1014
  def ruzicka(self, u, v):
731
1015
  """
732
- Ruzicka distance.
1016
+ Calculate the Ruzicka distance between two vectors.
1017
+
1018
+ Parameters:
1019
+ - u, v: Input vectors between which the distance is to be calculated.
1020
+
1021
+ Returns:
1022
+ - The Ruzicka distance between the two vectors.
1023
+
733
1024
  Notes:
734
- Added by SC.
1025
+ Added by SC.
735
1026
  """
736
- u,v = np.asarray(u), np.asarray(v)
1027
+ u, v = np.asarray(u), np.asarray(v)
737
1028
  den = np.sum(np.maximum(u, v))
738
-
739
- return 1 - np.sum(np.minimum(u, v)) / den
740
1029
 
1030
+ return 1 - np.sum(np.minimum(u, v)) / den
741
1031
 
742
1032
  def sorensen(self, u, v):
743
1033
  """
744
- Sorensen distance.
745
- Sorensen distance equals Manhattan distance divided by the sum of the two vectors.
1034
+ Calculate the Sorensen distance between two vectors.
1035
+
1036
+ Parameters:
1037
+ - u, v: Input vectors between which the distance is to be calculated.
1038
+
1039
+ Returns:
1040
+ - The Sorensen distance between the two vectors.
1041
+
746
1042
  Notes:
1043
+ The Sorensen distance equals the Manhattan distance divided by the sum of the two vectors.
1044
+
747
1045
  Added by SC.
748
1046
  """
749
- u,v = np.asarray(u), np.asarray(v)
1047
+ u, v = np.asarray(u), np.asarray(v)
750
1048
  return np.sum(np.abs(u - v)) / np.sum(u + v)
751
1049
 
752
-
753
1050
  def soergel(self, u, v):
754
1051
  """
755
- Soergel distance.
1052
+ Calculate the Soergel distance between two vectors.
1053
+
1054
+ Parameters:
1055
+ - u, v: Input vectors between which the distance is to be calculated.
1056
+
1057
+ Returns:
1058
+ - The Soergel distance between the two vectors.
1059
+
756
1060
  Notes:
757
1061
  Equals Tanimoto distance.
1062
+
758
1063
  References:
759
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1064
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
760
1065
  Measures between Probability Density Functions. International
761
1066
  Journal of Mathematical Models and Methods in Applied Sciences.
762
1067
  1(4), 300-307.
763
1068
  """
764
- u,v = np.asarray(u), np.asarray(v)
1069
+ u, v = np.asarray(u), np.asarray(v)
765
1070
  return np.sum(np.abs(u - v)) / np.sum(np.maximum(u, v))
766
1071
 
767
-
768
1072
  def squared_chisq(self, u, v):
769
1073
  """
770
- Squared chi-square distance.
771
- Synonyms:
772
- Triangular discrimination
1074
+ Calculate the Squared chi-square distance between two vectors.
1075
+
1076
+ Parameters:
1077
+ - u, v: Input vectors between which the distance is to be calculated.
1078
+
1079
+ Returns:
1080
+ - The Squared chi-square distance between the two vectors.
1081
+
773
1082
  References:
774
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1083
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
775
1084
  Measures between Probability Density Functions. International
776
1085
  Journal of Mathematical Models and Methods in Applied Sciences.
777
1086
  1(4), 300-307.
778
1087
  """
779
- u,v = np.asarray(u), np.asarray(v)
1088
+ u, v = np.asarray(u), np.asarray(v)
780
1089
  uvsum = u + v
781
- with np.errstate(divide='ignore', invalid='ignore'):
782
- return np.sum(np.where(uvsum != 0, (u-v)**2/uvsum, 0))
783
-
1090
+ with np.errstate(divide="ignore", invalid="ignore"):
1091
+ return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
784
1092
 
785
1093
  def squaredchord(self, u, v):
786
1094
  """
787
- Squared-chord distance.
788
- Notes:
789
- Equals to squared Matusita distance.
790
- Reference:
791
- 1. Gavin DG et al. (2003) A statistical approach to evaluating
1095
+ Calculate the Squared-chord distance between two vectors.
1096
+
1097
+ Parameters:
1098
+ - u, v: Input vectors between which the distance is to be calculated.
1099
+
1100
+ Returns:
1101
+ - The Squared-chord distance between the two vectors.
1102
+
1103
+ References:
1104
+ 1. Gavin DG et al. (2003) A statistical approach to evaluating
792
1105
  distance metrics and analog assignments for pollen records.
793
1106
  Quaternary Research 60:356–367.
794
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1107
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
795
1108
  Measures between Probability Density Functions. International
796
1109
  Journal of Mathematical Models and Methods in Applied Sciences.
797
1110
  1(4), 300-307.
798
-
799
- """
800
- u,v = np.asarray(u), np.asarray(v)
801
- return np.sum((np.sqrt(u) - np.sqrt(v))**2)
802
1111
 
1112
+ Notes:
1113
+ Equals the squared Matusita distance.
1114
+ """
1115
+ u, v = np.asarray(u), np.asarray(v)
1116
+ return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
803
1117
 
804
1118
  def squared_euclidean(self, u, v):
805
1119
  """
806
- Squared Euclidean distance.
807
- Notes:
808
- Equals to squared Euclidean distance.
809
- Reference:
810
- 1. Gavin DG et al. (2003) A statistical approach to evaluating
1120
+ Calculate the Squared Euclidean distance between two vectors.
1121
+
1122
+ Parameters:
1123
+ - u, v: Input vectors between which the distance is to be calculated.
1124
+
1125
+ Returns:
1126
+ - The Squared Euclidean distance between the two vectors.
1127
+
1128
+ References:
1129
+ 1. Gavin DG et al. (2003) A statistical approach to evaluating
811
1130
  distance metrics and analog assignments for pollen records.
812
1131
  Quaternary Research 60:356–367.
813
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1132
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
814
1133
  Measures between Probability Density Functions. International
815
1134
  Journal of Mathematical Models and Methods in Applied Sciences.
816
1135
  1(4), 300-307.
1136
+
1137
+ Notes:
1138
+ Equals the square of the Euclidean distance.
817
1139
  """
818
- u,v = np.asarray(u), np.asarray(v)
1140
+ u, v = np.asarray(u), np.asarray(v)
819
1141
  return np.dot((u - v), (u - v))
820
1142
 
821
-
822
1143
  def taneja(self, u, v):
823
1144
  """
824
- Taneja distance.
1145
+ Calculate the Taneja distance between two vectors.
1146
+
1147
+ Parameters:
1148
+ - u, v: Input vectors between which the distance is to be calculated.
1149
+
1150
+ Returns:
1151
+ - The Taneja distance between the two vectors.
1152
+
825
1153
  References:
826
1154
  1. Taneja IJ. (1995), New Developments in Generalized Information
827
1155
  Measures, Chapter in: Advances in Imaging and Electron Physics,
828
1156
  Ed. P.W. Hawkes, 91, 37-135.
829
- 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1157
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
830
1158
  Measures between Probability Density Functions. International
831
1159
  Journal of Mathematical Models and Methods in Applied Sciences.
832
1160
  1(4), 300-307.
833
1161
  """
834
- u,v = np.asarray(u), np.asarray(v)
835
- u = np.where(u==0, self.epsilon, u)
836
- v = np.where(v==0, self.epsilon, v)
1162
+ u, v = np.asarray(u), np.asarray(v)
1163
+ u = np.where(u == 0, self.epsilon, u)
1164
+ v = np.where(v == 0, self.epsilon, v)
837
1165
  uvsum = u + v
838
- return np.sum((uvsum/2)*np.log(uvsum/(2*np.sqrt(u*v))))
839
-
1166
+ return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
840
1167
 
841
1168
  def tanimoto(self, u, v):
842
1169
  """
843
- Tanimoto distance.
844
- Notes:
845
- Equals Soergel distance.
1170
+ Calculate the Tanimoto distance between two vectors.
1171
+
1172
+ Parameters:
1173
+ - u, v: Input vectors between which the distance is to be calculated.
1174
+
1175
+ Returns:
1176
+ - The Tanimoto distance between the two vectors.
1177
+
846
1178
  References:
847
- 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1179
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
848
1180
  Measures between Probability Density Functions. International
849
1181
  Journal of Mathematical Models and Methods in Applied Sciences.
850
- 1(4), 300-307
1182
+ 1(4), 300-307.
1183
+
1184
+ Notes:
1185
+ Equals Soergel distance.
851
1186
  """
852
- u,v = np.asarray(u), np.asarray(v)
853
- #return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1187
+ u, v = np.asarray(u), np.asarray(v)
1188
+ # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
854
1189
  usum = np.sum(u)
855
1190
  vsum = np.sum(v)
856
1191
  minsum = np.sum(np.minimum(u, v))
857
- return (usum + vsum - 2*minsum) / (usum + vsum - minsum)
858
-
1192
+ return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
859
1193
 
860
1194
  def topsoe(self, u, v):
861
1195
  """
862
- Topsøe distance.
863
- Synonyms:
864
- Information statistic
865
- Notes:
866
- Equals two times Jensen-Shannon divergence.
1196
+ Calculate the Topsøe distance between two vectors.
1197
+
1198
+ Parameters:
1199
+ - u, v: Input vectors between which the distance is to be calculated.
1200
+
1201
+ Returns:
1202
+ - The Topsøe distance between the two vectors.
1203
+
867
1204
  References:
868
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1205
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
869
1206
  Measures between Probability Density Functions. International
870
1207
  Journal of Mathematical Models and Methods in Applied Sciences.
871
- 1(4), 300-307
1208
+ 1(4), 300-307.
1209
+
1210
+ Notes:
1211
+ Equals two times Jensen-Shannon divergence.
872
1212
  """
873
- u,v = np.asarray(u), np.asarray(v)
874
- u = np.where(u==0, self.epsilon, u)
875
- v = np.where(v==0, self.epsilon, v)
876
- dl = u * np.log(2*u/(u+v))
877
- dr = v * np.log(2*v/(u+v))
1213
+ u, v = np.asarray(u), np.asarray(v)
1214
+ u = np.where(u == 0, self.epsilon, u)
1215
+ v = np.where(v == 0, self.epsilon, v)
1216
+ dl = u * np.log(2 * u / (u + v))
1217
+ dr = v * np.log(2 * v / (u + v))
878
1218
  return np.sum(dl + dr)
879
1219
 
880
-
881
1220
  def vicis_symmetric_chisq(self, u, v):
882
1221
  """
883
- Vicis Symmetric chi-square distance.
1222
+ Calculate the Vicis Symmetric chi-square distance between two vectors.
1223
+
1224
+ Parameters:
1225
+ - u, v: Input vectors between which the distance is to be calculated.
1226
+
1227
+ Returns:
1228
+ - The Vicis Symmetric chi-square distance between the two vectors.
1229
+
884
1230
  References:
885
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1231
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
886
1232
  Measures between Probability Density Functions. International
887
1233
  Journal of Mathematical Models and Methods in Applied Sciences.
888
1234
  1(4), 300-307
889
1235
  """
890
- u,v = np.asarray(u), np.asarray(v)
891
- with np.errstate(divide='ignore', invalid='ignore'):
892
- u_v = (u - v)**2
893
- uvmin = np.minimum(u, v)**2
894
- return np.sum(np.where(uvmin != 0, u_v/uvmin, 0))
895
-
1236
+ u, v = np.asarray(u), np.asarray(v)
1237
+ with np.errstate(divide="ignore", invalid="ignore"):
1238
+ u_v = (u - v) ** 2
1239
+ uvmin = np.minimum(u, v) ** 2
1240
+ return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
896
1241
 
897
1242
  def vicis_wave_hedges(self, u, v):
898
1243
  """
899
- Vicis-Wave Hedges distance.
1244
+ Calculate the Vicis-Wave Hedges distance between two vectors.
1245
+
1246
+ Parameters:
1247
+ - u, v: Input vectors between which the distance is to be calculated.
1248
+
1249
+ Returns:
1250
+ - The Vicis-Wave Hedges distance between the two vectors.
1251
+
900
1252
  References:
901
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1253
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
902
1254
  Measures between Probability Density Functions. International
903
1255
  Journal of Mathematical Models and Methods in Applied Sciences.
904
- 1(4), 300-307
1256
+ 1(4), 300-307.
905
1257
  """
906
- u,v = np.asarray(u), np.asarray(v)
907
- with np.errstate(divide='ignore', invalid='ignore'):
1258
+ u, v = np.asarray(u), np.asarray(v)
1259
+ with np.errstate(divide="ignore", invalid="ignore"):
908
1260
  u_v = abs(u - v)
909
1261
  uvmin = np.minimum(u, v)
910
- return np.sum(np.where(uvmin != 0, u_v/uvmin, 0))
911
-
1262
+ return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
912
1263
 
913
1264
  def wave_hedges(self, u, v):
914
1265
  """
915
- Wave Hedges distance.
1266
+ Calculate the Wave Hedges distance between two vectors.
1267
+
1268
+ Parameters:
1269
+ - u, v: Input vectors between which the distance is to be calculated.
1270
+
1271
+ Returns:
1272
+ - The Wave Hedges distance between the two vectors.
1273
+
916
1274
  References:
917
- 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1275
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
918
1276
  Measures between Probability Density Functions. International
919
1277
  Journal of Mathematical Models and Methods in Applied Sciences.
920
1278
  1(4), 300-307
921
1279
  """
922
- u,v = np.asarray(u), np.asarray(v)
923
- with np.errstate(divide='ignore', invalid='ignore'):
1280
+ u, v = np.asarray(u), np.asarray(v)
1281
+ with np.errstate(divide="ignore", invalid="ignore"):
924
1282
  u_v = abs(u - v)
925
1283
  uvmax = np.maximum(u, v)
926
- return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v/uvmax, 0))
1284
+ return np.sum(np.where(((u_v != 0) & (uvmax != 0)), u_v / uvmax, 0))