risk-network 0.0.9b39-py3-none-any.whl → 0.0.9b40-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.9-beta.39"
+__version__ = "0.0.9-beta.40"
risk/annotations/annotations.py CHANGED
@@ -12,8 +12,9 @@ import networkx as nx
 import nltk
 import numpy as np
 import pandas as pd
-from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
 
 from risk.log import logger
 from scipy.sparse import coo_matrix
@@ -31,11 +32,17 @@ def _setup_nltk():
     except LookupError:
         nltk.download("stopwords")
 
+    try:
+        nltk.data.find("corpora/wordnet")
+    except LookupError:
+        nltk.download("wordnet")
+
 
 # Ensure you have the necessary NLTK data
 _setup_nltk()
-# Initialize English stopwords
-stop_words = set(stopwords.words("english"))
+# Use NLTK's stopwords
+STOP_WORDS = set(stopwords.words("english"))
+LEMMATIZER = WordNetLemmatizer()
 
 
 def load_annotations(
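Note: the updated `_setup_nltk` above now also checks for the WordNet corpus before the module-level `WordNetLemmatizer` is created. A minimal standalone sketch of the same look-up-then-download pattern follows; the helper name `ensure_nltk_resources` is ours, not part of the package.

# Standalone sketch of the resource-guard pattern used above: look up each NLTK
# resource and download it only on LookupError. Mirrors the diff; not a drop-in
# replacement for risk's own _setup_nltk.
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


def ensure_nltk_resources() -> None:
    """Download stopwords and WordNet data only if they are missing."""
    for find_path, download_name in [
        ("corpora/stopwords", "stopwords"),
        ("corpora/wordnet", "wordnet"),
    ]:
        try:
            nltk.data.find(find_path)
        except LookupError:
            nltk.download(download_name)


ensure_nltk_resources()
STOP_WORDS = set(stopwords.words("english"))
LEMMATIZER = WordNetLemmatizer()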
@@ -208,104 +215,121 @@ def define_top_annotations(
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
     """Generate a weighted description from words and their corresponding scores,
-    with support for stopwords filtering and improved weighting logic.
+    using improved weighting logic with normalization, lemmatization, and aggregation.
 
     Args:
-        words_column (pd.Series): A pandas Series containing strings to process.
+        words_column (pd.Series): A pandas Series containing strings (phrases) to process.
         scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
+        str: A coherent description formed from the most frequent and significant words.
     """
-    # Handle case where all scores are the same
+    # Normalize significance scores to [0,1]. If all scores are identical, use 1.
     if scores_column.max() == scores_column.min():
-        normalized_scores = pd.Series([1] * len(scores_column))
+        normalized_scores = pd.Series([1] * len(scores_column), index=scores_column.index)
     else:
-        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
 
-    # Combine words and normalized scores to create weighted words
+    # Accumulate weighted counts for each token (after cleaning and lemmatization)
+    weighted_counts = {}
+    for phrase, score in zip(words_column, normalized_scores):
+        # Tokenize the phrase
+        tokens = word_tokenize(str(phrase))
+        # Determine the weight (scale factor; here multiplying normalized score by 10)
+        weight = max(1, int((0 if pd.isna(score) else score) * 10))
+        for token in tokens:
+            # Clean token: lowercase and remove extraneous punctuation (but preserve intra-word hyphens)
+            token_clean = re.sub(r"[^\w\-]", "", token.lower()).strip()
+            if not token_clean:
+                continue
+            # Skip tokens that are pure numbers
+            if token_clean.isdigit():
+                continue
+            # Skip stopwords
+            if token_clean in STOP_WORDS:
+                continue
+            # Lemmatize the token to merge similar forms
+            token_norm = LEMMATIZER.lemmatize(token_clean)
+            weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + weight
+
+    # Reconstruct a weighted token list by repeating each token by its aggregated count.
     weighted_words = []
-    for word, score in zip(words_column, normalized_scores):
-        word = str(word)
-        if word not in stop_words:  # Skip stopwords
-            weight = max(1, int((0 if pd.isna(score) else score) * 10))
-            weighted_words.extend([word] * weight)
-
-    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
-    tokens = word_tokenize(" ".join(weighted_words))
-    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    for token, count in weighted_counts.items():
+        weighted_words.extend([token] * count)
+
+    # Combine tokens that match number-word patterns (e.g. "4-alpha") and remove pure numeric tokens.
     combined_tokens = []
-    for token in tokens:
-        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+    for token in weighted_words:
         if re.match(r"^\d+-\w+", token):
             combined_tokens.append(token)
-        elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
-            # Ignore pure numbers as descriptions unless necessary
+        elif token.replace(".", "", 1).isdigit():
             continue
         else:
             combined_tokens.append(token)
 
-    # Prevent descriptions like just '4' from being selected
+    # If the only token is numeric, return a default value.
     if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
-        return "N/A"  # Return "N/A" for cases where it's just a number
+        return "N/A"
 
-    # Simplify the word list and generate the description
+    # Simplify the token list to remove near-duplicates based on the Jaccard index.
     simplified_words = _simplify_word_list(combined_tokens)
+    # Generate a coherent description from the simplified words.
     description = _generate_coherent_description(simplified_words)
 
     return description
 
 
 def _simplify_word_list(words: List[str], threshold: float = 0.80) -> List[str]:
-    """Filter out words that are too similar based on the Jaccard index, keeping the word with the higher count.
+    """Filter out words that are too similar based on the Jaccard index,
+    keeping the word with the higher aggregated count.
 
     Args:
-        words (list of str): The list of words to be filtered.
+        words (List[str]): The list of tokens to be filtered.
         threshold (float, optional): The similarity threshold for the Jaccard index. Defaults to 0.80.
 
     Returns:
-        list of str: A list of filtered words, where similar words are reduced to the most frequent one.
+        List[str]: A list of filtered words, where similar words are reduced to the most frequent one.
     """
-    # Count the occurrences of each word
+    # Count the occurrences (which reflect the weighted importance)
     word_counts = Counter(words)
     filtered_words = []
     used_words = set()
-    # Iterate through the words to find similar words
-    for word in word_counts:
+
+    # Iterate through words sorted by descending weighted frequency
+    for word in sorted(word_counts, key=lambda w: word_counts[w], reverse=True):
         if word in used_words:
             continue
 
         word_set = set(word)
-        # Find similar words based on the Jaccard index
+        # Find similar words (including the current word) based on the Jaccard index
         similar_words = [
             other_word
             for other_word in word_counts
             if _calculate_jaccard_index(word_set, set(other_word)) >= threshold
         ]
-        # Sort by frequency and choose the most frequent word
+        # Choose the word with the highest weighted count among the similar group
        similar_words.sort(key=lambda w: word_counts[w], reverse=True)
         best_word = similar_words[0]
         filtered_words.append(best_word)
         used_words.update(similar_words)
 
+    # Preserve the original order (by frequency) from the filtered set
     final_words = [word for word in words if word in filtered_words]
 
     return final_words
 
 
 def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
-    """Calculate the Jaccard Index of two sets.
+    """Calculate the Jaccard index between two sets.
 
     Args:
-        set1 (set): The first set for comparison.
-        set2 (set): The second set for comparison.
+        set1 (Set[Any]): The first set.
+        set2 (Set[Any]): The second set.
 
     Returns:
-        float: The Jaccard Index, which is the ratio of the intersection to the union of the two sets.
-        Returns 0 if the union of the sets is empty.
+        float: The Jaccard index (intersection over union). Returns 0 if the union is empty.
     """
     intersection = len(set1.intersection(set2))
     union = len(set1.union(set2))
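Note: `_simplify_word_list` compares the character sets of two tokens (`set(word)`), so spelling variants that share nearly all their letters collapse onto the more frequent token. A small worked example of that character-set Jaccard computation; the helper below is a stand-in for `_calculate_jaccard_index`, and the example tokens are illustrative.

# Worked example of the character-set Jaccard index as used by _simplify_word_list.
def jaccard(set1: set, set2: set) -> float:
    union = set1 | set2
    return len(set1 & set2) / len(union) if union else 0.0


a, b = set("transport"), set("transporter")
# set("transport") has 7 distinct letters; set("transporter") adds only 'e' (8 total)
print(jaccard(a, b))  # 7/8 = 0.875 >= 0.80, so only the more frequent of the two is kept
print(jaccard(set("acid"), set("yule")))  # 0.0 -> clearly dissimilar, both tokens kept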
@@ -313,28 +337,28 @@ def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
 
 
 def _generate_coherent_description(words: List[str]) -> str:
-    """Generate a coherent description from a list of words or numerical string values.
+    """Generate a coherent description from a list of words.
+
     If there is only one unique entry, return it directly.
+    Otherwise, order the words by frequency and join them into a single string.
 
     Args:
-        words (List): A list of words or numerical string values.
+        words (List[str]): A list of tokens.
 
     Returns:
-        str: A coherent description formed by arranging the words in a logical sequence.
+        str: A coherent, space-separated description.
     """
-    # If there are no words, return a keyword indicating no data is available
     if not words:
         return "N/A"
 
-    # If there's only one unique word, return it directly
+    # If there is only one unique word, return it directly
     unique_words = set(words)
     if len(unique_words) == 1:
         return list(unique_words)[0]
 
-    # Count the frequency of each word and sort them by frequency
+    # Count weighted occurrences and sort in descending order.
     word_counts = Counter(words)
     most_common_words = [word for word, _ in word_counts.most_common()]
-    # Join the most common words to form a coherent description based on frequency
     description = " ".join(most_common_words)
 
     return description
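Note: the rewrite of get_weighted_description replaces "repeat each whole phrase by its weight, then tokenize" with per-token aggregation in `weighted_counts`. A simplified, self-contained sketch of just that weighting arithmetic follows; NLTK tokenization, stopword filtering, and lemmatization are omitted, and the phrases and scores are made up.

# Sketch of the new weighting arithmetic only; whitespace split stands in for NLTK.
import pandas as pd

words_column = pd.Series(["amino acid transport", "acid transport", "helix bundle"])
scores_column = pd.Series([8.0, 4.0, 2.0])

# Min-max normalize the scores to [0, 1]
normalized = (scores_column - scores_column.min()) / (scores_column.max() - scores_column.min())

weighted_counts = {}
for phrase, score in zip(words_column, normalized):
    # Same scale factor as the diff: 8.0 -> weight 10, 4.0 -> 3, 2.0 -> 1
    weight = max(1, int((0 if pd.isna(score) else score) * 10))
    for token in str(phrase).lower().split():
        weighted_counts[token] = weighted_counts.get(token, 0) + weight

print(weighted_counts)
# {'amino': 10, 'acid': 13, 'transport': 13, 'helix': 1, 'bundle': 1}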
risk/neighborhoods/domains.py CHANGED
@@ -14,17 +14,27 @@ from sklearn.metrics import silhouette_score
 from tqdm import tqdm
 
 from risk.annotations import get_weighted_description
-from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS
 from risk.log import logger
 
 
+# Define constants for clustering
+# fmt: off
+LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
+LINKAGE_METRICS = {
+    "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+    "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
+    "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
+}
+# fmt: on
+
+
 def define_domains(
     top_annotations: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
-    linkage_threshold: float,
+    linkage_threshold: Union[float, str],
 ) -> pd.DataFrame:
     """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
@@ -33,9 +43,9 @@ def define_domains(
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
-        linkage_method (str): The linkage method for clustering.
-        linkage_metric (str): The linkage metric for clustering.
-        linkage_threshold (float): The threshold for clustering.
+        linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
+        linkage_metric (str): The linkage metric for clustering. Choose "auto" to optimize.
+        linkage_threshold (float, str): The threshold for clustering. Choose "auto" to optimize.
 
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
@@ -55,9 +65,8 @@ def define_domains(
         # Perform hierarchical clustering
         Z = linkage(m, method=best_linkage, metric=best_metric)
         logger.warning(
-            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
+            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
         )
-        logger.debug(f"Optimal linkage threshold: {round(best_threshold, 3)}")
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
         # Assign domains to the annotations matrix
@@ -209,9 +218,9 @@ def _optimize_silhouette_across_linkage_and_metrics(
     Args:
         m (np.ndarray): Data matrix.
         linkage_criterion (str): Clustering criterion.
-        linkage_method (str): Linkage method for clustering.
-        linkage_metric (str): Linkage metric for clustering.
-        linkage_threshold (Union[str, float]): Threshold for clustering. Set to "auto" to optimize.
+        linkage_method (str): Linkage method for clustering. Choose "auto" to optimize.
+        linkage_metric (str): Linkage metric for clustering. Choose "auto" to optimize.
+        linkage_threshold (Union[str, float]): Threshold for clustering. Choose "auto" to optimize.
 
     Returns:
         Tuple[str, str, float]:
@@ -226,8 +235,8 @@ def _optimize_silhouette_across_linkage_and_metrics(
     best_overall_score = -np.inf
 
     # Set linkage methods and metrics to all combinations if "auto" is selected
-    linkage_methods = GROUP_LINKAGE_METHODS if linkage_method == "auto" else [linkage_method]
-    linkage_metrics = GROUP_DISTANCE_METRICS if linkage_metric == "auto" else [linkage_metric]
+    linkage_methods = LINKAGE_METHODS if linkage_method == "auto" else [linkage_method]
+    linkage_metrics = LINKAGE_METRICS if linkage_metric == "auto" else [linkage_metric]
     total_combinations = len(linkage_methods) * len(linkage_metrics)
 
     # Evaluating optimal linkage method and metric
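Note: with "auto", the optimizer sweeps every LINKAGE_METHODS × LINKAGE_METRICS combination and keeps the pair with the best silhouette score. Below is a minimal sketch of that selection loop on toy data, assuming a "distance" criterion, a reduced method/metric grid, and a fixed relative threshold of 0.2; the package's actual _optimize_silhouette_across_linkage_and_metrics also tunes the threshold and reports progress with tqdm.

# Sketch of silhouette-driven selection over linkage method x metric (toy data).
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(0)
m = rng.random((30, 5))  # stand-in for the significance matrix

methods = {"average", "complete", "ward"}
metrics = {"euclidean", "cityblock"}
best = (None, None, -np.inf)

for method in methods:
    for metric in metrics:
        if method == "ward" and metric != "euclidean":
            continue  # ward linkage requires Euclidean distances
        Z = linkage(m, method=method, metric=metric)
        max_d = np.max(Z[:, 2]) * 0.2  # relative threshold, as in the diff's max_d_optimal
        labels = fcluster(Z, t=max_d, criterion="distance")
        n_clusters = len(set(labels))
        if not 2 <= n_clusters <= len(m) - 1:
            continue  # silhouette_score needs at least 2 and fewer than n clusters
        score = silhouette_score(m, labels, metric=metric)
        if score > best[2]:
            best = (method, metric, score)

print(best)  # (best_method, best_metric, best_silhouette_score)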
risk/network/graph/api.py CHANGED
@@ -4,7 +4,7 @@ risk/network/graph/api
 """
 
 import copy
-from typing import Any, Dict
+from typing import Any, Dict, Union
 
 import networkx as nx
 import pandas as pd
@@ -42,7 +42,7 @@ class GraphAPI:
         linkage_criterion: str = "distance",
         linkage_method: str = "average",
         linkage_metric: str = "yule",
-        linkage_threshold: float = 0.2,
+        linkage_threshold: Union[float, str] = 0.2,
         min_cluster_size: int = 5,
         max_cluster_size: int = 1000,
     ) -> Graph:
@@ -58,9 +58,11 @@ class GraphAPI:
             impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
             prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
             linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
-            linkage_method (str, optional): Clustering method to use. Defaults to "average".
-            linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
-            linkage_threshold (float, optional): Threshold for clustering. Defaults to 0.2.
+            linkage_method (str, optional): Clustering method to use. Choose "auto" to optimize. Defaults to "average".
+            linkage_metric (str, optional): Metric to use for calculating distances. Choose "auto" to optimize.
+                Defaults to "yule".
+            linkage_threshold (float, str, optional): Threshold for clustering. Choose "auto" to optimize.
+                Defaults to 0.2.
             min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
             max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
 
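Note: for callers, the practical effect is that all three linkage settings can now be the string "auto" instead of fixed values. The graph-building method name on GraphAPI is not shown in this diff, so the sketch below only assembles the relevant keyword arguments.

# The linkage-related keyword arguments, shown as a plain dict; pass them to the
# graph-building call of your risk-network version (entry point not shown here).
linkage_kwargs = {
    "linkage_criterion": "distance",
    "linkage_method": "auto",     # new: sweep all linkage methods
    "linkage_metric": "auto",     # new: sweep all distance metrics
    "linkage_threshold": "auto",  # new: optimize the threshold instead of fixing 0.2
}
print(linkage_kwargs)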
risk_network-0.0.9b40.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.9b39
+Version: 0.0.9b40
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network-0.0.9b40.dist-info/RECORD CHANGED
@@ -1,8 +1,7 @@
-risk/__init__.py,sha256=ewYSGDLHigkwFLI9IW6qDbQk4uS6nb3RTd-k2GCD1b0,127
-risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
+risk/__init__.py,sha256=2Ucmxw9wGNzUhqe_QGlEi2pnGhkdOrl9wa8w-MUIfm8,127
 risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
+risk/annotations/annotations.py,sha256=Sq24YBtNPMxXOvWoxqPwOJ4bsFAMIBYpVWjEvsQPtNo,14912
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
@@ -10,13 +9,13 @@ risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
 risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
 risk/neighborhoods/api.py,sha256=ywngw2TQVV27gYlWDXcs8-qnmeepnvb-W9ov6J6VEPM,23341
 risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
-risk/neighborhoods/domains.py,sha256=Yu93mKNCuOpBGa87knAH-XIl260kf-rswPfn3aC9GNo,13937
+risk/neighborhoods/domains.py,sha256=4K1tbiia3_TQKUrGdfmKVdYlRD2EEzPnMCKRv6IGxu4,14448
 risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
 risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
 risk/network/geometry.py,sha256=eVtGHMgBf9fEqQZUFdHWjw-zFYYpfUONoHFSAxoRkug,6219
 risk/network/io.py,sha256=RCH4nQdgYDXcNwMfpSz7qEmPO0pJ1p9fL0rNQptsQrc,21673
 risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
-risk/network/graph/api.py,sha256=9yoviP7EqFU1okLJZlaLBZzFNmjOHv30B1JgDFNP1bg,8399
+risk/network/graph/api.py,sha256=xS_rNDvZPdwIar2E9x9BKMeR0DcYuwcHiUpc_EcJ4-o,8536
 risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
 risk/network/graph/summary.py,sha256=Y_0rL2C1UoQeZQIPVe5LbaCO356Mcc8HisnrXwQsRm8,10289
 risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
@@ -34,8 +33,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
 risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
 risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
-risk_network-0.0.9b39.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b39.dist-info/METADATA,sha256=y3xDx1OCYpCS1OgBMUTNIK5y8HFORWHag4PLnyAXc5g,47627
-risk_network-0.0.9b39.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-risk_network-0.0.9b39.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b39.dist-info/RECORD,,
+risk_network-0.0.9b40.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b40.dist-info/METADATA,sha256=0gk-H9_4YiOCT5iykSjB89qALDejboNUa2mZy_XtLNc,47627
+risk_network-0.0.9b40.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+risk_network-0.0.9b40.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b40.dist-info/RECORD,,
risk/constants.py DELETED
@@ -1,31 +0,0 @@
-"""
-risk/constants
-~~~~~~~~~~~~~~
-"""
-
-GROUP_LINKAGE_METHODS = ["single", "complete", "average", "weighted", "centroid", "median", "ward"]
-
-GROUP_DISTANCE_METRICS = [
-    "braycurtis",
-    "canberra",
-    "chebyshev",
-    "cityblock",
-    "correlation",
-    "cosine",
-    "dice",
-    "euclidean",
-    "hamming",
-    "jaccard",
-    "jensenshannon",
-    "kulczynski1",
-    "mahalanobis",
-    "matching",
-    "minkowski",
-    "rogerstanimoto",
-    "russellrao",
-    "seuclidean",
-    "sokalmichener",
-    "sokalsneath",
-    "sqeuclidean",
-    "yule",
-]