risk-network 0.0.9b39__tar.gz → 0.0.9b41__tar.gz
This diff compares publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/PKG-INFO +1 -1
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/__init__.py +1 -1
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/annotations/annotations.py +73 -48
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/neighborhoods/domains.py +21 -12
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/graph/api.py +7 -5
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk_network.egg-info/PKG-INFO +1 -1
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk_network.egg-info/SOURCES.txt +0 -1
- risk_network-0.0.9b39/risk/constants.py +0 -31
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/LICENSE +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/MANIFEST.in +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/README.md +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/pyproject.toml +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/annotations/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/annotations/io.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/log/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/log/console.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/log/parameters.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/neighborhoods/api.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/neighborhoods/community.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/neighborhoods/neighborhoods.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/geometry.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/graph/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/graph/graph.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/graph/summary.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/io.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/api.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/canvas.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/contour.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/labels.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/network.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/plotter.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/utils/colors.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/network/plotter/utils/layout.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/risk.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/permutation/__init__.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/permutation/permutation.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/permutation/test_functions.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/significance.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk/stats/stat_tests.py +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/setup.cfg +0 -0
- {risk_network-0.0.9b39 → risk_network-0.0.9b41}/setup.py +0 -0
risk/annotations/annotations.py

@@ -12,8 +12,9 @@ import networkx as nx
 import nltk
 import numpy as np
 import pandas as pd
-from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
 
 from risk.log import logger
 from scipy.sparse import coo_matrix
@@ -24,18 +25,25 @@ def _setup_nltk():
     try:
         nltk.data.find("tokenizers/punkt")
     except LookupError:
-        nltk.download("punkt")
+        # Force download if not found
+        nltk.download("punkt", force=True)
 
     try:
         nltk.data.find("corpora/stopwords")
     except LookupError:
-        nltk.download("stopwords")
+        nltk.download("stopwords", force=True)
+
+    try:
+        nltk.data.find("corpora/wordnet")
+    except LookupError:
+        nltk.download("wordnet", force=True)
 
 
 # Ensure you have the necessary NLTK data
 _setup_nltk()
-#
-
+# Use NLTK's stopwords
+STOP_WORDS = set(stopwords.words("english"))
+LEMMATIZER = WordNetLemmatizer()
 
 
 def load_annotations(
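For readers unfamiliar with the new module-level objects, here is a self-contained sketch of what `STOP_WORDS` and `LEMMATIZER` contribute to the tokenization pipeline further down in this file. The example sentence and expected output are illustrative only (not taken from the package), and the exact tokens depend on the installed NLTK version and data.

```python
# Illustrative sketch (not package code): stopword removal plus WordNet
# lemmatization, mirroring the module-level setup added above. Requires the
# "punkt", "stopwords", and "wordnet" NLTK datasets that _setup_nltk() fetches.
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

STOP_WORDS = set(stopwords.words("english"))
LEMMATIZER = WordNetLemmatizer()

tokens = word_tokenize("Regulation of amino-acid transporters")
cleaned = [
    LEMMATIZER.lemmatize(t.lower())  # merge word forms, e.g. "transporters" -> "transporter"
    for t in tokens
    if t.lower() not in STOP_WORDS   # drop filler words such as "of"
]
print(cleaned)  # roughly: ['regulation', 'amino-acid', 'transporter']
```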
@@ -208,104 +216,121 @@ def define_top_annotations(
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
     """Generate a weighted description from words and their corresponding scores,
-
+    using improved weighting logic with normalization, lemmatization, and aggregation.
 
     Args:
-        words_column (pd.Series): A pandas Series containing strings to process.
+        words_column (pd.Series): A pandas Series containing strings (phrases) to process.
         scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words
+        str: A coherent description formed from the most frequent and significant words.
     """
-    #
+    # Normalize significance scores to [0,1]. If all scores are identical, use 1.
     if scores_column.max() == scores_column.min():
-        normalized_scores = pd.Series([1] * len(scores_column))
+        normalized_scores = pd.Series([1] * len(scores_column), index=scores_column.index)
     else:
-        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
            scores_column.max() - scores_column.min()
        )
 
-    #
+    # Accumulate weighted counts for each token (after cleaning and lemmatization)
+    weighted_counts = {}
+    for phrase, score in zip(words_column, normalized_scores):
+        # Tokenize the phrase
+        tokens = word_tokenize(str(phrase))
+        # Determine the weight (scale factor; here multiplying normalized score by 10)
+        weight = max(1, int((0 if pd.isna(score) else score) * 10))
+        for token in tokens:
+            # Clean token: lowercase and remove extraneous punctuation (but preserve intra-word hyphens)
+            token_clean = re.sub(r"[^\w\-]", "", token.lower()).strip()
+            if not token_clean:
+                continue
+            # Skip tokens that are pure numbers
+            if token_clean.isdigit():
+                continue
+            # Skip stopwords
+            if token_clean in STOP_WORDS:
+                continue
+            # Lemmatize the token to merge similar forms
+            token_norm = LEMMATIZER.lemmatize(token_clean)
+            weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + weight
+
+    # Reconstruct a weighted token list by repeating each token by its aggregated count.
     weighted_words = []
-    for
-
-
-
-        weighted_words.extend([word] * weight)
-
-    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
-    tokens = word_tokenize(" ".join(weighted_words))
-    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    for token, count in weighted_counts.items():
+        weighted_words.extend([token] * count)
+
+    # Combine tokens that match number-word patterns (e.g. "4-alpha") and remove pure numeric tokens.
     combined_tokens = []
-    for token in
-        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+    for token in weighted_words:
         if re.match(r"^\d+-\w+", token):
             combined_tokens.append(token)
-        elif token.replace(".", "", 1).isdigit():
-            # Ignore pure numbers as descriptions unless necessary
+        elif token.replace(".", "", 1).isdigit():
             continue
         else:
             combined_tokens.append(token)
 
-    #
+    # If the only token is numeric, return a default value.
     if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
-        return "N/A"
+        return "N/A"
 
-    # Simplify the
+    # Simplify the token list to remove near-duplicates based on the Jaccard index.
     simplified_words = _simplify_word_list(combined_tokens)
+    # Generate a coherent description from the simplified words.
     description = _generate_coherent_description(simplified_words)
 
     return description
 
 
 def _simplify_word_list(words: List[str], threshold: float = 0.80) -> List[str]:
-    """Filter out words that are too similar based on the Jaccard index,
+    """Filter out words that are too similar based on the Jaccard index,
+    keeping the word with the higher aggregated count.
 
     Args:
-        words (
+        words (List[str]): The list of tokens to be filtered.
         threshold (float, optional): The similarity threshold for the Jaccard index. Defaults to 0.80.
 
     Returns:
-
+        List[str]: A list of filtered words, where similar words are reduced to the most frequent one.
     """
-    # Count the occurrences
+    # Count the occurrences (which reflect the weighted importance)
     word_counts = Counter(words)
     filtered_words = []
     used_words = set()
-
-
+
+    # Iterate through words sorted by descending weighted frequency
+    for word in sorted(word_counts, key=lambda w: word_counts[w], reverse=True):
         if word in used_words:
             continue
 
         word_set = set(word)
-        # Find similar words based on the Jaccard index
+        # Find similar words (including the current word) based on the Jaccard index
         similar_words = [
             other_word
             for other_word in word_counts
             if _calculate_jaccard_index(word_set, set(other_word)) >= threshold
         ]
-        #
+        # Choose the word with the highest weighted count among the similar group
         similar_words.sort(key=lambda w: word_counts[w], reverse=True)
         best_word = similar_words[0]
         filtered_words.append(best_word)
         used_words.update(similar_words)
 
+    # Preserve the original order (by frequency) from the filtered set
     final_words = [word for word in words if word in filtered_words]
 
     return final_words
 
 
 def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
-    """Calculate the Jaccard
+    """Calculate the Jaccard index between two sets.
 
     Args:
-        set1 (
-        set2 (
+        set1 (Set[Any]): The first set.
+        set2 (Set[Any]): The second set.
 
     Returns:
-        float: The Jaccard
-            Returns 0 if the union of the sets is empty.
+        float: The Jaccard index (intersection over union). Returns 0 if the union is empty.
     """
     intersection = len(set1.intersection(set2))
     union = len(set1.union(set2))
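To make the new aggregation step concrete, the following is a minimal, self-contained sketch of the same idea. It is not the package's implementation: plain `str.split` stands in for NLTK tokenization, lemmatization is omitted, and the toy phrases and scores are invented for illustration.

```python
# Minimal sketch of the weighting idea shown above: normalize scores to [0, 1],
# scale by 10 to get integer weights, and accumulate weighted token counts.
import re

import pandas as pd

words = pd.Series(["sugar transport", "sugar transporter activity"])
scores = pd.Series([0.2, 0.9])

normalized = (scores - scores.min()) / (scores.max() - scores.min())  # -> 0.0 and 1.0
weighted_counts = {}
for phrase, score in zip(words, normalized):
    weight = max(1, int((0 if pd.isna(score) else score) * 10))  # -> 1 and 10
    for token in str(phrase).lower().split():
        token = re.sub(r"[^\w\-]", "", token)
        weighted_counts[token] = weighted_counts.get(token, 0) + weight

print(weighted_counts)
# {'sugar': 11, 'transport': 1, 'transporter': 10, 'activity': 10}


def jaccard(set1: set, set2: set) -> float:
    """Intersection over union; 0 if the union is empty."""
    union = set1 | set2
    return len(set1 & set2) / len(union) if union else 0.0


# The simplification step compares tokens as character sets:
# "transport" vs "transporter" -> 7 shared letters / 8 total = 0.875,
# above the 0.80 default threshold, so only the heavier token is kept.
print(jaccard(set("transport"), set("transporter")))  # 0.875
```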
@@ -313,28 +338,28 @@ def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
 
 
 def _generate_coherent_description(words: List[str]) -> str:
-    """Generate a coherent description from a list of words
+    """Generate a coherent description from a list of words.
+
     If there is only one unique entry, return it directly.
+    Otherwise, order the words by frequency and join them into a single string.
 
     Args:
-        words (List): A list of
+        words (List[str]): A list of tokens.
 
     Returns:
-        str: A coherent description
+        str: A coherent, space-separated description.
     """
-    # If there are no words, return a keyword indicating no data is available
     if not words:
         return "N/A"
 
-    # If there
+    # If there is only one unique word, return it directly
     unique_words = set(words)
     if len(unique_words) == 1:
         return list(unique_words)[0]
 
-    # Count
+    # Count weighted occurrences and sort in descending order.
     word_counts = Counter(words)
     most_common_words = [word for word, _ in word_counts.most_common()]
-    # Join the most common words to form a coherent description based on frequency
     description = " ".join(most_common_words)
 
     return description
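For completeness, a tiny sketch of the final step above: unique tokens ordered by their (weighted) frequency and joined into one string. The token list is invented for illustration.

```python
# Sketch of the frequency-ordered join used by _generate_coherent_description.
from collections import Counter

words = ["transporter"] * 3 + ["sugar"] * 2 + ["activity"]
description = " ".join(word for word, _ in Counter(words).most_common())
print(description)  # transporter sugar activity
```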
risk/neighborhoods/domains.py

@@ -14,17 +14,27 @@ from sklearn.metrics import silhouette_score
 from tqdm import tqdm
 
 from risk.annotations import get_weighted_description
-from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS
 from risk.log import logger
 
 
+# Define constants for clustering
+# fmt: off
+LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
+LINKAGE_METRICS = {
+    "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+    "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
+    "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
+}
+# fmt: on
+
+
 def define_domains(
     top_annotations: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
-    linkage_threshold: float,
+    linkage_threshold: Union[float, str],
 ) -> pd.DataFrame:
     """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
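With the method and metric sets now defined next to the clustering code, an "auto" sweep can try each combination and keep the one with the best silhouette score. The sketch below shows that general pattern with SciPy and scikit-learn on random data; it is an assumption about the approach, not the package's `_optimize_silhouette_across_linkage_and_metrics` implementation, and the 0.2 threshold fraction is only a placeholder.

```python
# Hedged sketch of silhouette-based selection over linkage methods and metrics.
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(0)
m = rng.random((30, 5))  # toy data matrix

best_method, best_metric, best_score = None, None, -np.inf
for method in ("average", "complete", "ward"):
    for metric in ("euclidean", "cityblock"):
        if method == "ward" and metric != "euclidean":
            continue  # SciPy's ward linkage requires Euclidean distances
        Z = linkage(m, method=method, metric=metric)
        labels = fcluster(Z, t=0.2 * np.max(Z[:, 2]), criterion="distance")
        if 1 < len(set(labels)) < len(m):  # silhouette needs 2..n-1 clusters
            score = silhouette_score(m, labels, metric=metric)
            if score > best_score:
                best_method, best_metric, best_score = method, metric, score

print(best_method, best_metric, best_score)
```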
@@ -33,9 +43,9 @@ def define_domains(
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
-        linkage_method (str): The linkage method for clustering.
-        linkage_metric (str): The linkage metric for clustering.
-        linkage_threshold (float): The threshold for clustering.
+        linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
+        linkage_metric (str): The linkage metric for clustering. Choose "auto" to optimize.
+        linkage_threshold (float, str): The threshold for clustering. Choose "auto" to optimize.
 
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
@@ -55,9 +65,8 @@
         # Perform hierarchical clustering
         Z = linkage(m, method=best_linkage, metric=best_metric)
         logger.warning(
-            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
+            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
         )
-        logger.debug(f"Optimal linkage threshold: {round(best_threshold, 3)}")
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
         # Assign domains to the annotations matrix
@@ -209,9 +218,9 @@ def _optimize_silhouette_across_linkage_and_metrics(
     Args:
         m (np.ndarray): Data matrix.
         linkage_criterion (str): Clustering criterion.
-        linkage_method (str): Linkage method for clustering.
-        linkage_metric (str): Linkage metric for clustering.
-        linkage_threshold (Union[str, float]): Threshold for clustering.
+        linkage_method (str): Linkage method for clustering. Choose "auto" to optimize.
+        linkage_metric (str): Linkage metric for clustering. Choose "auto" to optimize.
+        linkage_threshold (Union[str, float]): Threshold for clustering. Choose "auto" to optimize.
 
     Returns:
         Tuple[str, str, float]:
@@ -226,8 +235,8 @@
     best_overall_score = -np.inf
 
     # Set linkage methods and metrics to all combinations if "auto" is selected
-    linkage_methods = GROUP_LINKAGE_METHODS if linkage_method == "auto" else [linkage_method]
-    linkage_metrics = GROUP_DISTANCE_METRICS if linkage_metric == "auto" else [linkage_metric]
+    linkage_methods = LINKAGE_METHODS if linkage_method == "auto" else [linkage_method]
+    linkage_metrics = LINKAGE_METRICS if linkage_metric == "auto" else [linkage_metric]
     total_combinations = len(linkage_methods) * len(linkage_metrics)
 
     # Evaluating optimal linkage method and metric
risk/network/graph/api.py

@@ -4,7 +4,7 @@ risk/network/graph/api
 """
 
 import copy
-from typing import Any, Dict
+from typing import Any, Dict, Union
 
 import networkx as nx
 import pandas as pd
@@ -42,7 +42,7 @@ class GraphAPI:
         linkage_criterion: str = "distance",
         linkage_method: str = "average",
         linkage_metric: str = "yule",
-        linkage_threshold: float = 0.2,
+        linkage_threshold: Union[float, str] = 0.2,
         min_cluster_size: int = 5,
         max_cluster_size: int = 1000,
     ) -> Graph:
@@ -58,9 +58,11 @@
             impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
             prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
             linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
-            linkage_method (str, optional): Clustering method to use. Defaults to "average".
-            linkage_metric (str, optional): Metric to use for calculating distances.
-
+            linkage_method (str, optional): Clustering method to use. Choose "auto" to optimize. Defaults to "average".
+            linkage_metric (str, optional): Metric to use for calculating distances. Choose "auto" to optimize.
+                Defaults to "yule".
+            linkage_threshold (float, str, optional): Threshold for clustering. Choose "auto" to optimize.
+                Defaults to 0.2.
             min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
             max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
 
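The widened `linkage_threshold: Union[float, str]` annotation lets callers pass either a numeric fraction or the string "auto". A minimal sketch of that pattern follows; `resolve_threshold` is a hypothetical helper written for illustration, not a function in the package.

```python
# Hypothetical helper illustrating the Union[float, str] pattern used above.
from typing import Union


def resolve_threshold(linkage_threshold: Union[float, str] = 0.2) -> float:
    if linkage_threshold == "auto":
        # Placeholder value: per the diff, the package optimizes the threshold
        # when "auto" is passed rather than returning a fixed number.
        return 0.3
    return float(linkage_threshold)


print(resolve_threshold(0.2), resolve_threshold("auto"))  # 0.2 0.3
```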
risk/constants.py (removed in 0.0.9b41)

@@ -1,31 +0,0 @@
-"""
-risk/constants
-~~~~~~~~~~~~~~
-"""
-
-GROUP_LINKAGE_METHODS = ["single", "complete", "average", "weighted", "centroid", "median", "ward"]
-
-GROUP_DISTANCE_METRICS = [
-    "braycurtis",
-    "canberra",
-    "chebyshev",
-    "cityblock",
-    "correlation",
-    "cosine",
-    "dice",
-    "euclidean",
-    "hamming",
-    "jaccard",
-    "jensenshannon",
-    "kulczynski1",
-    "mahalanobis",
-    "matching",
-    "minkowski",
-    "rogerstanimoto",
-    "russellrao",
-    "seuclidean",
-    "sokalmichener",
-    "sokalsneath",
-    "sqeuclidean",
-    "yule",
-]
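Because `risk/constants.py` is gone in 0.0.9b41, downstream code that imported it will break. Based on the domains.py hunk above, equivalent sets now live next to the clustering code; the import below is an inference from this diff, not a documented public API, and these module-level names may be considered internal.

```python
# 0.0.9b39 (no longer works on 0.0.9b41 -- raises ModuleNotFoundError):
# from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS

# 0.0.9b41 (inferred from the diff above):
from risk.neighborhoods.domains import LINKAGE_METHODS, LINKAGE_METRICS

print(sorted(LINKAGE_METHODS))
```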