risk-network 0.0.9b37__py3-none-any.whl → 0.0.9b38__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as those versions appear in their respective public registries. It is provided for informational purposes only.
- risk/__init__.py +1 -1
- risk/neighborhoods/domains.py +34 -57
- risk/network/graph/api.py +6 -3
- {risk_network-0.0.9b37.dist-info → risk_network-0.0.9b38.dist-info}/METADATA +1 -1
- {risk_network-0.0.9b37.dist-info → risk_network-0.0.9b38.dist-info}/RECORD +8 -8
- {risk_network-0.0.9b37.dist-info → risk_network-0.0.9b38.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b37.dist-info → risk_network-0.0.9b38.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b37.dist-info → risk_network-0.0.9b38.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/neighborhoods/domains.py
CHANGED
@@ -10,6 +10,7 @@ from typing import Tuple, Union
|
|
10
10
|
import numpy as np
|
11
11
|
import pandas as pd
|
12
12
|
from scipy.cluster.hierarchy import linkage, fcluster
|
13
|
+
from scipy.optimize import minimize_scalar
|
13
14
|
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score, silhouette_score
|
14
15
|
from tqdm import tqdm
|
15
16
|
|
@@ -42,8 +43,8 @@ def define_domains(
|
|
42
43
|
clustering or "maxclust" for a fixed number of clusters. Use "off" to skip clustering.
|
43
44
|
linkage_method (str): The linkage method for clustering. Use "auto" to try multiple methods.
|
44
45
|
linkage_metric (str): The linkage metric for clustering. Use "auto" to try multiple metrics.
|
45
|
-
linkage_threshold (float): The linkage threshold for clustering, or one of "silhouette",
|
46
|
-
or "davies_bouldin" to optimize the threshold.
|
46
|
+
linkage_threshold (str, float): The linkage threshold for clustering, or one of "silhouette",
|
47
|
+
"calinski_harabasz", or "davies_bouldin" to optimize the threshold.
|
47
48
|
|
48
49
|
Returns:
|
49
50
|
pd.DataFrame: DataFrame with the primary domain for each node.
|
@@ -229,7 +230,7 @@ def _optimize_linkage_threshold_across_methods_and_metrics(
|
|
229
230
|
linkage_criterion (str): Criterion for fcluster (typically "distance").
|
230
231
|
linkage_method (str): Linkage method for clustering, or "auto" to try multiple methods.
|
231
232
|
linkage_metric (str): Distance metric for clustering, or "auto" to try multiple metrics.
|
232
|
-
linkage_threshold (
|
233
|
+
linkage_threshold (str, float): Either a numeric threshold or one of the following keywords:
|
233
234
|
"silhouette", "calinski_harabasz", or "davies_bouldin" to trigger optimization.
|
234
235
|
|
235
236
|
Returns:
|
@@ -306,86 +307,62 @@ def _find_optimal_linkage_threshold(
|
|
306
307
|
linkage_criterion: str,
|
307
308
|
opt_metric: str = "silhouette",
|
308
309
|
) -> Tuple[float, float]:
|
309
|
-
"""Find the optimal linkage threshold coefficient
|
310
|
-
|
310
|
+
"""Find the optimal linkage threshold coefficient for hierarchical clustering. The function optimizes
|
311
|
+
the threshold value using the specified metric (opt_metric).
|
311
312
|
|
312
313
|
Args:
|
313
|
-
Z (np.ndarray): Linkage matrix.
|
314
|
-
m (np.ndarray): Data matrix.
|
315
|
-
linkage_metric (str):
|
316
|
-
|
317
|
-
|
318
|
-
|
314
|
+
Z (np.ndarray): Linkage matrix generated by a hierarchical clustering algorithm.
|
315
|
+
m (np.ndarray): Data matrix used for clustering.
|
316
|
+
linkage_metric (str): Metric used to calculate distances between data points
|
317
|
+
(e.g., "euclidean" or "cosine").
|
318
|
+
linkage_criterion (str): Criterion to pass to `fcluster`, typically "distance".
|
319
|
+
opt_metric (str, optional): Metric to optimize clustering quality. Options are:
|
320
|
+
"silhouette", "calinski_harabasz", or "davies_bouldin". Defaults to "silhouette".
|
319
321
|
|
320
322
|
Returns:
|
321
323
|
Tuple[float, float]:
|
322
|
-
- best_threshold: The optimal linkage coefficient
|
323
|
-
- best_metric_value: The clustering quality metric achieved
|
324
|
-
"silhouette" and "calinski_harabasz",
|
324
|
+
- best_threshold (float): The optimal linkage threshold coefficient.
|
325
|
+
- best_metric_value (float): The value of the clustering quality metric achieved
|
326
|
+
at the optimal threshold (higher for "silhouette" and "calinski_harabasz",
|
327
|
+
lower for "davies_bouldin").
|
328
|
+
|
329
|
+
Raises:
|
330
|
+
ValueError: If the `opt_metric` argument is not one of the supported metrics.
|
325
331
|
"""
|
332
|
+
# Get the maximum distance in the linkage matrix
|
326
333
|
max_d = np.max(Z[:, 2])
|
327
|
-
lower_bound = 0.0
|
328
|
-
upper_bound = 1.0
|
329
334
|
resolution = 1e-6
|
330
335
|
|
331
336
|
def compute_objective(coefficient: float) -> float:
|
332
|
-
"""Compute the objective function for
|
337
|
+
"""Compute the objective function for optimization."""
|
333
338
|
threshold_val = coefficient * max_d
|
334
339
|
clusters = fcluster(Z, threshold_val, criterion=linkage_criterion)
|
335
340
|
unique_clusters = np.unique(clusters)
|
336
|
-
# Return a heavy penalty if the clustering is trivial.
|
337
341
|
if len(unique_clusters) <= 1 or len(unique_clusters) == m.shape[0]:
|
338
342
|
return 1e6
|
339
343
|
try:
|
340
344
|
if opt_metric == "silhouette":
|
341
345
|
score = silhouette_score(m, clusters, metric=linkage_metric)
|
342
|
-
return -score # We want to maximize the
|
346
|
+
return -score # We want to maximize the score.
|
343
347
|
elif opt_metric == "calinski_harabasz":
|
344
348
|
score = calinski_harabasz_score(m, clusters)
|
345
|
-
return -score
|
349
|
+
return -score
|
346
350
|
elif opt_metric == "davies_bouldin":
|
347
351
|
score = davies_bouldin_score(m, clusters)
|
348
|
-
return score
|
352
|
+
return score
|
349
353
|
else:
|
350
|
-
raise
|
354
|
+
raise ValueError(f"Unknown optimization metric: {opt_metric}.")
|
351
355
|
except Exception:
|
352
356
|
return 1e6
|
353
357
|
|
354
|
-
#
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
if obj_lower < obj_upper:
|
359
|
-
best_obj = obj_lower
|
360
|
-
best_threshold = lower_bound
|
361
|
-
lower = lower_bound
|
362
|
-
upper = (lower_bound + upper_bound) / 2
|
363
|
-
else:
|
364
|
-
best_obj = obj_upper
|
365
|
-
best_threshold = upper_bound
|
366
|
-
lower = (lower_bound + upper_bound) / 2
|
367
|
-
upper = upper_bound
|
368
|
-
|
369
|
-
# Perform binary search to find the optimal linkage threshold coefficient
|
370
|
-
while (upper - lower) > resolution:
|
371
|
-
mid = (upper + lower) / 2
|
372
|
-
obj_mid = compute_objective(mid)
|
373
|
-
if obj_mid < best_obj:
|
374
|
-
best_obj = obj_mid
|
375
|
-
best_threshold = mid
|
376
|
-
|
377
|
-
# Update the bounds based on the objective value
|
378
|
-
obj_left = compute_objective((lower + mid) / 2)
|
379
|
-
obj_right = compute_objective((mid + upper) / 2)
|
380
|
-
if obj_left < obj_right:
|
381
|
-
upper = mid
|
382
|
-
else:
|
383
|
-
lower = mid
|
358
|
+
# Optimize the threshold using the specified metric
|
359
|
+
res = minimize_scalar(
|
360
|
+
compute_objective, bounds=(0.0, 1.0), method="bounded", options={"xatol": resolution}
|
361
|
+
)
|
384
362
|
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
else
|
389
|
-
best_metric_value = best_obj
|
363
|
+
best_threshold = res.x
|
364
|
+
best_obj = res.fun
|
365
|
+
# For silhouette and calinski_harabasz, the objective was negative.
|
366
|
+
best_metric_value = -best_obj if opt_metric in ["silhouette", "calinski_harabasz"] else best_obj
|
390
367
|
|
391
368
|
return best_threshold, float(best_metric_value)
|
risk/network/graph/api.py
CHANGED
@@ -58,9 +58,12 @@ class GraphAPI:
|
|
58
58
|
impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
|
59
59
|
prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
|
60
60
|
linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
|
61
|
-
linkage_method (str, optional): Clustering method to use. Defaults to "average".
|
62
|
-
|
63
|
-
|
61
|
+
linkage_method (str, optional): Clustering method to use. Defaults to "average". Choose "auto"
|
62
|
+
to automatically select the best linkage method.
|
63
|
+
linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule". Choose "auto"
|
64
|
+
to automatically select the best linkage metric.
|
65
|
+
linkage_threshold (str, float, optional): Threshold for clustering. Choose "silhouette", "calinski_harabasz",
|
66
|
+
or "davies_bouldin" to automatically select the best threshold. Defaults to 0.2.
|
64
67
|
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
65
68
|
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
66
69
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=S22dtKjPn7IvWeLakeN4IajRLsqHuSbBhjvf6z5kHoo,127
|
2
2
|
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
3
|
risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
|
4
4
|
risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
|
@@ -10,13 +10,13 @@ risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
|
|
10
10
|
risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
|
11
11
|
risk/neighborhoods/api.py,sha256=ywngw2TQVV27gYlWDXcs8-qnmeepnvb-W9ov6J6VEPM,23341
|
12
12
|
risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
|
13
|
-
risk/neighborhoods/domains.py,sha256=
|
13
|
+
risk/neighborhoods/domains.py,sha256=yaSHymGfRJVuXHIa7BwoKzvIRSg5oLhNoOMg0tsVqV8,15961
|
14
14
|
risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
|
15
15
|
risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
|
16
16
|
risk/network/geometry.py,sha256=eVtGHMgBf9fEqQZUFdHWjw-zFYYpfUONoHFSAxoRkug,6219
|
17
17
|
risk/network/io.py,sha256=RCH4nQdgYDXcNwMfpSz7qEmPO0pJ1p9fL0rNQptsQrc,21673
|
18
18
|
risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
|
19
|
-
risk/network/graph/api.py,sha256=
|
19
|
+
risk/network/graph/api.py,sha256=fOyd-5rRnqmtquproP90scehewd0UtOVZS65hCuwasI,8684
|
20
20
|
risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
|
21
21
|
risk/network/graph/summary.py,sha256=Y_0rL2C1UoQeZQIPVe5LbaCO356Mcc8HisnrXwQsRm8,10289
|
22
22
|
risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
|
@@ -34,8 +34,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177
|
|
34
34
|
risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
|
35
35
|
risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
|
36
36
|
risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
40
|
-
risk_network-0.0.
|
41
|
-
risk_network-0.0.
|
37
|
+
risk_network-0.0.9b38.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
38
|
+
risk_network-0.0.9b38.dist-info/METADATA,sha256=mAB2KQoRWeOH13radrcMeW5dalpkPkl4YtjfUpQhJXI,47627
|
39
|
+
risk_network-0.0.9b38.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
40
|
+
risk_network-0.0.9b38.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
41
|
+
risk_network-0.0.9b38.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|