risk-network 0.0.9b37__py3-none-any.whl → 0.0.9b38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.37"
10
+ __version__ = "0.0.9-beta.38"
@@ -10,6 +10,7 @@ from typing import Tuple, Union
10
10
  import numpy as np
11
11
  import pandas as pd
12
12
  from scipy.cluster.hierarchy import linkage, fcluster
13
+ from scipy.optimize import minimize_scalar
13
14
  from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score, silhouette_score
14
15
  from tqdm import tqdm
15
16
 
@@ -42,8 +43,8 @@ def define_domains(
42
43
  clustering or "maxclust" for a fixed number of clusters. Use "off" to skip clustering.
43
44
  linkage_method (str): The linkage method for clustering. Use "auto" to try multiple methods.
44
45
  linkage_metric (str): The linkage metric for clustering. Use "auto" to try multiple metrics.
45
- linkage_threshold (float): The linkage threshold for clustering, or one of "silhouette", "calinski_harabasz",
46
- or "davies_bouldin" to optimize the threshold.
46
+ linkage_threshold (str, float): The linkage threshold for clustering, or one of "silhouette",
47
+ "calinski_harabasz", or "davies_bouldin" to optimize the threshold.
47
48
 
48
49
  Returns:
49
50
  pd.DataFrame: DataFrame with the primary domain for each node.
@@ -229,7 +230,7 @@ def _optimize_linkage_threshold_across_methods_and_metrics(
229
230
  linkage_criterion (str): Criterion for fcluster (typically "distance").
230
231
  linkage_method (str): Linkage method for clustering, or "auto" to try multiple methods.
231
232
  linkage_metric (str): Distance metric for clustering, or "auto" to try multiple metrics.
232
- linkage_threshold (Union[str, float]): Either a numeric threshold or one of
233
+ linkage_threshold (str, float): Either a numeric threshold or one of the following keywords:
233
234
  "silhouette", "calinski_harabasz", or "davies_bouldin" to trigger optimization.
234
235
 
235
236
  Returns:
@@ -306,86 +307,62 @@ def _find_optimal_linkage_threshold(
306
307
  linkage_criterion: str,
307
308
  opt_metric: str = "silhouette",
308
309
  ) -> Tuple[float, float]:
309
- """Find the optimal linkage threshold coefficient (multiplier for np.max(Z[:, 2])) via binary search.
310
- The threshold is chosen to optimize clustering quality, as measured by the chosen metric.
310
+ """Find the optimal linkage threshold coefficient for hierarchical clustering. The function optimizes
311
+ the threshold value using the specified metric (opt_metric).
311
312
 
312
313
  Args:
313
- Z (np.ndarray): Linkage matrix.
314
- m (np.ndarray): Data matrix.
315
- linkage_metric (str): The metric used for clustering quality calculation.
316
- linkage_criterion (str): The criterion to pass to fcluster (typically "distance").
317
- opt_metric (str): The metric to optimize; one of "silhouette", "calinski_harabasz", or "davies_bouldin".
318
- For "silhouette" and "calinski_harabasz", higher is better; for "davies_bouldin", lower is better.
314
+ Z (np.ndarray): Linkage matrix generated by a hierarchical clustering algorithm.
315
+ m (np.ndarray): Data matrix used for clustering.
316
+ linkage_metric (str): Metric used to calculate distances between data points
317
+ (e.g., "euclidean" or "cosine").
318
+ linkage_criterion (str): Criterion to pass to `fcluster`, typically "distance".
319
+ opt_metric (str, optional): Metric to optimize clustering quality. Options are:
320
+ "silhouette", "calinski_harabasz", or "davies_bouldin". Defaults to "silhouette".
319
321
 
320
322
  Returns:
321
323
  Tuple[float, float]:
322
- - best_threshold: The optimal linkage coefficient for np.max(Z[:, 2]).
323
- - best_metric_value: The clustering quality metric achieved (with higher being better for
324
- "silhouette" and "calinski_harabasz", and lower for "davies_bouldin").
324
+ - best_threshold (float): The optimal linkage threshold coefficient.
325
+ - best_metric_value (float): The value of the clustering quality metric achieved
326
+ at the optimal threshold (higher for "silhouette" and "calinski_harabasz",
327
+ lower for "davies_bouldin").
328
+
329
+ Raises:
330
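+ ValueError: Intended for an unsupported `opt_metric`; note that the raise happens inside the objective's `try` block, so `except Exception` turns it into the 1e6 penalty instead of propagating it to the caller.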
325
331
  """
332
+ # Get the maximum distance in the linkage matrix
326
333
  max_d = np.max(Z[:, 2])
327
- lower_bound = 0.0
328
- upper_bound = 1.0
329
334
  resolution = 1e-6
330
335
 
331
336
  def compute_objective(coefficient: float) -> float:
332
- """Compute the objective function for a given linkage threshold coefficient."""
337
+ """Compute the objective function for optimization."""
333
338
  threshold_val = coefficient * max_d
334
339
  clusters = fcluster(Z, threshold_val, criterion=linkage_criterion)
335
340
  unique_clusters = np.unique(clusters)
336
- # Return a heavy penalty if the clustering is trivial.
337
341
  if len(unique_clusters) <= 1 or len(unique_clusters) == m.shape[0]:
338
342
  return 1e6
339
343
  try:
340
344
  if opt_metric == "silhouette":
341
345
  score = silhouette_score(m, clusters, metric=linkage_metric)
342
- return -score # We want to maximize the silhouette score.
346
+ return -score # We want to maximize the score.
343
347
  elif opt_metric == "calinski_harabasz":
344
348
  score = calinski_harabasz_score(m, clusters)
345
- return -score # Higher is better.
349
+ return -score
346
350
  elif opt_metric == "davies_bouldin":
347
351
  score = davies_bouldin_score(m, clusters)
348
- return score # Lower is better.
352
+ return score
349
353
  else:
350
- raise LinkageThresholdError(f"Unknown optimization metric: {opt_metric}.")
354
+ raise ValueError(f"Unknown optimization metric: {opt_metric}.")
351
355
  except Exception:
352
356
  return 1e6
353
357
 
354
- # Initialize the bounds and the best objective value
355
- obj_lower = compute_objective(lower_bound)
356
- obj_upper = compute_objective(upper_bound)
357
- # Determine the initial direction of the search
358
- if obj_lower < obj_upper:
359
- best_obj = obj_lower
360
- best_threshold = lower_bound
361
- lower = lower_bound
362
- upper = (lower_bound + upper_bound) / 2
363
- else:
364
- best_obj = obj_upper
365
- best_threshold = upper_bound
366
- lower = (lower_bound + upper_bound) / 2
367
- upper = upper_bound
368
-
369
- # Perform binary search to find the optimal linkage threshold coefficient
370
- while (upper - lower) > resolution:
371
- mid = (upper + lower) / 2
372
- obj_mid = compute_objective(mid)
373
- if obj_mid < best_obj:
374
- best_obj = obj_mid
375
- best_threshold = mid
376
-
377
- # Update the bounds based on the objective value
378
- obj_left = compute_objective((lower + mid) / 2)
379
- obj_right = compute_objective((mid + upper) / 2)
380
- if obj_left < obj_right:
381
- upper = mid
382
- else:
383
- lower = mid
358
+ # Optimize the threshold using the specified metric
359
+ res = minimize_scalar(
360
+ compute_objective, bounds=(0.0, 1.0), method="bounded", options={"xatol": resolution}
361
+ )
384
362
 
385
- # If the optimization metric is silhouette or calinski_harabasz, return the negative value
386
- if opt_metric in ["silhouette", "calinski_harabasz"]:
387
- best_metric_value = -best_obj
388
- else:
389
- best_metric_value = best_obj
363
+ best_threshold = res.x
364
+ best_obj = res.fun
365
+ # For silhouette and calinski_harabasz the objective is the negated score, so flip the sign back to report the metric itself.
366
+ best_metric_value = -best_obj if opt_metric in ["silhouette", "calinski_harabasz"] else best_obj
390
367
 
391
368
  return best_threshold, float(best_metric_value)
risk/network/graph/api.py CHANGED
@@ -58,9 +58,12 @@ class GraphAPI:
58
58
  impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
59
59
  prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
60
60
  linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
61
- linkage_method (str, optional): Clustering method to use. Defaults to "average".
62
- linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
63
- linkage_threshold (float, optional): Threshold for clustering. Defaults to 0.2.
61
+ linkage_method (str, optional): Clustering method to use. Defaults to "average". Choose "auto"
62
+ to automatically select the best linkage method.
63
+ linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule". Choose "auto"
64
+ to automatically select the best linkage metric.
65
+ linkage_threshold (str, float, optional): Threshold for clustering. Choose "silhouette", "calinski_harabasz",
66
+ or "davies_bouldin" to automatically select the best threshold. Defaults to 0.2.
64
67
  min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
65
68
  max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
66
69
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: risk-network
3
- Version: 0.0.9b37
3
+ Version: 0.0.9b38
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,4 +1,4 @@
1
- risk/__init__.py,sha256=T82k1TIYSqPy3KdA5kWb-5ZtqtksCWdHZ4e5vTYhTSY,127
1
+ risk/__init__.py,sha256=S22dtKjPn7IvWeLakeN4IajRLsqHuSbBhjvf6z5kHoo,127
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
3
  risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
4
4
  risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
@@ -10,13 +10,13 @@ risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
10
10
  risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
11
11
  risk/neighborhoods/api.py,sha256=ywngw2TQVV27gYlWDXcs8-qnmeepnvb-W9ov6J6VEPM,23341
12
12
  risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
13
- risk/neighborhoods/domains.py,sha256=lMEKwZaOcFIVFqCPPgER1GQQR_ANIWaNk5m2zRdUH08,16774
13
+ risk/neighborhoods/domains.py,sha256=yaSHymGfRJVuXHIa7BwoKzvIRSg5oLhNoOMg0tsVqV8,15961
14
14
  risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
15
15
  risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
16
16
  risk/network/geometry.py,sha256=eVtGHMgBf9fEqQZUFdHWjw-zFYYpfUONoHFSAxoRkug,6219
17
17
  risk/network/io.py,sha256=RCH4nQdgYDXcNwMfpSz7qEmPO0pJ1p9fL0rNQptsQrc,21673
18
18
  risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
19
- risk/network/graph/api.py,sha256=9yoviP7EqFU1okLJZlaLBZzFNmjOHv30B1JgDFNP1bg,8399
19
+ risk/network/graph/api.py,sha256=fOyd-5rRnqmtquproP90scehewd0UtOVZS65hCuwasI,8684
20
20
  risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
21
21
  risk/network/graph/summary.py,sha256=Y_0rL2C1UoQeZQIPVe5LbaCO356Mcc8HisnrXwQsRm8,10289
22
22
  risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
@@ -34,8 +34,8 @@ risk/stats/stat_tests.py,sha256=tj0ri9w89_1fsjGLuafTWpfBEwZXpSLn7Ej2aAQ5lxk,1177
34
34
  risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
35
35
  risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
36
36
  risk/stats/permutation/test_functions.py,sha256=KlECWTz1EZ6EPF_OAgHb0uznaIhopiVYb_AKUKuC4no,3120
37
- risk_network-0.0.9b37.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
- risk_network-0.0.9b37.dist-info/METADATA,sha256=5tQlDkFQSqNgXhv-_TZSfraFKAseXh7LCHciBQIR4CQ,47627
39
- risk_network-0.0.9b37.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
40
- risk_network-0.0.9b37.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
- risk_network-0.0.9b37.dist-info/RECORD,,
37
+ risk_network-0.0.9b38.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
+ risk_network-0.0.9b38.dist-info/METADATA,sha256=mAB2KQoRWeOH13radrcMeW5dalpkPkl4YtjfUpQhJXI,47627
39
+ risk_network-0.0.9b38.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
40
+ risk_network-0.0.9b38.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
+ risk_network-0.0.9b38.dist-info/RECORD,,