risk-network 0.0.16b1__py3-none-any.whl → 0.0.16b2__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- risk/__init__.py +2 -2
- risk/{_annotation → annotation}/__init__.py +2 -2
- risk/{_annotation → annotation}/_nltk_setup.py +3 -3
- risk/{_annotation/_annotation.py → annotation/annotation.py} +13 -13
- risk/{_annotation/_io.py → annotation/io.py} +4 -4
- risk/cluster/__init__.py +8 -0
- risk/{_neighborhoods → cluster}/_community.py +37 -37
- risk/cluster/api.py +273 -0
- risk/{_neighborhoods/_neighborhoods.py → cluster/cluster.py} +127 -98
- risk/{_neighborhoods/_domains.py → cluster/label.py} +18 -12
- risk/{_log → log}/__init__.py +2 -2
- risk/{_log/_console.py → log/console.py} +2 -2
- risk/{_log/_parameters.py → log/parameters.py} +20 -10
- risk/network/__init__.py +8 -0
- risk/network/graph/__init__.py +7 -0
- risk/{_network/_graph → network/graph}/_stats.py +2 -2
- risk/{_network/_graph → network/graph}/_summary.py +13 -13
- risk/{_network/_graph/_api.py → network/graph/api.py} +37 -39
- risk/{_network/_graph/_graph.py → network/graph/graph.py} +5 -5
- risk/{_network/_io.py → network/io.py} +9 -4
- risk/network/plotter/__init__.py +6 -0
- risk/{_network/_plotter → network/plotter}/_canvas.py +6 -6
- risk/{_network/_plotter → network/plotter}/_contour.py +4 -4
- risk/{_network/_plotter → network/plotter}/_labels.py +6 -6
- risk/{_network/_plotter → network/plotter}/_network.py +7 -7
- risk/{_network/_plotter → network/plotter}/_plotter.py +5 -5
- risk/network/plotter/_utils/__init__.py +7 -0
- risk/{_network/_plotter/_utils/_colors.py → network/plotter/_utils/colors.py} +3 -3
- risk/{_network/_plotter/_utils/_layout.py → network/plotter/_utils/layout.py} +2 -2
- risk/{_network/_plotter/_api.py → network/plotter/api.py} +5 -5
- risk/{_risk.py → risk.py} +9 -8
- risk/stats/__init__.py +6 -0
- risk/stats/_stats/__init__.py +11 -0
- risk/stats/_stats/permutation/__init__.py +6 -0
- risk/stats/_stats/permutation/_test_functions.py +72 -0
- risk/{_neighborhoods/_stats/_permutation/_permutation.py → stats/_stats/permutation/permutation.py} +35 -37
- risk/{_neighborhoods/_stats/_tests.py → stats/_stats/tests.py} +32 -34
- risk/stats/api.py +202 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/METADATA +2 -2
- risk_network-0.0.16b2.dist-info/RECORD +43 -0
- risk/_neighborhoods/__init__.py +0 -8
- risk/_neighborhoods/_api.py +0 -354
- risk/_neighborhoods/_stats/__init__.py +0 -11
- risk/_neighborhoods/_stats/_permutation/__init__.py +0 -6
- risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -72
- risk/_network/__init__.py +0 -8
- risk/_network/_graph/__init__.py +0 -7
- risk/_network/_plotter/__init__.py +0 -6
- risk/_network/_plotter/_utils/__init__.py +0 -7
- risk_network-0.0.16b1.dist-info/RECORD +0 -41
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/top_level.txt +0 -0
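
The headline change in this release is a module restructure: the underscore-private packages become public (_annotation → annotation, _log → log, _network → network), _neighborhoods is renamed to cluster, and the statistics code moves into new risk/cluster/api.py and risk/stats packages. A sketch of what the rename means for downstream imports, using only names that appear verbatim in the hunks below and assuming the new packages re-export them as shown:

# New public paths in 0.0.16b2, per the rename list above; assumes these
# names are re-exported as shown in the hunks below.
from risk.annotation import get_weighted_description
from risk.log import logger, params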
risk/{_neighborhoods/_neighborhoods.py → cluster/cluster.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/cluster/cluster
+~~~~~~~~~~~~~~~~~~~~
 """

 import random
@@ -13,116 +13,124 @@ from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity

-from ..
+from ..log import logger
 from ._community import (
-
-
-
-
-
-
-
+    calculate_greedy_modularity_clusters,
+    calculate_label_propagation_clusters,
+    calculate_leiden_clusters,
+    calculate_louvain_clusters,
+    calculate_markov_clustering_clusters,
+    calculate_spinglass_clusters,
+    calculate_walktrap_clusters,
 )

 # Suppress DataConversionWarning
 warnings.filterwarnings(action="ignore", category=DataConversionWarning)


-def
+def cluster_method(func):
+    """
+    Decorator for clustering functions to ensure deterministic, reproducible results.
+    Sets random seeds, copies the network, and ensures output is normalized.
+
+    Args:
+        func (callable): The clustering function to be decorated.
+
+    Returns:
+        callable: The wrapped clustering function with added functionality.
+    """
+
+    def wrapper(*args, **kwargs):
+        """
+        Wrapper function to set random seeds and normalize output.
+
+        Args:
+            *args: Positional arguments for the clustering function.
+            **kwargs: Keyword arguments for the clustering function.
+
+        Returns:
+            csr_matrix: Sparse matrix representing cluster assignments.
+        """
+        clusters = func(*args, **kwargs)
+        return _set_max_row_value_to_one_sparse(clusters)
+
+    return wrapper
+
+
+def get_network_clusters(
     network: nx.Graph,
-
-    fraction_shortest_edges:
+    clustering: str = "louvain",
+    fraction_shortest_edges: float = 0.5,
     louvain_resolution: float = 0.1,
     leiden_resolution: float = 1.0,
     random_seed: int = 888,
 ) -> csr_matrix:
     """
-    Calculate
+    Calculate clusters for the network using a single method.

     Args:
         network (nx.Graph): The network graph.
-
-        fraction_shortest_edges (float,
-        louvain_resolution (float, optional): Resolution
-        leiden_resolution (float, optional): Resolution
-        random_seed (int, optional): Random seed
+        clustering (str, optional): The clustering method ('greedy', 'labelprop', 'leiden', 'louvain', 'markov', 'spinglass', 'walktrap').
+        fraction_shortest_edges (float, optional): Fraction of shortest edges to consider for creating subgraphs. Defaults to 0.5.
+        louvain_resolution (float, optional): Resolution for Louvain.
+        leiden_resolution (float, optional): Resolution for Leiden.
+        random_seed (int, optional): Random seed.

     Returns:
-        csr_matrix:
+        csr_matrix: Sparse cluster matrix.

     Raises:
-        ValueError: If
+        ValueError: If invalid clustering method is provided.
     """
-    # Set random seed for reproducibility
+    # Set random seed for cluster reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)

-
-
-
-
-
-
-
-
+    clusters = None
+    # Determine clustering method and compute clusters
+    if clustering == "greedy":
+        clusters = calculate_greedy_modularity_clusters(
+            network, fraction_shortest_edges=fraction_shortest_edges
+        )
+    elif clustering == "labelprop":
+        clusters = calculate_label_propagation_clusters(
+            network, fraction_shortest_edges=fraction_shortest_edges
+        )
+    elif clustering == "leiden":
+        clusters = calculate_leiden_clusters(
+            network,
+            resolution=leiden_resolution,
+            fraction_shortest_edges=fraction_shortest_edges,
+            random_seed=random_seed,
+        )
+    elif clustering == "louvain":
+        clusters = calculate_louvain_clusters(
+            network,
+            resolution=louvain_resolution,
+            fraction_shortest_edges=fraction_shortest_edges,
+            random_seed=random_seed,
+        )
+    elif clustering == "markov":
+        clusters = calculate_markov_clustering_clusters(
+            network, fraction_shortest_edges=fraction_shortest_edges
+        )
+    elif clustering == "spinglass":
+        clusters = calculate_spinglass_clusters(
+            network, fraction_shortest_edges=fraction_shortest_edges
+        )
+    elif clustering == "walktrap":
+        clusters = calculate_walktrap_clusters(
+            network, fraction_shortest_edges=fraction_shortest_edges
+        )
+    else:
         raise ValueError(
-            "
+            "Invalid clustering method. Choose from: 'greedy', 'labelprop', 'leiden', 'louvain', 'markov', 'spinglass', 'walktrap'."
         )

-    #
-
-    # Initialize a sparse matrix with the same shape as the network
-    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
-    # Loop through each distance metric and corresponding edge rank fraction
-    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
-        # Compute neighborhoods for the specified metric
-        if metric == "greedy_modularity":
-            neighborhoods = calculate_greedy_modularity_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "label_propagation":
-            neighborhoods = calculate_label_propagation_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "leiden":
-            neighborhoods = calculate_leiden_neighborhoods(
-                network,
-                resolution=leiden_resolution,
-                fraction_shortest_edges=percentile,
-                random_seed=random_seed,
-            )
-        elif metric == "louvain":
-            neighborhoods = calculate_louvain_neighborhoods(
-                network,
-                resolution=louvain_resolution,
-                fraction_shortest_edges=percentile,
-                random_seed=random_seed,
-            )
-        elif metric == "markov_clustering":
-            neighborhoods = calculate_markov_clustering_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "spinglass":
-            neighborhoods = calculate_spinglass_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "walktrap":
-            neighborhoods = calculate_walktrap_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        else:
-            raise ValueError(
-                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
-                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
-            )
-
-        # Add the sparse neighborhood matrix
-        combined_neighborhoods += neighborhoods
-
-    # Ensure maximum value in each row is set to 1
-    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
+    # Ensure maximum per row set to 1
+    clusters = _set_max_row_value_to_one_sparse(clusters)

-    return
+    return clusters


 def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
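
This hunk replaces the old multi-metric loop, which summed one neighborhood matrix per community-detection metric, with a single-method dispatcher, and introduces the cluster_method decorator. A minimal usage sketch; the graph is a stand-in and the import path follows the renamed module:

import networkx as nx
from risk.cluster.cluster import get_network_clusters  # path per this diff

G = nx.karate_club_graph()  # illustrative graph, not part of the package
clusters = get_network_clusters(G, clustering="walktrap", fraction_shortest_edges=0.5)
print(repr(clusters))  # scipy csr_matrix of cluster assignments

The decorator guarantees that every wrapped calculate_*_clusters function returns a row-normalized sparse matrix via _set_max_row_value_to_one_sparse, while get_network_clusters itself handles the seeding.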
@@ -144,27 +152,29 @@ def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
     return matrix


-def
+def process_significant_clusters(
     network: nx.Graph,
-
+    significant_clusters: Dict[str, Any],
     impute_depth: int = 0,
     prune_threshold: float = 0.0,
 ) -> Dict[str, Any]:
     """
-    Process
+    Process clusters based on the imputation and pruning settings.

     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-
+        significant_clusters (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.

     Returns:
-        Dict[str, Any]: Processed
+        Dict[str, Any]: Processed clusters data, including the updated matrices and significance counts.
     """
-    significance_matrix =
-    significant_binary_significance_matrix =
-
+    significance_matrix = significant_clusters["significance_matrix"]
+    significant_binary_significance_matrix = significant_clusters[
+        "significant_binary_significance_matrix"
+    ]
+    significant_significance_matrix = significant_clusters["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
@@ -191,13 +201,13 @@ def process_neighborhoods(
         distance_threshold=prune_threshold,
     )

-
+    cluster_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
     node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
         "significance_matrix": significance_matrix,
         "significant_binary_significance_matrix": significant_binary_significance_matrix,
         "significant_significance_matrix": significant_significance_matrix,
-        "
+        "cluster_significance_counts": cluster_significance_counts,
         "node_significance_sums": node_significance_sums,
     }
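
process_significant_clusters now takes the three matrices bundled into one dictionary rather than as separate arguments, and its result gains cluster_significance_counts. A sketch of the expected input shape with toy matrices; the key names are exactly those required by the docstring above, the values are illustrative:

import numpy as np

sig = np.random.rand(5, 3)            # node-by-annotation significance scores
binary = (sig > 0.7).astype(int)      # thresholded at an assumed alpha
significant_clusters = {
    "significance_matrix": sig,
    "significant_binary_significance_matrix": binary,
    "significant_significance_matrix": np.where(binary == 1, sig, 0),
}
# The new output field is the per-annotation column sum of the binary matrix:
cluster_significance_counts = np.sum(binary, axis=0)
print(cluster_significance_counts)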
@@ -395,6 +405,7 @@ def _prune_neighbors(
     non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     distance_lookup = {}
+    isolated_nodes = []  # Track nodes with no significant neighbors
     for node in non_zero_indices:
         dist = _median_distance_to_significant_neighbors(
             node, network, significant_binary_significance_matrix
@@ -402,6 +413,8 @@ def _prune_neighbors(
         if dist is not None:
             median_distances.append(dist)
             distance_lookup[node] = dist
+        else:
+            isolated_nodes.append(node)  # Node has no significant neighbors

     if not median_distances:
         logger.warning("No significant neighbors found for pruning.")
@@ -422,6 +435,11 @@ def _prune_neighbors(
         significance_matrix[node] = 0
         significant_binary_significance_matrix[node] = 0

+    # Prune isolated nodes (no significant neighbors)
+    for node in isolated_nodes:
+        significance_matrix[node] = 0
+        significant_binary_significance_matrix[node] = 0
+
     # Create a matrix where non-significant entries are set to zero
     significant_significance_matrix = np.where(
         significant_binary_significance_matrix == 1, significance_matrix, 0
@@ -436,7 +454,7 @@ def _prune_neighbors(

 def _median_distance_to_significant_neighbors(
     node, network, significance_mask
-) -> Union[float, None]:
+) -> Union[float, Any, None]:
     """
     Calculate the median distance from a node to its significant neighbors.

@@ -448,11 +466,22 @@ def _median_distance_to_significant_neighbors(
     Returns:
         Union[float, None]: The median distance to significant neighbors, or None if no significant neighbors exist.
     """
-
+    # Get all neighbors at once
+    neighbors = list(network.neighbors(node))
     if not neighbors:
         return None
-
-
+
+    # Vectorized check for significant neighbors
+    neighbors = np.array(neighbors)
+    significant_mask = significance_mask[neighbors].sum(axis=1) != 0
+    significant_neighbors = neighbors[significant_mask]
+    if len(significant_neighbors) == 0:
+        return None
+
+    # Vectorized distance calculation
+    node_pos = _get_node_position(network, node)
+    neighbor_positions = np.array([_get_node_position(network, n) for n in significant_neighbors])
+    distances = np.linalg.norm(neighbor_positions - node_pos, axis=1)

     return np.median(distances)
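
The pruning hunks add explicit tracking of isolated nodes (those whose median distance comes back None because no neighbor is significant) so they are zeroed out as well, and the median-distance helper is vectorized with NumPy instead of checking neighbors one by one. A standalone sketch of the vectorized idea; _get_node_position is internal to the package, so this version reads an assumed "pos" node attribute and a precomputed 1-D boolean mask instead:

import networkx as nx
import numpy as np

def median_distance_to_marked_neighbors(G, node, marked):
    """Median Euclidean distance from node to neighbors flagged in marked."""
    neighbors = np.array(list(G.neighbors(node)))
    if neighbors.size == 0:
        return None
    hits = neighbors[marked[neighbors]]  # vectorized significance check
    if hits.size == 0:
        return None                      # isolated: no significant neighbors
    positions = np.array([G.nodes[n]["pos"] for n in hits])
    origin = np.asarray(G.nodes[node]["pos"])
    return float(np.median(np.linalg.norm(positions - origin, axis=1)))

G = nx.path_graph(4)
for i in G.nodes:
    G.nodes[i]["pos"] = (float(i), 0.0)
marked = np.array([False, True, False, True])
print(median_distance_to_marked_neighbors(G, 2, marked))  # 1.0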
risk/{_neighborhoods/_domains.py → cluster/label.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/cluster/label
+~~~~~~~~~~~~~~~~~~
 """

 from itertools import product
@@ -13,9 +13,9 @@ from scipy.cluster.hierarchy import fcluster, linkage
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm

-from risk.
+from risk.annotation import get_weighted_description

-from ..
+from ..log import logger

 # Define constants for clustering
 # fmt: off
@@ -30,7 +30,7 @@ LINKAGE_METRICS = {

 def define_domains(
     top_annotation: pd.DataFrame,
-
+    significant_clusters_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
@@ -42,7 +42,7 @@ def define_domains(

     Args:
         top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-
+        significant_clusters_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
         linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
         linkage_metric (str): The linkage metric for clustering. Choose "auto" to optimize.
@@ -66,11 +66,11 @@ def define_domains(
         top_annotation["domain"] = range(1, n_rows + 1)
     else:
         # Transpose the matrix to cluster annotations
-        m =
+        m = significant_clusters_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
         try:
-            # Optimize silhouette score across different linkage methods and
+            # Optimize silhouette score across different linkage methods and metrics
             (
                 best_linkage,
                 best_metric,
@@ -99,7 +99,7 @@ def define_domains(

     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
-        data=
+        data=significant_clusters_significance,
         columns=[top_annotation.index.values, top_annotation["domain"]],
     )
     node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
@@ -152,9 +152,9 @@ def trim_domains(
     top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id

-    # Normalize "num significant
+    # Normalize "num significant clusters" by percentile for each domain and scale to 0-10
     top_annotation["normalized_value"] = top_annotation.groupby("domain")[
-        "
+        "significant_cluster_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
     # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotation["combined_terms"] = top_annotation.apply(
@@ -245,6 +245,12 @@ def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
     Returns:
         np.ndarray: Safeguarded data matrix.
     """
+    # Safety guard: handle empty or invalid matrices
+    if matrix.size == 0 or not np.isfinite(matrix).any():
+        logger.warning(
+            "Input matrix is empty or contains no finite values. Returning a zero matrix of same shape."
+        )
+        return np.zeros(matrix.shape, dtype=float)
     # Replace NaN with column mean
     nan_replacement = np.nanmean(matrix, axis=0)
     matrix = np.where(np.isnan(matrix), nan_replacement, matrix)
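
The new guard in _safeguard_matrix short-circuits the degenerate case: on an empty or all-non-finite matrix, np.nanmean would emit warnings and produce NaN column means, so the function now returns a zero matrix instead. A standalone sketch of the same logic:

import numpy as np

def safeguard(matrix: np.ndarray) -> np.ndarray:
    # Degenerate input: nothing finite to impute from
    if matrix.size == 0 or not np.isfinite(matrix).any():
        return np.zeros(matrix.shape, dtype=float)
    col_mean = np.nanmean(matrix, axis=0)               # per-column mean, NaNs ignored
    return np.where(np.isnan(matrix), col_mean, matrix)

print(safeguard(np.array([[np.nan, 2.0], [4.0, np.nan]])))  # NaNs -> column means
print(safeguard(np.full((2, 2), np.nan)))                   # all-NaN -> zeros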
@@ -267,7 +273,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
     linkage_threshold: Union[str, float],
 ) -> Tuple[str, str, float]:
     """
-    Optimize silhouette score across different linkage methods and
+    Optimize silhouette score across different linkage methods and metrics.

     Args:
         m (np.ndarray): Data matrix.
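
The docstring fix above completes the sentence: the helper optimizes the silhouette score across linkage methods and metrics. A sketch of that grid-search pattern under stated assumptions; the real function's candidate lists, thresholding, and tqdm progress reporting are internal, so the ones below are illustrative stand-ins:

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.metrics import silhouette_score

def best_linkage(m, methods=("average", "complete"), metrics=("euclidean", "cosine")):
    best = ("", "", -1.0)
    for method in methods:
        for metric in metrics:
            Z = linkage(m, method=method, metric=metric)
            labels = fcluster(Z, t=0.5 * Z[:, 2].max(), criterion="distance")
            if len(set(labels)) < 2:
                continue  # silhouette needs at least two clusters
            score = silhouette_score(m, labels, metric=metric)
            if score > best[2]:
                best = (method, metric, score)
    return best

rng = np.random.default_rng(0)
m = np.vstack([rng.normal(0, 0.1, (5, 3)), rng.normal(5, 0.1, (5, 3))])
print(best_linkage(m))  # best (method, metric, silhouette) for two tight clusters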
risk/{_log → log}/__init__.py
RENAMED
@@ -3,8 +3,8 @@ risk/_log
 ~~~~~~~~~
 """

-from .
-from .
+from .console import log_header, logger, set_global_verbosity
+from .parameters import Params

 # Initialize the global parameters logger
 params = Params()
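
With the rename, the package-root re-exports stay stable; only the path loses its underscore. An import sketch grounded directly in the export list above:

from risk.log import log_header, logger, params, set_global_verbosity

logger.debug("risk.log imports resolved")  # logger is used with .debug/.warning elsewhere in this diff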
risk/{_log/_parameters.py → log/parameters.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/log/parameters
+~~~~~~~~~~~~~~~~~~~
 """

 import csv
@@ -11,7 +11,7 @@ from typing import Any, Dict

 import numpy as np

-from .
+from .console import log_header, logger

 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
@@ -22,7 +22,7 @@ class Params:
     Handles the storage and logging of various parameters for network analysis.

     The Params class provides methods to log parameters related to different components of the analysis,
-    such as the network, annotation,
+    such as the network, annotation, clusters, graph, and plotter settings. It also stores
     the current datetime when the parameters were initialized.
     """

@@ -35,7 +35,8 @@ class Params:
        """Initialize the parameter dictionaries for different components."""
        self.network = {}
        self.annotation = {}
-        self.
+        self.clusters = {}
+        self.stats = {}
        self.graph = {}
        self.plotter = {}

@@ -57,14 +58,23 @@ class Params:
        """
        self.annotation = {**self.annotation, **kwargs}

-    def
+    def log_clusters(self, **kwargs) -> None:
        """
-        Log
+        Log cluster-related parameters.

        Args:
-            **kwargs:
+            **kwargs: Cluster parameters to log.
        """
-        self.
+        self.clusters = {**self.clusters, **kwargs}
+
+    def log_stats(self, **kwargs) -> None:
+        """
+        Log statistical test-related parameters.
+
+        Args:
+            **kwargs: Statistical test parameters to log.
+        """
+        self.stats = {**self.stats, **kwargs}

     def log_graph(self, **kwargs) -> None:
        """
@@ -152,7 +162,7 @@ class Params:
            "annotation": self.annotation,
            "datetime": self.datetime,
            "graph": self.graph,
-            "
+            "clusters": self.clusters,
            "network": self.network,
            "plotter": self.plotter,
        }
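
The Params hunks split the old single bucket into separate clusters and stats dictionaries, each fed by a log_* method that merges keyword arguments. A standalone sketch of the merge pattern used by every log_* method above (later calls win on key collisions):

class ParamsSketch:
    def __init__(self):
        self.clusters = {}

    def log_clusters(self, **kwargs):
        # Same dict-merge pattern as Params.log_clusters in the hunk above
        self.clusters = {**self.clusters, **kwargs}

p = ParamsSketch()
p.log_clusters(clustering="leiden")
p.log_clusters(leiden_resolution=1.0)
print(p.clusters)  # {'clustering': 'leiden', 'leiden_resolution': 1.0}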
risk/network/__init__.py
ADDED
(new file; contents not shown in this view)

risk/{_network/_graph → network/graph}/_summary.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/network/graph/_summary
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

 from typing import Any, Dict, Tuple, Union
@@ -9,35 +9,35 @@ import numpy as np
 import pandas as pd
 from statsmodels.stats.multitest import fdrcorrection

-from ...
+from ...log import log_header, logger


 class Summary:
     """
     Handles the processing, storage, and export of network analysis results.

-    The
-    FDR-corrected q-values, and structure information on domains and
-    DataFrame. It also offers functionality to export the
-    and text formats for analysis and reporting.
+    The Summary class provides methods to process significance and depletion data,
+    compute FDR-corrected q-values, and structure information on domains and
+    annotations into a DataFrame. It also offers functionality to export the
+    processed data in CSV, JSON, and text formats for analysis and reporting.
     """

     def __init__(
         self,
         annotation: Dict[str, Any],
-
+        stats_results: Dict[str, Any],
         graph,  # Avoid type hinting Graph to prevent circular imports
     ):
         """
-        Initialize the
+        Initialize the Summary object with analysis components.

         Args:
             annotation (Dict[str, Any]): Annotation data, including ordered annotations and matrix of associations.
-
+            stats_results (Dict[str, Any]): Cluster data containing p-values for significance and depletion analysis.
             graph (Graph): Graph object representing domain-to-node and node-to-label mappings.
         """
         self.annotation = annotation
-        self.
+        self.stats_results = stats_results
         self.graph = graph

     def to_csv(self, filepath: str) -> None:
@@ -88,8 +88,8 @@ class Summary:
         """
         log_header("Loading analysis summary")
         # Calculate significance and depletion q-values from p-value matrices in annotation
-        enrichment_pvals = self.
-        depletion_pvals = self.
+        enrichment_pvals = self.stats_results["enrichment_pvals"]
+        depletion_pvals = self.stats_results["depletion_pvals"]
         enrichment_qvals = self._calculate_qvalues(enrichment_pvals)
         depletion_qvals = self._calculate_qvalues(depletion_pvals)
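
The load path above pulls enrichment and depletion p-values from stats_results and converts each to q-values via _calculate_qvalues. That helper is internal, but the module imports statsmodels' fdrcorrection, so a sketch of the standard Benjamini-Hochberg step it presumably wraps:

import numpy as np
from statsmodels.stats.multitest import fdrcorrection

pvals = np.array([[0.001, 0.20], [0.03, 0.75]])  # illustrative p-value matrix
_, qvals_flat = fdrcorrection(pvals.ravel())     # BH-adjusted p-values
qvals = qvals_flat.reshape(pvals.shape)
print(qvals)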