risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
- risk/{annotations → annotation}/io.py +93 -92
- risk/{annotations → annotation}/nltk_setup.py +6 -5
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -6
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.11.dist-info/METADATA +0 -798
- risk_network-0.0.11.dist-info/RECORD +0 -41
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
risk/neighborhoods/__init__.py
CHANGED
risk/neighborhoods/api.py
CHANGED
@@ -10,9 +10,9 @@ import networkx as nx
|
|
10
10
|
import numpy as np
|
11
11
|
from scipy.sparse import csr_matrix
|
12
12
|
|
13
|
-
from risk.log import
|
13
|
+
from risk.log import log_header, logger, params
|
14
14
|
from risk.neighborhoods.neighborhoods import get_network_neighborhoods
|
15
|
-
from risk.stats import (
|
15
|
+
from risk.neighborhoods.stats import (
|
16
16
|
compute_binom_test,
|
17
17
|
compute_chi2_test,
|
18
18
|
compute_hypergeom_test,
|
@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
|
|
28
28
|
The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
|
29
29
|
"""
|
30
30
|
|
31
|
-
def __init__() -> None:
|
31
|
+
def __init__(self) -> None:
|
32
32
|
pass
|
33
33
|
|
34
|
-
def
|
34
|
+
def load_neighborhoods_binom(
|
35
35
|
self,
|
36
36
|
network: nx.Graph,
|
37
|
-
|
37
|
+
annotation: Dict[str, Any],
|
38
38
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
39
39
|
louvain_resolution: float = 0.1,
|
40
40
|
leiden_resolution: float = 1.0,
|
@@ -46,7 +46,7 @@ class NeighborhoodsAPI:
|
|
46
46
|
|
47
47
|
Args:
|
48
48
|
network (nx.Graph): The network graph.
|
49
|
-
|
49
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
50
50
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
51
51
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
52
52
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
|
|
55
55
|
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
56
56
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
57
57
|
Defaults to 0.5.
|
58
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
58
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
59
59
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
60
60
|
|
61
61
|
Returns:
|
@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
|
|
65
65
|
# Compute neighborhood significance using the binomial test
|
66
66
|
return self._load_neighborhoods_by_statistical_test(
|
67
67
|
network=network,
|
68
|
-
|
68
|
+
annotation=annotation,
|
69
69
|
distance_metric=distance_metric,
|
70
70
|
louvain_resolution=louvain_resolution,
|
71
71
|
leiden_resolution=leiden_resolution,
|
@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
|
|
76
76
|
statistical_test_function=compute_binom_test,
|
77
77
|
)
|
78
78
|
|
79
|
-
def
|
79
|
+
def load_neighborhoods_chi2(
|
80
80
|
self,
|
81
81
|
network: nx.Graph,
|
82
|
-
|
82
|
+
annotation: Dict[str, Any],
|
83
83
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
84
84
|
louvain_resolution: float = 0.1,
|
85
85
|
leiden_resolution: float = 1.0,
|
@@ -91,7 +91,7 @@ class NeighborhoodsAPI:
|
|
91
91
|
|
92
92
|
Args:
|
93
93
|
network (nx.Graph): The network graph.
|
94
|
-
|
94
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
95
95
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
96
96
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
97
97
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
|
|
100
100
|
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
101
101
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
102
102
|
Defaults to 0.5.
|
103
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
103
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
104
104
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
105
105
|
|
106
106
|
Returns:
|
@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
|
|
110
110
|
# Compute neighborhood significance using the chi-squared test
|
111
111
|
return self._load_neighborhoods_by_statistical_test(
|
112
112
|
network=network,
|
113
|
-
|
113
|
+
annotation=annotation,
|
114
114
|
distance_metric=distance_metric,
|
115
115
|
louvain_resolution=louvain_resolution,
|
116
116
|
leiden_resolution=leiden_resolution,
|
@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
|
|
121
121
|
statistical_test_function=compute_chi2_test,
|
122
122
|
)
|
123
123
|
|
124
|
-
def
|
124
|
+
def load_neighborhoods_hypergeom(
|
125
125
|
self,
|
126
126
|
network: nx.Graph,
|
127
|
-
|
127
|
+
annotation: Dict[str, Any],
|
128
128
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
129
129
|
louvain_resolution: float = 0.1,
|
130
130
|
leiden_resolution: float = 1.0,
|
@@ -136,7 +136,7 @@ class NeighborhoodsAPI:
|
|
136
136
|
|
137
137
|
Args:
|
138
138
|
network (nx.Graph): The network graph.
|
139
|
-
|
139
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
140
140
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
141
141
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
142
142
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
|
|
145
145
|
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
146
146
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
147
147
|
Defaults to 0.5.
|
148
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
148
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
149
149
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
150
150
|
|
151
151
|
Returns:
|
@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
|
|
155
155
|
# Compute neighborhood significance using the hypergeometric test
|
156
156
|
return self._load_neighborhoods_by_statistical_test(
|
157
157
|
network=network,
|
158
|
-
|
158
|
+
annotation=annotation,
|
159
159
|
distance_metric=distance_metric,
|
160
160
|
louvain_resolution=louvain_resolution,
|
161
161
|
leiden_resolution=leiden_resolution,
|
@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
|
|
166
166
|
statistical_test_function=compute_hypergeom_test,
|
167
167
|
)
|
168
168
|
|
169
|
-
def
|
169
|
+
def load_neighborhoods_permutation(
|
170
170
|
self,
|
171
171
|
network: nx.Graph,
|
172
|
-
|
172
|
+
annotation: Dict[str, Any],
|
173
173
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
174
174
|
louvain_resolution: float = 0.1,
|
175
175
|
leiden_resolution: float = 1.0,
|
@@ -184,7 +184,7 @@ class NeighborhoodsAPI:
|
|
184
184
|
|
185
185
|
Args:
|
186
186
|
network (nx.Graph): The network graph.
|
187
|
-
|
187
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
188
188
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
189
189
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
190
190
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
|
|
194
194
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
195
195
|
Defaults to 0.5.
|
196
196
|
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
197
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
197
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
198
198
|
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
199
199
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
200
200
|
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
|
|
210
210
|
# Compute neighborhood significance using the permutation test
|
211
211
|
return self._load_neighborhoods_by_statistical_test(
|
212
212
|
network=network,
|
213
|
-
|
213
|
+
annotation=annotation,
|
214
214
|
distance_metric=distance_metric,
|
215
215
|
louvain_resolution=louvain_resolution,
|
216
216
|
leiden_resolution=leiden_resolution,
|
@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
|
|
224
224
|
max_workers=max_workers,
|
225
225
|
)
|
226
226
|
|
227
|
-
def
|
227
|
+
def load_neighborhoods_poisson(
|
228
228
|
self,
|
229
229
|
network: nx.Graph,
|
230
|
-
|
230
|
+
annotation: Dict[str, Any],
|
231
231
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
232
232
|
louvain_resolution: float = 0.1,
|
233
233
|
leiden_resolution: float = 1.0,
|
@@ -239,7 +239,7 @@ class NeighborhoodsAPI:
|
|
239
239
|
|
240
240
|
Args:
|
241
241
|
network (nx.Graph): The network graph.
|
242
|
-
|
242
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
243
243
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
244
244
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
245
245
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
|
|
248
248
|
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
249
249
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
250
250
|
Defaults to 0.5.
|
251
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
251
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
252
252
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
253
253
|
|
254
254
|
Returns:
|
@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
|
|
258
258
|
# Compute neighborhood significance using the Poisson test
|
259
259
|
return self._load_neighborhoods_by_statistical_test(
|
260
260
|
network=network,
|
261
|
-
|
261
|
+
annotation=annotation,
|
262
262
|
distance_metric=distance_metric,
|
263
263
|
louvain_resolution=louvain_resolution,
|
264
264
|
leiden_resolution=leiden_resolution,
|
@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
|
|
269
269
|
statistical_test_function=compute_poisson_test,
|
270
270
|
)
|
271
271
|
|
272
|
-
def
|
272
|
+
def load_neighborhoods_zscore(
|
273
273
|
self,
|
274
274
|
network: nx.Graph,
|
275
|
-
|
275
|
+
annotation: Dict[str, Any],
|
276
276
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
277
277
|
louvain_resolution: float = 0.1,
|
278
278
|
leiden_resolution: float = 1.0,
|
@@ -284,7 +284,7 @@ class NeighborhoodsAPI:
|
|
284
284
|
|
285
285
|
Args:
|
286
286
|
network (nx.Graph): The network graph.
|
287
|
-
|
287
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
288
288
|
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
289
289
|
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
290
290
|
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
|
|
293
293
|
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
294
294
|
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
295
295
|
Defaults to 0.5.
|
296
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
296
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
297
297
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
298
298
|
|
299
299
|
Returns:
|
@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
|
|
303
303
|
# Compute neighborhood significance using the z-score test
|
304
304
|
return self._load_neighborhoods_by_statistical_test(
|
305
305
|
network=network,
|
306
|
-
|
306
|
+
annotation=annotation,
|
307
307
|
distance_metric=distance_metric,
|
308
308
|
louvain_resolution=louvain_resolution,
|
309
309
|
leiden_resolution=leiden_resolution,
|
@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
|
|
317
317
|
def _load_neighborhoods_by_statistical_test(
|
318
318
|
self,
|
319
319
|
network: nx.Graph,
|
320
|
-
|
320
|
+
annotation: Dict[str, Any],
|
321
321
|
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
322
322
|
louvain_resolution: float = 0.1,
|
323
323
|
leiden_resolution: float = 1.0,
|
@@ -332,7 +332,7 @@ class NeighborhoodsAPI:
|
|
332
332
|
|
333
333
|
Args:
|
334
334
|
network (nx.Graph): The input network graph.
|
335
|
-
|
335
|
+
annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
|
336
336
|
distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
|
337
337
|
Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
|
338
338
|
Defaults to "louvain".
|
@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
|
|
340
340
|
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
341
341
|
fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
|
342
342
|
Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
|
343
|
-
null_distribution (str, optional): The type of null distribution to use ('network' or '
|
343
|
+
null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
|
344
344
|
Defaults to "network".
|
345
345
|
random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
|
346
346
|
statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
|
347
347
|
Used for logging and debugging. Defaults to "hypergeom".
|
348
348
|
statistical_test_function (Any, optional): The function implementing the statistical test.
|
349
|
-
It should accept neighborhoods,
|
349
|
+
It should accept neighborhoods, annotation, null distribution, and additional kwargs.
|
350
350
|
Defaults to `compute_hypergeom_test`.
|
351
351
|
**kwargs: Additional parameters to be passed to the statistical test function.
|
352
352
|
|
@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
|
|
381
381
|
# Apply statistical test function to compute neighborhood significance
|
382
382
|
neighborhood_significance = statistical_test_function(
|
383
383
|
neighborhoods=neighborhoods,
|
384
|
-
|
384
|
+
annotation=annotation["matrix"],
|
385
385
|
null_distribution=null_distribution,
|
386
386
|
**kwargs,
|
387
387
|
)
|
risk/neighborhoods/community.py
CHANGED
@@ -8,7 +8,7 @@ import igraph as ig
|
|
8
8
|
import markov_clustering as mc
|
9
9
|
import networkx as nx
|
10
10
|
import numpy as np
|
11
|
-
from leidenalg import
|
11
|
+
from leidenalg import RBConfigurationVertexPartition, find_partition
|
12
12
|
from networkx.algorithms.community import greedy_modularity_communities
|
13
13
|
from scipy.sparse import csr_matrix
|
14
14
|
|
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
|
|
27
27
|
|
28
28
|
Returns:
|
29
29
|
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
30
|
+
|
31
|
+
Raises:
|
32
|
+
ValueError: If the subgraph has no edges after filtering.
|
33
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
30
34
|
"""
|
31
35
|
# Create a subgraph with the shortest edges based on the rank fraction
|
32
36
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
|
|
67
71
|
|
68
72
|
Returns:
|
69
73
|
csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
ValueError: If the subgraph has no edges after filtering.
|
77
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
70
78
|
"""
|
71
79
|
# Create a subgraph with the shortest edges based on the rank fraction
|
72
80
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
|
|
115
123
|
|
116
124
|
Returns:
|
117
125
|
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
126
|
+
|
127
|
+
Raises:
|
128
|
+
ValueError: If the subgraph has no edges after filtering.
|
129
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
118
130
|
"""
|
119
131
|
# Create a subgraph with the shortest edges based on the rank fraction
|
120
132
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
|
|
167
179
|
|
168
180
|
Returns:
|
169
181
|
csr_matrix: A binary neighborhood matrix in CSR format.
|
182
|
+
|
183
|
+
Raises:
|
184
|
+
ValueError: If the subgraph has no edges after filtering.
|
185
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
170
186
|
"""
|
171
187
|
# Create a subgraph with the shortest edges based on the rank fraction
|
172
188
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
|
|
215
231
|
Returns:
|
216
232
|
csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
|
217
233
|
|
218
|
-
|
219
|
-
|
220
|
-
|
234
|
+
Raises:
|
235
|
+
ValueError: If the subgraph has no edges after filtering.
|
236
|
+
RuntimeError: If MCL fails to run.
|
237
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
221
238
|
"""
|
222
239
|
# Create a subgraph with the shortest edges based on the rank fraction
|
223
240
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
|
|
283
300
|
|
284
301
|
Returns:
|
285
302
|
csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
|
303
|
+
|
304
|
+
Raises:
|
305
|
+
ValueError: If the subgraph has no edges after filtering.
|
306
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
286
307
|
"""
|
287
308
|
# Create a subgraph with the shortest edges based on the rank fraction
|
288
309
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
|
|
343
364
|
|
344
365
|
Returns:
|
345
366
|
csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
|
367
|
+
|
368
|
+
Raises:
|
369
|
+
ValueError: If the subgraph has no edges after filtering.
|
370
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
346
371
|
"""
|
347
372
|
# Create a subgraph with the shortest edges based on the rank fraction
|
348
373
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
|
|
384
409
|
Returns:
|
385
410
|
nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
|
386
411
|
specified rank fraction.
|
412
|
+
|
413
|
+
Raises:
|
414
|
+
ValueError: If no edges with 'length' attributes are found in the graph.
|
415
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
387
416
|
"""
|
388
417
|
# Step 1: Extract edges with their lengths
|
389
418
|
edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
|
risk/neighborhoods/domains.py
CHANGED
@@ -9,19 +9,18 @@ from typing import Tuple, Union
|
|
9
9
|
import numpy as np
|
10
10
|
import pandas as pd
|
11
11
|
from numpy.linalg import LinAlgError
|
12
|
-
from scipy.cluster.hierarchy import
|
12
|
+
from scipy.cluster.hierarchy import fcluster, linkage
|
13
13
|
from sklearn.metrics import silhouette_score
|
14
14
|
from tqdm import tqdm
|
15
15
|
|
16
|
-
from risk.
|
16
|
+
from risk.annotation import get_weighted_description
|
17
17
|
from risk.log import logger
|
18
18
|
|
19
|
-
|
20
19
|
# Define constants for clustering
|
21
20
|
# fmt: off
|
22
21
|
LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
|
23
22
|
LINKAGE_METRICS = {
|
24
|
-
"braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
|
23
|
+
"braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
|
25
24
|
"hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
|
26
25
|
"rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
|
27
26
|
}
|
@@ -29,7 +28,7 @@ LINKAGE_METRICS = {
|
|
29
28
|
|
30
29
|
|
31
30
|
def define_domains(
|
32
|
-
|
31
|
+
top_annotation: pd.DataFrame,
|
33
32
|
significant_neighborhoods_significance: np.ndarray,
|
34
33
|
linkage_criterion: str,
|
35
34
|
linkage_method: str,
|
@@ -40,7 +39,7 @@ def define_domains(
|
|
40
39
|
handling errors by assigning unique domains when clustering fails.
|
41
40
|
|
42
41
|
Args:
|
43
|
-
|
42
|
+
top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
|
44
43
|
significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
|
45
44
|
linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
|
46
45
|
linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
|
@@ -49,13 +48,16 @@ def define_domains(
|
|
49
48
|
|
50
49
|
Returns:
|
51
50
|
pd.DataFrame: DataFrame with the primary domain for each node.
|
51
|
+
|
52
|
+
Raises:
|
53
|
+
ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
|
52
54
|
"""
|
53
55
|
try:
|
54
56
|
if linkage_criterion == "off":
|
55
57
|
raise ValueError("Clustering is turned off.")
|
56
58
|
|
57
59
|
# Transpose the matrix to cluster annotations
|
58
|
-
m = significant_neighborhoods_significance[:,
|
60
|
+
m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
|
59
61
|
# Safeguard the matrix by replacing NaN, Inf, and -Inf values
|
60
62
|
m = _safeguard_matrix(m)
|
61
63
|
# Optimize silhouette score across different linkage methods and distance metrics
|
@@ -69,27 +71,23 @@ def define_domains(
|
|
69
71
|
)
|
70
72
|
# Calculate the optimal threshold for clustering
|
71
73
|
max_d_optimal = np.max(Z[:, 2]) * best_threshold
|
72
|
-
# Assign domains to the
|
74
|
+
# Assign domains to the annotation matrix
|
73
75
|
domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
|
74
|
-
|
75
|
-
|
76
|
+
top_annotation["domain"] = 0
|
77
|
+
top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
|
76
78
|
except (ValueError, LinAlgError):
|
77
79
|
# If a ValueError is encountered, handle it by assigning unique domains
|
78
|
-
n_rows = len(top_annotations)
|
80
|
+
n_rows = len(top_annotation)
|
79
81
|
if linkage_criterion == "off":
|
80
|
-
logger.warning(
|
81
|
-
f"Clustering is turned off. Skipping clustering and assigning {n_rows} unique domains."
|
82
|
-
)
|
82
|
+
logger.warning("Clustering is turned off. Skipping clustering.")
|
83
83
|
else:
|
84
|
-
logger.error(
|
85
|
-
|
86
|
-
)
|
87
|
-
top_annotations["domain"] = range(1, n_rows + 1) # Assign unique domains
|
84
|
+
logger.error("Error encountered. Skipping clustering.")
|
85
|
+
top_annotation["domain"] = range(1, n_rows + 1) # Assign unique domains
|
88
86
|
|
89
87
|
# Create DataFrames to store domain information
|
90
88
|
node_to_significance = pd.DataFrame(
|
91
89
|
data=significant_neighborhoods_significance,
|
92
|
-
columns=[
|
90
|
+
columns=[top_annotation.index.values, top_annotation["domain"]],
|
93
91
|
)
|
94
92
|
node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
|
95
93
|
|
@@ -110,15 +108,15 @@ def define_domains(
|
|
110
108
|
|
111
109
|
def trim_domains(
|
112
110
|
domains: pd.DataFrame,
|
113
|
-
|
111
|
+
top_annotation: pd.DataFrame,
|
114
112
|
min_cluster_size: int = 5,
|
115
113
|
max_cluster_size: int = 1000,
|
116
|
-
) -> Tuple[pd.DataFrame, pd.DataFrame
|
114
|
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
117
115
|
"""Trim domains that do not meet size criteria and find outliers.
|
118
116
|
|
119
117
|
Args:
|
120
118
|
domains (pd.DataFrame): DataFrame of domain data for the network nodes.
|
121
|
-
|
119
|
+
top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
|
122
120
|
min_cluster_size (int, optional): Minimum size of a cluster to be retained. Defaults to 5.
|
123
121
|
max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
|
124
122
|
|
@@ -137,21 +135,21 @@ def trim_domains(
|
|
137
135
|
invalid_domain_id = 888888
|
138
136
|
invalid_domain_ids = {0, invalid_domain_id}
|
139
137
|
# Mark domains to be removed
|
140
|
-
|
138
|
+
top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
|
141
139
|
domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
|
142
140
|
|
143
141
|
# Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
|
144
|
-
|
142
|
+
top_annotation["normalized_value"] = top_annotation.groupby("domain")[
|
145
143
|
"significant_neighborhood_significance_sums"
|
146
144
|
].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
|
147
145
|
# Modify the lambda function to pass both full_terms and significant_significance_score
|
148
|
-
|
146
|
+
top_annotation["combined_terms"] = top_annotation.apply(
|
149
147
|
lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
|
150
148
|
)
|
151
149
|
|
152
150
|
# Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
|
153
151
|
domain_labels = (
|
154
|
-
|
152
|
+
top_annotation.groupby("domain")
|
155
153
|
.agg(
|
156
154
|
full_terms=("full_terms", lambda x: list(x)),
|
157
155
|
significance_scores=("significant_significance_score", lambda x: list(x)),
|
@@ -231,7 +229,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
|
|
231
229
|
# Initialize best overall values
|
232
230
|
best_overall_method = linkage_method
|
233
231
|
best_overall_metric = linkage_metric
|
234
|
-
best_overall_threshold =
|
232
|
+
best_overall_threshold = 0.0
|
235
233
|
best_overall_score = -np.inf
|
236
234
|
|
237
235
|
# Set linkage methods and metrics to all combinations if "auto" is selected
|
@@ -242,7 +240,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
|
|
242
240
|
# Evaluating optimal linkage method and metric
|
243
241
|
for method, metric in tqdm(
|
244
242
|
product(linkage_methods, linkage_metrics),
|
245
|
-
desc="Evaluating
|
243
|
+
desc="Evaluating linkage methods and metrics",
|
246
244
|
total=total_combinations,
|
247
245
|
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
|
248
246
|
):
|
risk/neighborhoods/neighborhoods.py
CHANGED
@@ -13,6 +13,7 @@ from scipy.sparse import csr_matrix
|
|
13
13
|
from sklearn.exceptions import DataConversionWarning
|
14
14
|
from sklearn.metrics.pairwise import cosine_similarity
|
15
15
|
|
16
|
+
from risk.log import logger
|
16
17
|
from risk.neighborhoods.community import (
|
17
18
|
calculate_greedy_modularity_neighborhoods,
|
18
19
|
calculate_label_propagation_neighborhoods,
|
@@ -22,7 +23,6 @@ from risk.neighborhoods.community import (
|
|
22
23
|
calculate_spinglass_neighborhoods,
|
23
24
|
calculate_walktrap_neighborhoods,
|
24
25
|
)
|
25
|
-
from risk.log import logger
|
26
26
|
|
27
27
|
# Suppress DataConversionWarning
|
28
28
|
warnings.filterwarnings(action="ignore", category=DataConversionWarning)
|
@@ -48,6 +48,9 @@ def get_network_neighborhoods(
|
|
48
48
|
|
49
49
|
Returns:
|
50
50
|
csr_matrix: The combined neighborhood matrix.
|
51
|
+
|
52
|
+
Raises:
|
53
|
+
ValueError: If the number of distance metrics does not match the number of edge length thresholds.
|
51
54
|
"""
|
52
55
|
# Set random seed for reproducibility
|
53
56
|
random.seed(random_seed)
|
@@ -490,6 +493,9 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
|
|
490
493
|
|
491
494
|
Returns:
|
492
495
|
float: The calculated distance threshold value.
|
496
|
+
|
497
|
+
Raises:
|
498
|
+
ValueError: If no significant annotation is found in the median distances.
|
493
499
|
"""
|
494
500
|
# Sort the median distances
|
495
501
|
sorted_distances = np.sort(median_distances)
|
@@ -500,7 +506,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
|
|
500
506
|
try:
|
501
507
|
smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
|
502
508
|
except ValueError as e:
|
503
|
-
raise ValueError("No significant
|
509
|
+
raise ValueError("No significant annotation found.") from e
|
504
510
|
|
505
511
|
# Determine the index corresponding to the distance threshold
|
506
512
|
threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
|
risk/neighborhoods/stats/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/stats
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from risk.neighborhoods.stats.permutation import compute_permutation_test
|
7
|
+
from risk.neighborhoods.stats.tests import (
|
8
|
+
compute_binom_test,
|
9
|
+
compute_chi2_test,
|
10
|
+
compute_hypergeom_test,
|
11
|
+
compute_poisson_test,
|
12
|
+
compute_zscore_test,
|
13
|
+
)
|