risk-network 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +62 -102
- risk/{annotations → annotation}/io.py +93 -92
- risk/annotation/nltk_setup.py +86 -0
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +5 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -7
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.10.dist-info/METADATA +0 -798
- risk_network-0.0.10.dist-info/RECORD +0 -40
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,86 @@
+"""
+risk/annotation/nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+import os
+import zipfile
+from typing import List, Optional, Tuple
+
+import nltk
+from nltk.data import find
+from nltk.data import path as nltk_data_path
+
+from risk.log import logger
+
+
+def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
+    """Ensures all required NLTK resources are available and properly extracted.
+    Uses NLTK's default paths and mechanisms.
+
+    Args:
+        required_resources (List[Tuple[str, str]], optional): List of required resources
+            to download and extract. Each tuple should contain the resource path within
+            NLTK data and the package name. Defaults to None.
+    """
+    if required_resources is None:
+        required_resources = [
+            ("tokenizers/punkt", "punkt"),
+            ("tokenizers/punkt_tab", "punkt_tab"),
+            ("corpora/stopwords", "stopwords"),
+            ("corpora/wordnet", "wordnet"),
+        ]
+
+    # Process each resource
+    for resource_path, package_name in required_resources:
+        try:
+            # First try to find the resource - this is how NLTK checks if it's available
+            find(resource_path)
+        except LookupError:
+            # Resource not found, download it
+            logger.info(f"Downloading missing NLTK resource: {package_name}")
+            nltk.download(package_name, quiet=True)
+
+        # Even if find() succeeded, the resource might be a zip that failed to extract
+        # Check if we need to manually extract zips
+        verify_and_extract_if_needed(resource_path, package_name)
+
+
+def verify_and_extract_if_needed(resource_path: str, package_name: str) -> None:
+    """Verifies if the resource is properly extracted and extracts if needed. Respects
+    NLTK's directory structure where the extracted content should be in the same directory
+    as the zip file.
+
+    Args:
+        resource_path (str): Path to the resource within NLTK data.
+        package_name (str): Name of the NLTK package.
+    """
+    # Get the directory and base name from the resource path
+    path_parts = resource_path.split("/")
+    resource_type = path_parts[0]  # 'corpora', 'tokenizers', etc.
+    resource_name = path_parts[-1]  # 'wordnet', 'punkt', etc.
+
+    # Check all NLTK data directories
+    for base in nltk_data_path:
+        # For resource paths like 'corpora/wordnet', the zip file is at '~/nltk_data/corpora/wordnet.zip'
+        # and the extracted directory should be at '~/nltk_data/corpora/wordnet'
+        resource_dir = os.path.join(base, resource_type)
+        zip_path = os.path.join(resource_dir, f"{resource_name}.zip")
+        folder_path = os.path.join(resource_dir, resource_name)
+
+        # If zip exists but folder doesn't, extraction is needed
+        if os.path.exists(zip_path) and not os.path.exists(folder_path):
+            logger.info(f"Found unextracted zip for {package_name}, extracting...")
+            try:
+                with zipfile.ZipFile(zip_path, "r") as zf:
+                    # Extract files to the same directory where the zip file is located
+                    zf.extractall(path=resource_dir)
+
+                if os.path.exists(folder_path):
+                    logger.info(f"Successfully extracted {package_name}")
+                else:
+                    logger.warning(
+                        f"Extraction completed but resource directory not found for {package_name}"
+                    )
+            except Exception as e:
+                logger.error(f"Failed to extract {package_name}: {e}")
risk/log/__init__.py CHANGED
risk/log/parameters.py CHANGED

@@ -11,7 +11,7 @@ from typing import Any, Dict

 import numpy as np

-from risk.log.console import
+from risk.log.console import log_header, logger

 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")

@@ -21,7 +21,7 @@ class Params:
     """Handles the storage and logging of various parameters for network analysis.

     The Params class provides methods to log parameters related to different components of the analysis,
-    such as the network,
+    such as the network, annotation, neighborhoods, graph, and plotter settings. It also stores
     the current datetime when the parameters were initialized.
     """

@@ -33,7 +33,7 @@ class Params:
     def initialize(self) -> None:
         """Initialize the parameter dictionaries for different components."""
         self.network = {}
-        self.
+        self.annotation = {}
         self.neighborhoods = {}
         self.graph = {}
         self.plotter = {}

@@ -46,13 +46,13 @@ class Params:
         """
         self.network = {**self.network, **kwargs}

-    def
+    def log_annotation(self, **kwargs) -> None:
         """Log annotation-related parameters.

         Args:
             **kwargs: Annotation parameters to log.
         """
-        self.
+        self.annotation = {**self.annotation, **kwargs}

     def log_neighborhoods(self, **kwargs) -> None:
         """Log neighborhood-related parameters.

@@ -137,9 +137,9 @@ class Params:
             Dict[str, Any]: A dictionary containing the processed parameters.
         """
         log_header("Loading parameters")
-        return _convert_ndarray_to_list(
+        return self._convert_ndarray_to_list(
             {
-                "
+                "annotation": self.annotation,
                 "datetime": self.datetime,
                 "graph": self.graph,
                 "neighborhoods": self.neighborhoods,

@@ -148,25 +148,24 @@ class Params:
             }
         )

+    def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
+        """Recursively convert all np.ndarray values in the dictionary to lists.

-
-
-
-        Args:
-            d (Dict[str, Any]): The dictionary to process.
+        Args:
+            d (Dict[str, Any]): The dictionary to process.

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Returns:
+            Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
+        """
+        if isinstance(d, dict):
+            # Recursively process each value in the dictionary
+            return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
+        if isinstance(d, list):
+            # Recursively process each item in the list
+            return [self._convert_ndarray_to_list(v) for v in d]
+        if isinstance(d, np.ndarray):
+            # Convert numpy arrays to lists
+            return d.tolist()
+
+        # Return the value unchanged if it's not a dict, List, or ndarray
+        return d
risk/neighborhoods/__init__.py CHANGED
risk/neighborhoods/api.py CHANGED

@@ -10,9 +10,9 @@ import networkx as nx
 import numpy as np
 from scipy.sparse import csr_matrix

-from risk.log import
+from risk.log import log_header, logger, params
 from risk.neighborhoods.neighborhoods import get_network_neighborhoods
-from risk.stats import (
+from risk.neighborhoods.stats import (
     compute_binom_test,
     compute_chi2_test,
     compute_hypergeom_test,

@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
     The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
     """

-    def __init__() -> None:
+    def __init__(self) -> None:
         pass

-    def
+    def load_neighborhoods_binom(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -46,7 +46,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:

@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the binomial test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_binom_test,
         )

-    def
+    def load_neighborhoods_chi2(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -91,7 +91,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:

@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the chi-squared test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_chi2_test,
         )

-    def
+    def load_neighborhoods_hypergeom(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -136,7 +136,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:

@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the hypergeometric test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_hypergeom_test,
         )

-    def
+    def load_neighborhoods_permutation(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -184,7 +184,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.

@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the permutation test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
             max_workers=max_workers,
         )

-    def
+    def load_neighborhoods_poisson(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -239,7 +239,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:

@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the Poisson test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_poisson_test,
         )

-    def
+    def load_neighborhoods_zscore(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -284,7 +284,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.

@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or '
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:

@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the z-score test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,

@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
     def _load_neighborhoods_by_statistical_test(
         self,
         network: nx.Graph,
-
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,

@@ -332,7 +332,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The input network graph.
-
+            annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
             distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
                 Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
                 Defaults to "louvain".

@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
             fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
                 Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
-            null_distribution (str, optional): The type of null distribution to use ('network' or '
+            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                 Defaults to "network".
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
                 Used for logging and debugging. Defaults to "hypergeom".
             statistical_test_function (Any, optional): The function implementing the statistical test.
-                It should accept neighborhoods,
+                It should accept neighborhoods, annotation, null distribution, and additional kwargs.
                 Defaults to `compute_hypergeom_test`.
             **kwargs: Additional parameters to be passed to the statistical test function.


@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
         # Apply statistical test function to compute neighborhood significance
         neighborhood_significance = statistical_test_function(
             neighborhoods=neighborhoods,
-
+            annotation=annotation["matrix"],
             null_distribution=null_distribution,
             **kwargs,
        )
risk/neighborhoods/community.py CHANGED

@@ -8,7 +8,7 @@ import igraph as ig
 import markov_clustering as mc
 import networkx as nx
 import numpy as np
-from leidenalg import
+from leidenalg import RBConfigurationVertexPartition, find_partition
 from networkx.algorithms.community import greedy_modularity_communities
 from scipy.sparse import csr_matrix

@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix in CSR format.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.

-
-
-
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        RuntimeError: If MCL fails to run.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(

     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(

@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
     Returns:
         nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
             specified rank fraction.
+
+    Raises:
+        ValueError: If no edges with 'length' attributes are found in the graph.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Step 1: Extract edges with their lengths
     edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]