risk-network 0.0.8b18-py3-none-any.whl → 0.0.9b26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +133 -72
  4. risk/annotations/io.py +50 -34
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +21 -46
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +281 -96
  11. risk/neighborhoods/domains.py +92 -38
  12. risk/neighborhoods/neighborhoods.py +210 -149
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +69 -58
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/graph/network.py +269 -0
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +58 -48
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +80 -26
  23. risk/network/{plot → plotter}/contour.py +43 -34
  24. risk/network/{plot → plotter}/labels.py +123 -113
  25. risk/network/plotter/network.py +424 -0
  26. risk/network/plotter/utils/colors.py +416 -0
  27. risk/network/plotter/utils/layout.py +94 -0
  28. risk/risk.py +11 -469
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +28 -18
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +45 -39
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +17 -11
  37. risk/stats/stats.py +20 -16
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/graph.py +0 -159
  43. risk/network/plot/__init__.py +0 -6
  44. risk/network/plot/network.py +0 -282
  45. risk/network/plot/plotter.py +0 -137
  46. risk/network/plot/utils/color.py +0 -353
  47. risk/network/plot/utils/layout.py +0 -53
  48. risk_network-0.0.8b18.dist-info/RECORD +0 -37
  49. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  50. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0

risk/log/{params.py → parameters.py}
@@ -1,50 +1,22 @@
  """
- risk/log/params
- ~~~~~~~~~~~~~~~
+ risk/log/parameters
+ ~~~~~~~~~~~~~~~~~~~
  """

  import csv
  import json
  import warnings
  from datetime import datetime
- from functools import wraps
  from typing import Any, Dict

  import numpy as np

- from .config import logger, log_header
+ from risk.log.console import logger, log_header

  # Suppress all warnings - this is to resolve warnings from multiprocessing
  warnings.filterwarnings("ignore")


- def _safe_param_export(func):
- """A decorator to wrap parameter export functions in a try-except block for safe execution.
-
- Args:
- func (function): The function to be wrapped.
-
- Returns:
- function: The wrapped function with error handling.
- """
-
- @wraps(func)
- def wrapper(*args, **kwargs):
- try:
- result = func(*args, **kwargs)
- filepath = (
- kwargs.get("filepath") or args[1]
- ) # Assuming filepath is always the second argument
- logger.info(f"Parameters successfully exported to filepath: {filepath}")
- return result
- except Exception as e:
- filepath = kwargs.get("filepath") or args[1]
- logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
- return None
-
- return wrapper
-
-
  class Params:
  """Handles the storage and logging of various parameters for network analysis.

@@ -106,7 +78,6 @@ class Params:
  """
  self.plotter = {**self.plotter, **kwargs}

- @_safe_param_export
  def to_csv(self, filepath: str) -> None:
  """Export the parameters to a CSV file.

@@ -116,7 +87,7 @@ class Params:
  # Load the parameter dictionary
  params = self.load()
  # Open the file in write mode
- with open(filepath, "w", newline="") as csv_file:
+ with open(filepath, "w", encoding="utf-8", newline="") as csv_file:
  writer = csv.writer(csv_file)
  # Write the header
  writer.writerow(["parent_key", "child_key", "value"])
@@ -128,17 +99,19 @@ class Params:
  else:
  writer.writerow([parent_key, "", parent_value])

- @_safe_param_export
+ logger.info(f"Parameters exported to CSV file: {filepath}")
+
  def to_json(self, filepath: str) -> None:
  """Export the parameters to a JSON file.

  Args:
  filepath (str): The path where the JSON file will be saved.
  """
- with open(filepath, "w") as json_file:
+ with open(filepath, "w", encoding="utf-8") as json_file:
  json.dump(self.load(), json_file, indent=4)

- @_safe_param_export
+ logger.info(f"Parameters exported to JSON file: {filepath}")
+
  def to_txt(self, filepath: str) -> None:
  """Export the parameters to a text file.

@@ -148,18 +121,20 @@ class Params:
  # Load the parameter dictionary
  params = self.load()
  # Open the file in write mode
- with open(filepath, "w") as txt_file:
+ with open(filepath, "w", encoding="utf-8") as txt_file:
  for key, value in params.items():
  # Write the key and its corresponding value
  txt_file.write(f"{key}: {value}\n")
  # Add a blank line after each entry
  txt_file.write("\n")

+ logger.info(f"Parameters exported to text file: {filepath}")
+
  def load(self) -> Dict[str, Any]:
  """Load and process various parameters, converting any np.ndarray values to lists.

  Returns:
- dict: A dictionary containing the processed parameters.
+ Dict[str, Any]: A dictionary containing the processed parameters.
  """
  log_header("Loading parameters")
  return _convert_ndarray_to_list(
@@ -174,24 +149,24 @@ class Params:
  )


- def _convert_ndarray_to_list(d: Any) -> Any:
+ def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
  """Recursively convert all np.ndarray values in the dictionary to lists.

  Args:
- d (dict): The dictionary to process.
+ d (Dict[str, Any]): The dictionary to process.

  Returns:
- dict: The processed dictionary with np.ndarray values converted to lists.
+ Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
  """
  if isinstance(d, dict):
  # Recursively process each value in the dictionary
  return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
- elif isinstance(d, list):
+ if isinstance(d, list):
  # Recursively process each item in the list
  return [_convert_ndarray_to_list(v) for v in d]
- elif isinstance(d, np.ndarray):
+ if isinstance(d, np.ndarray):
  # Convert numpy arrays to lists
  return d.tolist()
- else:
- # Return the value unchanged if it's not a dict, list, or ndarray
- return d
+
+ # Return the value unchanged if it's not a dict, List, or ndarray
+ return d
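
For reference, the rewritten helper at the end of this hunk amounts to the following standalone sketch (reproduced outside the package purely for illustration, and left untyped here since the function also recurses through lists and arrays):

import numpy as np

def _convert_ndarray_to_list(d):
    """Recursively convert all np.ndarray values in a nested structure to lists."""
    if isinstance(d, dict):
        return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [_convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    return d

print(_convert_ndarray_to_list({"edge_lengths": np.array([1.0, 2.5]), "seed": 888}))
# {'edge_lengths': [1.0, 2.5], 'seed': 888}

The behavior is unchanged from 0.0.8b18; the diff only flattens the elif/else ladder into early returns and changes the signature annotations from Any to Dict[str, Any].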

risk/neighborhoods/__init__.py
@@ -3,8 +3,6 @@ risk/neighborhoods
  ~~~~~~~~~~~~~~~~~~
  """

- from .domains import define_domains, trim_domains_and_top_annotations
- from .neighborhoods import (
- get_network_neighborhoods,
- process_neighborhoods,
- )
+ from risk.neighborhoods.domains import define_domains, trim_domains
+ from risk.neighborhoods.api import NeighborhoodsAPI
+ from risk.neighborhoods.neighborhoods import process_neighborhoods
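
As a small sketch of the resulting public surface of risk.neighborhoods in 0.0.9b26 (note that get_network_neighborhoods is no longer re-exported here, and the trim_domains_and_top_annotations import is replaced by trim_domains), the following imports should resolve against the hunk above:

from risk.neighborhoods import NeighborhoodsAPI, define_domains, process_neighborhoods, trim_domains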

risk/neighborhoods/api.py
@@ -0,0 +1,446 @@
+ """
+ risk/neighborhoods/api
+ ~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import copy
+ from typing import Any, Dict, List, Tuple, Union
+
+ import networkx as nx
+ import numpy as np
+ from scipy.sparse import csr_matrix
+
+ from risk.log import logger, log_header, params
+ from risk.neighborhoods.neighborhoods import get_network_neighborhoods
+ from risk.stats import (
+ compute_binom_test,
+ compute_chi2_test,
+ compute_hypergeom_test,
+ compute_permutation_test,
+ compute_poisson_test,
+ compute_zscore_test,
+ )
+
+
+ class NeighborhoodsAPI:
+ """Handles the loading of statistical results and annotation significance for neighborhoods.
+
+ The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
+ """
+
+ def __init__() -> None:
+ pass
+
+ def load_neighborhoods_by_binom(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the binomial test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running binomial test")
+ # Compute neighborhood significance using the binomial test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="binom",
+ statistical_test_function=compute_binom_test,
+ )
+
+ def load_neighborhoods_by_chi2(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the chi-squared test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running chi-squared test")
+ # Compute neighborhood significance using the chi-squared test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="chi2",
+ statistical_test_function=compute_chi2_test,
+ )
+
+ def load_neighborhoods_by_hypergeom(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the hypergeometric test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running hypergeometric test")
+ # Compute neighborhood significance using the hypergeometric test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="hypergeom",
+ statistical_test_function=compute_hypergeom_test,
+ )
+
+ def load_neighborhoods_by_permutation(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ score_metric: str = "sum",
+ null_distribution: str = "network",
+ num_permutations: int = 1000,
+ random_seed: int = 888,
+ max_workers: int = 1,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the permutation test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+ max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running permutation test")
+ # Log and display permutation test settings, which is unique to this test
+ logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
+ logger.debug(f"Number of permutations: {num_permutations}")
+ logger.debug(f"Maximum workers: {max_workers}")
+ # Compute neighborhood significance using the permutation test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="permutation",
+ statistical_test_function=compute_permutation_test,
+ score_metric=score_metric,
+ num_permutations=num_permutations,
+ max_workers=max_workers,
+ )
+
+ def load_neighborhoods_by_poisson(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the Poisson test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running Poisson test")
+ # Compute neighborhood significance using the Poisson test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="poisson",
+ statistical_test_function=compute_poisson_test,
+ )
+
+ def load_neighborhoods_by_zscore(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ ) -> Dict[str, Any]:
+ """Load significant neighborhoods for the network using the Z-score test.
+
+ Args:
+ network (nx.Graph): The network graph.
+ annotations (Dict[str, Any]): The annotations associated with the network.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ Dict[str, Any]: Computed significance of neighborhoods.
+ """
+ log_header("Running Z-score test")
+ # Compute neighborhood significance using the Z-score test
+ return self._load_neighborhoods_by_statistical_test(
+ network=network,
+ annotations=annotations,
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ statistical_test_key="zscore",
+ statistical_test_function=compute_zscore_test,
+ )
+
+ def _load_neighborhoods_by_statistical_test(
+ self,
+ network: nx.Graph,
+ annotations: Dict[str, Any],
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ null_distribution: str = "network",
+ random_seed: int = 888,
+ statistical_test_key: str = "hypergeom",
+ statistical_test_function: Any = compute_hypergeom_test,
+ **kwargs,
+ ):
+ """Load and compute significant neighborhoods for the network using a specified statistical test.
+
+ Args:
+ network (nx.Graph): The input network graph.
+ annotations (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
+ distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
+ Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
+ Defaults to "louvain".
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
+ Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
+ null_distribution (str, optional): The type of null distribution to use ('network' or 'annotations').
+ Defaults to "network".
+ random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
+ statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
+ Used for logging and debugging. Defaults to "hypergeom".
+ statistical_test_function (Any, optional): The function implementing the statistical test.
+ It should accept neighborhoods, annotations, null distribution, and additional kwargs.
+ Defaults to `compute_hypergeom_test`.
+ **kwargs: Additional parameters to be passed to the statistical test function.
+
+ Returns:
+ Dict[str, Any]: A dictionary containing the computed significance values for neighborhoods.
+ """
+ # Log null distribution type
+ logger.debug(f"Null distribution: '{null_distribution}'")
+ # Log neighborhood analysis parameters
+ params.log_neighborhoods(
+ distance_metric=distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ statistical_test_function=statistical_test_key,
+ null_distribution=null_distribution,
+ random_seed=random_seed,
+ **kwargs,
+ )
+
+ # Make a copy of the network to avoid modifying the original
+ network = copy.deepcopy(network)
+ # Load neighborhoods based on the network and distance metric
+ neighborhoods = self._load_neighborhoods(
+ network,
+ distance_metric,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ fraction_shortest_edges=fraction_shortest_edges,
+ random_seed=random_seed,
+ )
+ # Apply statistical test function to compute neighborhood significance
+ neighborhood_significance = statistical_test_function(
+ neighborhoods=neighborhoods,
+ annotations=annotations["matrix"],
+ null_distribution=null_distribution,
+ **kwargs,
+ )
+
+ # Return the computed neighborhood significance
+ return neighborhood_significance
+
+ def _load_neighborhoods(
+ self,
+ network: nx.Graph,
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+ louvain_resolution: float = 0.1,
+ leiden_resolution: float = 1.0,
+ fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
+ random_seed: int = 888,
+ ) -> csr_matrix:
+ """Load significant neighborhoods for the network.
+
+ Args:
+ network (nx.Graph): The network graph.
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+ louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+ fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+ Defaults to 0.5.
+ random_seed (int, optional): Seed for random number generation. Defaults to 888.
+
+ Returns:
+ csr_matrix: Sparse neighborhood matrix calculated based on the selected distance metric.
+ """
+ # Display the chosen distance metric
+ if distance_metric == "louvain":
+ for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
+ elif distance_metric == "leiden":
+ for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
+ else:
+ for_print_distance_metric = distance_metric
+
+ # Log and display neighborhood settings
+ logger.debug(f"Distance metric: '{for_print_distance_metric}'")
+ logger.debug(f"Edge length threshold: {fraction_shortest_edges}")
+ logger.debug(f"Random seed: {random_seed}")
+
+ # Compute neighborhoods
+ neighborhoods = get_network_neighborhoods(
+ network,
+ distance_metric,
+ fraction_shortest_edges,
+ louvain_resolution=louvain_resolution,
+ leiden_resolution=leiden_resolution,
+ random_seed=random_seed,
+ )
+
+ # Ensure the neighborhood matrix is in sparse format
+ if not isinstance(neighborhoods, csr_matrix):
+ neighborhoods = csr_matrix(neighborhoods)
+
+ # Return the sparse neighborhood matrix
+ return neighborhoods
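
Taken together, the new module exposes one public loader per statistical test, all funneling into the same private runner. A rough usage sketch follows, illustrative only: the toy graph and annotation matrix are placeholders (real workflows would build both with risk's own network and annotation I/O, which attach attributes this sketch does not reproduce), and because the __init__ above is defined without self, the class appears intended to be mixed into a higher-level object (likely the rewritten risk/risk.py, not shown here) rather than instantiated directly, so the call is routed through a trivial subclass:

import networkx as nx
import numpy as np

from risk.neighborhoods.api import NeighborhoodsAPI

class _Demo(NeighborhoodsAPI):
    # Minimal constructor so this sketch can instantiate the API as diffed above.
    def __init__(self) -> None:
        pass

network = nx.karate_club_graph()  # toy stand-in for a preprocessed RISK network
# The private runner indexes annotations["matrix"], so a node-by-term matrix is assumed.
annotations = {"matrix": np.random.randint(0, 2, size=(network.number_of_nodes(), 10))}

result = _Demo().load_neighborhoods_by_hypergeom(
    network,
    annotations,
    distance_metric="louvain",
    louvain_resolution=0.1,
    fraction_shortest_edges=0.5,
    null_distribution="network",
    random_seed=888,
)

The keyword arguments mirror the signature in the hunk; whether a bare NetworkX graph is sufficient input depends on get_network_neighborhoods and the upstream loaders, which are outside this diff.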