risk-network 0.0.9b23__py3-none-any.whl → 0.0.9b25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +9 -9
  4. risk/annotations/io.py +0 -2
  5. risk/log/__init__.py +2 -2
  6. risk/neighborhoods/__init__.py +3 -5
  7. risk/neighborhoods/api.py +446 -0
  8. risk/neighborhoods/community.py +4 -2
  9. risk/neighborhoods/domains.py +28 -1
  10. risk/network/__init__.py +1 -3
  11. risk/network/graph/__init__.py +1 -1
  12. risk/network/graph/api.py +194 -0
  13. risk/network/graph/summary.py +6 -2
  14. risk/network/io.py +0 -2
  15. risk/network/plotter/__init__.py +6 -0
  16. risk/network/plotter/api.py +54 -0
  17. risk/network/{plot → plotter}/canvas.py +3 -3
  18. risk/network/{plot → plotter}/contour.py +2 -2
  19. risk/network/{plot → plotter}/labels.py +3 -3
  20. risk/network/{plot → plotter}/network.py +136 -3
  21. risk/network/{plot → plotter}/utils/colors.py +15 -6
  22. risk/risk.py +10 -483
  23. risk/stats/__init__.py +8 -4
  24. risk/stats/binom.py +51 -0
  25. risk/stats/chi2.py +69 -0
  26. risk/stats/hypergeom.py +27 -17
  27. risk/stats/permutation/__init__.py +1 -1
  28. risk/stats/permutation/permutation.py +44 -55
  29. risk/stats/permutation/test_functions.py +25 -17
  30. risk/stats/poisson.py +15 -9
  31. risk/stats/zscore.py +68 -0
  32. {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/METADATA +1 -1
  33. risk_network-0.0.9b25.dist-info/RECORD +44 -0
  34. risk/network/plot/__init__.py +0 -6
  35. risk/network/plot/plotter.py +0 -143
  36. risk_network-0.0.9b23.dist-info/RECORD +0 -39
  37. /risk/network/{plot → plotter}/utils/layout.py +0 -0
  38. {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/LICENSE +0 -0
  39. {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/WHEEL +0 -0
  40. {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/top_level.txt +0 -0
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.23"
10
+ __version__ = "0.0.9-beta.25"
@@ -3,5 +3,5 @@ risk/annotations
3
3
  ~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from .annotations import define_top_annotations, get_weighted_description
7
- from .io import AnnotationsIO
6
+ from risk.annotations.annotations import define_top_annotations, get_weighted_description
7
+ from risk.annotations.io import AnnotationsIO
@@ -16,6 +16,7 @@ from nltk.tokenize import word_tokenize
16
16
  from nltk.corpus import stopwords
17
17
 
18
18
  from risk.log import logger
19
+ from scipy.sparse import csr_matrix
19
20
 
20
21
 
21
22
  def _setup_nltk():
@@ -47,17 +48,15 @@ def load_annotations(
47
48
  annotations_input (Dict[str, Any]): A dictionary with annotations.
48
49
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
49
50
  term to be included. Defaults to 2.
51
+ use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
50
52
 
51
53
  Returns:
52
- Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
54
+ Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
55
+ matrix.
53
56
 
54
57
  Raises:
55
58
  ValueError: If no annotations are found for the nodes in the network.
56
59
  ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
57
-
58
- Comment:
59
- This function should be optimized to handle large networks and annotations efficiently. An attempt
60
- to use sparse matrices did not yield significant performance improvements, so it was not implemented.
61
60
  """
62
61
  # Flatten the dictionary to a list of tuples for easier DataFrame creation
63
62
  flattened_annotations = [
@@ -78,7 +77,6 @@ def load_annotations(
78
77
  raise ValueError("No terms found in the annotation file for the nodes in the network.")
79
78
 
80
79
  # Filter out annotations with fewer than min_nodes_per_term occurrences
81
- # This assists in reducing noise and focusing on more relevant annotations for statistical analysis
82
80
  num_terms_before_filtering = annotations_pivot.shape[1]
83
81
  annotations_pivot = annotations_pivot.loc[
84
82
  :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
@@ -96,13 +94,15 @@ def load_annotations(
96
94
  # Extract ordered nodes and annotations
97
95
  ordered_nodes = tuple(annotations_pivot.index)
98
96
  ordered_annotations = tuple(annotations_pivot.columns)
99
- # Convert the annotations_pivot matrix to a numpy array and ensure it's binary
100
- annotations_pivot_numpy = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
97
+ # Convert the annotations_pivot matrix to a numpy array or sparse matrix
98
+ annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
99
+ # Convert the binary annotations matrix to a sparse matrix
100
+ annotations_pivot_binary = csr_matrix(annotations_pivot_binary)
101
101
 
102
102
  return {
103
103
  "ordered_nodes": ordered_nodes,
104
104
  "ordered_annotations": ordered_annotations,
105
- "matrix": annotations_pivot_numpy,
105
+ "matrix": annotations_pivot_binary,
106
106
  }
107
107
 
108
108
 
risk/annotations/io.py CHANGED
@@ -1,8 +1,6 @@
1
1
  """
2
2
  risk/annotations/io
3
3
  ~~~~~~~~~~~~~~~~~~~
4
-
5
- This file contains the code for the RISK class and command-line access.
6
4
  """
7
5
 
8
6
  import json
risk/log/__init__.py CHANGED
@@ -3,8 +3,8 @@ risk/log
3
3
  ~~~~~~~~
4
4
  """
5
5
 
6
- from .console import logger, log_header, set_global_verbosity
7
- from .parameters import Params
6
+ from risk.log.console import logger, log_header, set_global_verbosity
7
+ from risk.log.parameters import Params
8
8
 
9
9
  # Initialize the global parameters logger
10
10
  params = Params()
@@ -3,8 +3,6 @@ risk/neighborhoods
3
3
  ~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from .domains import define_domains, trim_domains
7
- from .neighborhoods import (
8
- get_network_neighborhoods,
9
- process_neighborhoods,
10
- )
6
+ from risk.neighborhoods.domains import define_domains, trim_domains
7
+ from risk.neighborhoods.api import NeighborhoodsAPI
8
+ from risk.neighborhoods.neighborhoods import process_neighborhoods
@@ -0,0 +1,446 @@
1
+ """
2
+ risk/neighborhoods/api
3
+ ~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ import copy
7
+ from typing import Any, Dict, List, Tuple, Union
8
+
9
+ import networkx as nx
10
+ import numpy as np
11
+ from scipy.sparse import csr_matrix
12
+
13
+ from risk.log import logger, log_header, params
14
+ from risk.neighborhoods.neighborhoods import get_network_neighborhoods
15
+ from risk.stats import (
16
+ compute_binom_test,
17
+ compute_chi2_test,
18
+ compute_hypergeom_test,
19
+ compute_permutation_test,
20
+ compute_poisson_test,
21
+ compute_zscore_test,
22
+ )
23
+
24
+
25
class NeighborhoodsAPI:
    """Compute annotation significance for network neighborhoods.

    Each public ``load_neighborhoods_by_*`` method selects one statistical test
    (binomial, chi-squared, hypergeometric, permutation, Poisson, or Z-score) and
    delegates to ``_load_neighborhoods_by_statistical_test``, which builds the
    neighborhood matrix and applies the test to ``annotations["matrix"]``.
    """

    def __init__(self) -> None:
        # The class holds no state; ``self`` is required so that the class can be
        # instantiated and so that subclasses can call ``super().__init__()``.
        pass

    def load_neighborhoods_by_binom(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the binomial test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running binomial test")
        # Compute neighborhood significance using the binomial test
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="binom",
            statistical_test_function=compute_binom_test,
        )

    def load_neighborhoods_by_chi2(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the chi-squared test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running chi-squared test")
        # Compute neighborhood significance using the chi-squared test
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="chi2",
            statistical_test_function=compute_chi2_test,
        )

    def load_neighborhoods_by_hypergeom(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the hypergeometric test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running hypergeometric test")
        # Compute neighborhood significance using the hypergeometric test
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="hypergeom",
            statistical_test_function=compute_hypergeom_test,
        )

    def load_neighborhoods_by_permutation(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        score_metric: str = "sum",
        null_distribution: str = "network",
        num_permutations: int = 1000,
        random_seed: int = 888,
        max_workers: int = 1,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the permutation test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            score_metric (str, optional): Scoring metric for neighborhood significance.
                Defaults to "sum".
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            num_permutations (int, optional): Number of permutations for significance testing.
                Defaults to 1000.
            random_seed (int, optional): Seed for random number generation. Defaults to 888.
            max_workers (int, optional): Maximum number of workers for parallel computation.
                Defaults to 1.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running permutation test")
        # Log and display permutation test settings, which is unique to this test
        logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
        logger.debug(f"Number of permutations: {num_permutations}")
        logger.debug(f"Maximum workers: {max_workers}")
        # Compute neighborhood significance using the permutation test; the three
        # permutation-specific settings travel to the test function via **kwargs
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="permutation",
            statistical_test_function=compute_permutation_test,
            score_metric=score_metric,
            num_permutations=num_permutations,
            max_workers=max_workers,
        )

    def load_neighborhoods_by_poisson(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the Poisson test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running Poisson test")
        # Compute neighborhood significance using the Poisson test
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="poisson",
            statistical_test_function=compute_poisson_test,
        )

    def load_neighborhoods_by_zscore(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network using the Z-score test.

        Args:
            network (nx.Graph): The network graph.
            annotations (Dict[str, Any]): The annotations associated with the network.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            null_distribution (str, optional): Type of null distribution ('network' or
                'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            Dict[str, Any]: Computed significance of neighborhoods.
        """
        log_header("Running Z-score test")
        # Compute neighborhood significance using the Z-score test
        return self._load_neighborhoods_by_statistical_test(
            network=network,
            annotations=annotations,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            null_distribution=null_distribution,
            random_seed=random_seed,
            statistical_test_key="zscore",
            statistical_test_function=compute_zscore_test,
        )

    def _load_neighborhoods_by_statistical_test(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        null_distribution: str = "network",
        random_seed: int = 888,
        statistical_test_key: str = "hypergeom",
        statistical_test_function: Any = compute_hypergeom_test,
        **kwargs,
    ) -> Dict[str, Any]:
        """Load and compute significant neighborhoods for the network using a specified test.

        Args:
            network (nx.Graph): The input network graph.
            annotations (Dict[str, Any]): Annotation data associated with the network,
                including a "matrix" key with annotation values.
            distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric
                or clustering method to define neighborhoods. Can be a string specifying one
                method (e.g., 'louvain', 'leiden') or a collection of methods.
                Defaults to "louvain".
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction
                of shortest edges to consider for creating subgraphs. Can be a single value or a
                collection of thresholds. Defaults to 0.5.
            null_distribution (str, optional): The type of null distribution to use ('network'
                or 'annotations'). Defaults to "network".
            random_seed (int, optional): Seed for random number generation. Defaults to 888.
            statistical_test_key (str, optional): Name of the statistical test; it is recorded
                in the parameter log. Defaults to "hypergeom".
            statistical_test_function (Any, optional): The function implementing the statistical
                test. It is called with ``neighborhoods``, ``annotations``, ``null_distribution``
                and ``**kwargs`` as keyword arguments. Defaults to `compute_hypergeom_test`.
            **kwargs: Additional parameters forwarded both to the parameter log and to the
                statistical test function.

        Returns:
            Dict[str, Any]: A dictionary containing the computed significance values for
                neighborhoods.
        """
        # Log null distribution type
        logger.debug(f"Null distribution: '{null_distribution}'")
        # Log neighborhood analysis parameters; the test is logged by its key, not the callable
        params.log_neighborhoods(
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            statistical_test_function=statistical_test_key,
            null_distribution=null_distribution,
            random_seed=random_seed,
            **kwargs,
        )

        # Make a copy of the network to avoid modifying the original
        network = copy.deepcopy(network)
        # Load neighborhoods based on the network and distance metric
        neighborhoods = self._load_neighborhoods(
            network,
            distance_metric,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            fraction_shortest_edges=fraction_shortest_edges,
            random_seed=random_seed,
        )
        # Apply statistical test function to compute neighborhood significance
        neighborhood_significance = statistical_test_function(
            neighborhoods=neighborhoods,
            annotations=annotations["matrix"],
            null_distribution=null_distribution,
            **kwargs,
        )

        # Return the computed neighborhood significance
        return neighborhood_significance

    def _load_neighborhoods(
        self,
        network: nx.Graph,
        distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
        louvain_resolution: float = 0.1,
        leiden_resolution: float = 1.0,
        fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
        random_seed: int = 888,
    ) -> csr_matrix:
        """Compute the neighborhood matrix for the network and return it in sparse form.

        Args:
            network (nx.Graph): The network graph.
            distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s)
                to use. Can be a string for one metric or a list/tuple/ndarray of metrics
                ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
                Defaults to 0.1.
            leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
                Defaults to 1.0.
            fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge
                rank fraction threshold(s) for creating subgraphs. Can be a single float for one
                threshold or a list/tuple of floats corresponding to multiple thresholds.
                Defaults to 0.5.
            random_seed (int, optional): Seed for random number generation. Defaults to 888.

        Returns:
            csr_matrix: Sparse neighborhood matrix calculated based on the selected distance
                metric.
        """
        # Build a display label for the chosen distance metric. The isinstance guard matters:
        # comparing a multi-element np.ndarray to a string yields an array whose truth value
        # is ambiguous, which would raise ValueError before any neighborhoods are computed.
        single_metric = distance_metric if isinstance(distance_metric, str) else None
        if single_metric == "louvain":
            for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
        elif single_metric == "leiden":
            for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
        else:
            for_print_distance_metric = distance_metric

        # Log and display neighborhood settings
        logger.debug(f"Distance metric: '{for_print_distance_metric}'")
        logger.debug(f"Edge length threshold: {fraction_shortest_edges}")
        logger.debug(f"Random seed: {random_seed}")

        # Compute neighborhoods
        neighborhoods = get_network_neighborhoods(
            network,
            distance_metric,
            fraction_shortest_edges,
            louvain_resolution=louvain_resolution,
            leiden_resolution=leiden_resolution,
            random_seed=random_seed,
        )

        # Ensure the neighborhood matrix is in sparse format
        if not isinstance(neighborhoods, csr_matrix):
            neighborhoods = csr_matrix(neighborhoods)

        # Return the sparse neighborhood matrix
        return neighborhoods
@@ -11,6 +11,8 @@ import numpy as np
11
11
  from leidenalg import find_partition, RBConfigurationVertexPartition
12
12
  from networkx.algorithms.community import greedy_modularity_communities
13
13
 
14
+ from risk.log import logger
15
+
14
16
 
15
17
  def calculate_greedy_modularity_neighborhoods(
16
18
  network: nx.Graph, fraction_shortest_edges: float = 1.0
@@ -266,14 +268,14 @@ def calculate_spinglass_neighborhoods(
266
268
  igraph_subgraph = ig.Graph.from_networkx(subgraph)
267
269
  # Ensure the subgraph is connected before running Spinglass
268
270
  if not igraph_subgraph.is_connected():
269
- print("Warning: Subgraph is not connected. Skipping...")
271
+ logger.error("Warning: Subgraph is not connected. Skipping...")
270
272
  continue
271
273
 
272
274
  # Apply Spinglass community detection
273
275
  try:
274
276
  communities = igraph_subgraph.community_spinglass()
275
277
  except Exception as e:
276
- print(f"Error running Spinglass on component: {e}")
278
+ logger.error(f"Error running Spinglass on component: {e}")
277
279
  continue
278
280
 
279
281
  # Step 3: Assign neighborhoods based on community labels
@@ -41,6 +41,9 @@ def define_domains(
41
41
  try:
42
42
  # Transpose the matrix to cluster annotations
43
43
  m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
44
+ # Safeguard the matrix by replacing NaN, Inf, and -Inf values
45
+ m = _safeguard_matrix(m)
46
+ # Optimize silhouette score across different linkage methods and distance metrics
44
47
  best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
45
48
  m, linkage_criterion, linkage_method, linkage_metric
46
49
  )
@@ -161,6 +164,29 @@ def trim_domains(
161
164
  return valid_domains, valid_trimmed_domains_matrix
162
165
 
163
166
 
167
+ def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
168
+ """Safeguard the matrix by replacing NaN, Inf, and -Inf values.
169
+
170
+ Args:
171
+ matrix (np.ndarray): Data matrix.
172
+
173
+ Returns:
174
+ np.ndarray: Safeguarded data matrix.
175
+ """
176
+ # Replace NaN with column mean
177
+ nan_replacement = np.nanmean(matrix, axis=0)
178
+ matrix = np.where(np.isnan(matrix), nan_replacement, matrix)
179
+ # Replace Inf/-Inf with maximum/minimum finite values
180
+ finite_max = np.nanmax(matrix[np.isfinite(matrix)])
181
+ finite_min = np.nanmin(matrix[np.isfinite(matrix)])
182
+ matrix = np.where(np.isposinf(matrix), finite_max, matrix)
183
+ matrix = np.where(np.isneginf(matrix), finite_min, matrix)
184
+ # Ensure rows have non-zero variance (optional step)
185
+ row_variance = np.var(matrix, axis=1)
186
+ matrix = matrix[row_variance > 0]
187
+ return matrix
188
+
189
+
164
190
  def _optimize_silhouette_across_linkage_and_metrics(
165
191
  m: np.ndarray, linkage_criterion: str, linkage_method: str, linkage_metric: str
166
192
  ) -> Tuple[str, str, float]:
@@ -194,7 +220,8 @@ def _optimize_silhouette_across_linkage_and_metrics(
194
220
  total=total_combinations,
195
221
  bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
196
222
  ):
197
- with suppress(Exception):
223
+ # Some linkage methods and metrics may not work with certain data
224
+ with suppress(ValueError):
198
225
  Z = linkage(m, method=method, metric=metric)
199
226
  threshold, score = _find_best_silhouette_score(Z, m, metric, linkage_criterion)
200
227
  if score > best_overall_score:
risk/network/__init__.py CHANGED
@@ -3,6 +3,4 @@ risk/network
3
3
  ~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from .graph import NetworkGraph
7
- from .io import NetworkIO
8
- from .plot import NetworkPlotter
6
+ from risk.network.io import NetworkIO
@@ -3,4 +3,4 @@ risk/network/graph
3
3
  ~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from .network import NetworkGraph
6
+ from risk.network.graph.api import GraphAPI