risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. risk/__init__.py +1 -1
  2. risk/annotation/__init__.py +10 -0
  3. risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
  4. risk/{annotations → annotation}/io.py +93 -92
  5. risk/{annotations → annotation}/nltk_setup.py +6 -5
  6. risk/log/__init__.py +1 -1
  7. risk/log/parameters.py +26 -27
  8. risk/neighborhoods/__init__.py +0 -1
  9. risk/neighborhoods/api.py +38 -38
  10. risk/neighborhoods/community.py +33 -4
  11. risk/neighborhoods/domains.py +26 -28
  12. risk/neighborhoods/neighborhoods.py +8 -2
  13. risk/neighborhoods/stats/__init__.py +13 -0
  14. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  15. risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
  16. risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
  17. risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
  18. risk/network/__init__.py +0 -2
  19. risk/network/graph/__init__.py +0 -2
  20. risk/network/graph/api.py +19 -19
  21. risk/network/graph/graph.py +73 -68
  22. risk/{stats/significance.py → network/graph/stats.py} +2 -2
  23. risk/network/graph/summary.py +12 -13
  24. risk/network/io.py +163 -20
  25. risk/network/plotter/__init__.py +0 -2
  26. risk/network/plotter/api.py +1 -1
  27. risk/network/plotter/canvas.py +36 -36
  28. risk/network/plotter/contour.py +14 -15
  29. risk/network/plotter/labels.py +303 -294
  30. risk/network/plotter/network.py +6 -6
  31. risk/network/plotter/plotter.py +8 -10
  32. risk/network/plotter/utils/colors.py +15 -8
  33. risk/network/plotter/utils/layout.py +3 -3
  34. risk/risk.py +6 -6
  35. risk_network-0.0.12.dist-info/METADATA +122 -0
  36. risk_network-0.0.12.dist-info/RECORD +40 -0
  37. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
  38. risk/annotations/__init__.py +0 -7
  39. risk/network/geometry.py +0 -150
  40. risk/stats/__init__.py +0 -15
  41. risk/stats/permutation/__init__.py +0 -6
  42. risk_network-0.0.11.dist-info/METADATA +0 -798
  43. risk_network-0.0.11.dist-info/RECORD +0 -41
  44. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
  45. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
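The rename that dominates this file list — risk/annotations → risk/annotation — carries through to the import path. A hedged sketch of the update for downstream code (get_weighted_description is taken from the domains.py hunk below; other re-exports of the annotation package aren't shown in this diff):

    # 0.0.11:
    # from risk.annotations import get_weighted_description

    # 0.0.12: the package, and therefore the import path, is now singular
    from risk.annotation import get_weighted_description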
risk/neighborhoods/__init__.py CHANGED
@@ -4,5 +4,4 @@ risk/neighborhoods
 """
 
 from risk.neighborhoods.domains import define_domains, trim_domains
-from risk.neighborhoods.api import NeighborhoodsAPI
 from risk.neighborhoods.neighborhoods import process_neighborhoods
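The removed line above drops the package-level re-export of NeighborhoodsAPI. Assuming no replacement re-export appears elsewhere (none is visible in this diff), callers would import the class from its defining module:

    # 0.0.11: from risk.neighborhoods import NeighborhoodsAPI
    from risk.neighborhoods.api import NeighborhoodsAPI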
risk/neighborhoods/api.py CHANGED
@@ -10,9 +10,9 @@ import networkx as nx
 import numpy as np
 from scipy.sparse import csr_matrix
 
-from risk.log import logger, log_header, params
+from risk.log import log_header, logger, params
 from risk.neighborhoods.neighborhoods import get_network_neighborhoods
-from risk.stats import (
+from risk.neighborhoods.stats import (
     compute_binom_test,
     compute_chi2_test,
     compute_hypergeom_test,
@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
     The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
     """
 
-    def __init__() -> None:
+    def __init__(self) -> None:
         pass
 
-    def load_neighborhoods_by_binom(
+    def load_neighborhoods_binom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -46,7 +46,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
         Returns:
@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the binomial test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_binom_test,
         )
 
-    def load_neighborhoods_by_chi2(
+    def load_neighborhoods_chi2(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -91,7 +91,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
         Returns:
@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the chi-squared test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_chi2_test,
         )
 
-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_hypergeom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -136,7 +136,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
         Returns:
@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the hypergeometric test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_hypergeom_test,
         )
 
-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_permutation(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -184,7 +184,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the permutation test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
             max_workers=max_workers,
         )
 
-    def load_neighborhoods_by_poisson(
+    def load_neighborhoods_poisson(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -239,7 +239,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
         Returns:
@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the Poisson test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_poisson_test,
         )
 
-    def load_neighborhoods_by_zscore(
+    def load_neighborhoods_zscore(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -284,7 +284,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
         Returns:
@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the z-score test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
     def _load_neighborhoods_by_statistical_test(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -332,7 +332,7 @@ class NeighborhoodsAPI:
 
         Args:
             network (nx.Graph): The input network graph.
-            annotations (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
+            annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
             distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
                 Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
                 Defaults to "louvain".
@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
             fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
                 Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
-            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotations').
+            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                 Defaults to "network".
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
                 Used for logging and debugging. Defaults to "hypergeom".
             statistical_test_function (Any, optional): The function implementing the statistical test.
-                It should accept neighborhoods, annotations, null distribution, and additional kwargs.
+                It should accept neighborhoods, annotation, null distribution, and additional kwargs.
                 Defaults to `compute_hypergeom_test`.
             **kwargs: Additional parameters to be passed to the statistical test function.
 
@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
         # Apply statistical test function to compute neighborhood significance
        neighborhood_significance = statistical_test_function(
             neighborhoods=neighborhoods,
-            annotations=annotations["matrix"],
+            annotation=annotation["matrix"],
             null_distribution=null_distribution,
             **kwargs,
         )
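Taken together, every public loader in NeighborhoodsAPI drops the by_ infix and the annotations keyword becomes annotation. A hedged before/after sketch of a call site (the network and annotation objects are placeholders; only the method, keyword, and default values come from the diff):

    import networkx as nx
    from risk.neighborhoods.api import NeighborhoodsAPI

    api = NeighborhoodsAPI()

    # Placeholders: a real run would build these through the package's loaders.
    # Per the docstring, `annotation` is a dict carrying a "matrix" key.
    network: nx.Graph = ...
    annotation: dict = ...

    # 0.0.11: api.load_neighborhoods_by_hypergeom(network=network, annotations=annotation, ...)
    # 0.0.12: the by_ infix is gone and the keyword is singular.
    results = api.load_neighborhoods_hypergeom(
        network=network,
        annotation=annotation,
        distance_metric="louvain",    # default per the diff
        null_distribution="network",  # 'network' or 'annotation' (renamed option)
        random_seed=888,
    )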
risk/neighborhoods/community.py CHANGED
@@ -8,7 +8,7 @@ import igraph as ig
 import markov_clustering as mc
 import networkx as nx
 import numpy as np
-from leidenalg import find_partition, RBConfigurationVertexPartition
+from leidenalg import RBConfigurationVertexPartition, find_partition
 from networkx.algorithms.community import greedy_modularity_communities
 from scipy.sparse import csr_matrix
 
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix in CSR format.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
 
-    Warning:
-        This function temporarily converts the adjacency matrix to a dense format, which may lead to
-        high memory consumption for large graphs.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        RuntimeError: If MCL fails to run.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
 
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
     Returns:
         nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
             specified rank fraction.
+
+    Raises:
+        ValueError: If no edges with 'length' attributes are found in the graph.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Step 1: Extract edges with their lengths
     edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
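The docstring additions formalize an error contract shared by all of these community helpers: the percentile-limited subgraph step keeps only edges carrying a "length" attribute and raises ValueError when none exist. A toy illustration of that contract (the graph is hypothetical; the helper is private, so importing it directly is for illustration only, and its behavior beyond the documented ValueError is assumed):

    import networkx as nx
    from risk.neighborhoods.community import _create_percentile_limited_subgraph

    # Every edge needs a "length" attribute; a graph without any would trigger
    # the newly documented ValueError.
    G = nx.Graph()
    G.add_edge("A", "B", length=1.0)
    G.add_edge("B", "C", length=2.5)
    G.add_edge("A", "C", length=4.0)

    # Keep the shortest half of the edges by rank fraction.
    subnetwork = _create_percentile_limited_subgraph(G, fraction_shortest_edges=0.5)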
risk/neighborhoods/domains.py CHANGED
@@ -9,19 +9,18 @@ from typing import Tuple, Union
 import numpy as np
 import pandas as pd
 from numpy.linalg import LinAlgError
-from scipy.cluster.hierarchy import linkage, fcluster
+from scipy.cluster.hierarchy import fcluster, linkage
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm
 
-from risk.annotations import get_weighted_description
+from risk.annotation import get_weighted_description
 from risk.log import logger
 
-
 # Define constants for clustering
 # fmt: off
 LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
 LINKAGE_METRICS = {
-    "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+    "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
     "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
     "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
 }
@@ -29,7 +28,7 @@ LINKAGE_METRICS = {
 
 
 def define_domains(
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
@@ -40,7 +39,7 @@ def define_domains(
     handling errors by assigning unique domains when clustering fails.
 
     Args:
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
         linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
@@ -49,13 +48,16 @@ def define_domains(
 
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
+
+    Raises:
+        ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
     """
     try:
         if linkage_criterion == "off":
             raise ValueError("Clustering is turned off.")
 
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
         # Optimize silhouette score across different linkage methods and distance metrics
@@ -69,27 +71,23 @@ def define_domains(
         )
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        # Assign domains to the annotations matrix
+        # Assign domains to the annotation matrix
         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-        top_annotations["domain"] = 0
-        top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
+        top_annotation["domain"] = 0
+        top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
     except (ValueError, LinAlgError):
         # If a ValueError is encountered, handle it by assigning unique domains
-        n_rows = len(top_annotations)
+        n_rows = len(top_annotation)
         if linkage_criterion == "off":
-            logger.warning(
-                f"Clustering is turned off. Skipping clustering and assigning {n_rows} unique domains."
-            )
+            logger.warning("Clustering is turned off. Skipping clustering.")
         else:
-            logger.error(
-                f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
-            )
-        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
+            logger.error("Error encountered. Skipping clustering.")
+        top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
         data=significant_neighborhoods_significance,
-        columns=[top_annotations.index.values, top_annotations["domain"]],
+        columns=[top_annotation.index.values, top_annotation["domain"]],
     )
     node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
 
@@ -110,15 +108,15 @@
 
 def trim_domains(
     domains: pd.DataFrame,
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
-) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """Trim domains that do not meet size criteria and find outliers.
 
     Args:
         domains (pd.DataFrame): DataFrame of domain data for the network nodes.
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         min_cluster_size (int, optional): Minimum size of a cluster to be retained. Defaults to 5.
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
 
@@ -137,21 +135,21 @@ trim_domains
     invalid_domain_id = 888888
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
-    top_annotations["domain"] = top_annotations["domain"].replace(to_remove, invalid_domain_id)
+    top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
     # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
-    top_annotations["normalized_value"] = top_annotations.groupby("domain")[
+    top_annotation["normalized_value"] = top_annotation.groupby("domain")[
         "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
     # Modify the lambda function to pass both full_terms and significant_significance_score
-    top_annotations["combined_terms"] = top_annotations.apply(
+    top_annotation["combined_terms"] = top_annotation.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
    )
 
     # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
-        top_annotations.groupby("domain")
+        top_annotation.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
             significance_scores=("significant_significance_score", lambda x: list(x)),
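Note the signature change on trim_domains: it now returns two DataFrames instead of three, and its DataFrame argument is renamed. The return statement itself isn't shown in these hunks, so which frame was dropped is not visible here; a hedged call-site sketch of the unpacking change (variable names hypothetical):

    import pandas as pd
    from risk.neighborhoods.domains import trim_domains

    domains_df: pd.DataFrame = ...         # e.g. produced by define_domains (placeholder)
    top_annotation_df: pd.DataFrame = ...  # top-annotation table (placeholder)

    # 0.0.11 returned Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    # a, b, c = trim_domains(domains=domains_df, top_annotations=top_annotation_df)

    # 0.0.12: two frames back, singular keyword.
    trimmed, labels = trim_domains(
        domains=domains_df,
        top_annotation=top_annotation_df,
        min_cluster_size=5,     # defaults per the diff
        max_cluster_size=1000,
    )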
@@ -231,7 +229,7 @@
     # Initialize best overall values
     best_overall_method = linkage_method
     best_overall_metric = linkage_metric
-    best_overall_threshold = linkage_threshold
+    best_overall_threshold = 0.0
     best_overall_score = -np.inf
 
     # Set linkage methods and metrics to all combinations if "auto" is selected
@@ -242,7 +240,7 @@
     # Evaluating optimal linkage method and metric
     for method, metric in tqdm(
         product(linkage_methods, linkage_metrics),
-        desc="Evaluating optimal linkage method and metric",
+        desc="Evaluating linkage methods and metrics",
         total=total_combinations,
         bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
     ):
risk/neighborhoods/neighborhoods.py CHANGED
@@ -13,6 +13,7 @@ from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
+from risk.log import logger
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
@@ -22,7 +23,6 @@ from risk.neighborhoods.community import (
     calculate_spinglass_neighborhoods,
     calculate_walktrap_neighborhoods,
 )
-from risk.log import logger
 
 # Suppress DataConversionWarning
 warnings.filterwarnings(action="ignore", category=DataConversionWarning)
@@ -48,6 +48,9 @@ def get_network_neighborhoods(
 
     Returns:
         csr_matrix: The combined neighborhood matrix.
+
+    Raises:
+        ValueError: If the number of distance metrics does not match the number of edge length thresholds.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
@@ -490,6 +493,9 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
 
     Returns:
         float: The calculated distance threshold value.
+
+    Raises:
+        ValueError: If no significant annotation is found in the median distances.
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
@@ -500,7 +506,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     try:
         smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
     except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
+        raise ValueError("No significant annotation found.") from e
 
     # Determine the index corresponding to the distance threshold
     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
risk/neighborhoods/stats/__init__.py ADDED
@@ -0,0 +1,13 @@
+"""
+risk/neighborhoods/stats
+~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from risk.neighborhoods.stats.permutation import compute_permutation_test
+from risk.neighborhoods.stats.tests import (
+    compute_binom_test,
+    compute_chi2_test,
+    compute_hypergeom_test,
+    compute_poisson_test,
+    compute_zscore_test,
+)
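This new package __init__ re-exports all five tests plus the permutation entry point, which makes the migration path for the relocated stats module concrete (the old risk.stats import path is confirmed by the import removed from risk/neighborhoods/api.py above):

    # 0.0.11:
    # from risk.stats import compute_permutation_test, compute_zscore_test

    # 0.0.12: the tests now live under the neighborhoods package
    from risk.neighborhoods.stats import compute_permutation_test, compute_zscore_test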
risk/neighborhoods/stats/permutation/__init__.py ADDED
@@ -0,0 +1,6 @@
+"""
+risk/neighborhoods/stats/permutation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from risk.neighborhoods.stats.permutation.permutation import compute_permutation_test