risk-network 0.0.7b11__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/risk.py CHANGED
@@ -3,7 +3,8 @@ risk/risk
3
3
  ~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Any, Dict, Tuple
6
+ import copy
7
+ from typing import Any, Dict, List, Tuple, Union
7
8
 
8
9
  import networkx as nx
9
10
  import numpy as np
@@ -33,24 +34,17 @@ class RISK(NetworkIO, AnnotationsIO):
33
34
  and performing network-based statistical analysis, such as neighborhood significance testing.
34
35
  """
35
36
 
36
- def __init__(self, *args, verbose: bool = True, **kwargs):
37
+ def __init__(self, verbose: bool = True):
37
38
  """Initialize the RISK class with configuration settings.
38
39
 
39
40
  Args:
40
41
  verbose (bool): If False, suppresses all log messages to the console. Defaults to True.
41
- *args: Variable length argument list.
42
- **kwargs: Arbitrary keyword arguments.
43
-
44
- Note:
45
- - All *args and **kwargs are passed to NetworkIO's __init__ method.
46
- - AnnotationsIO does not take any arguments and is initialized without them.
47
42
  """
48
43
  # Set global verbosity for logging
49
44
  set_global_verbosity(verbose)
50
45
  # Initialize and log network parameters
51
46
  params.initialize()
52
- # Use super() to call NetworkIO's __init__ with the given arguments and keyword arguments
53
- super().__init__(*args, **kwargs)
47
+ super().__init__()
54
48
 
55
49
  @property
56
50
  def params(self) -> params:
@@ -65,9 +59,9 @@ class RISK(NetworkIO, AnnotationsIO):
65
59
  self,
66
60
  network: nx.Graph,
67
61
  annotations: Dict[str, Any],
68
- distance_metric: str = "louvain",
62
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
69
63
  louvain_resolution: float = 0.1,
70
- edge_length_threshold: float = 0.5,
64
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
71
65
  null_distribution: str = "network",
72
66
  random_seed: int = 888,
73
67
  ) -> Dict[str, Any]:
@@ -75,15 +69,19 @@ class RISK(NetworkIO, AnnotationsIO):
75
69
 
76
70
  Args:
77
71
  network (nx.Graph): The network graph.
78
- annotations (dict): The annotations associated with the network.
79
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
72
+ annotations (Dict[str, Any]): The annotations associated with the network.
73
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
74
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
75
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
80
76
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
81
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
77
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
78
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
79
+ Defaults to 0.5.
82
80
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
83
81
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
84
82
 
85
83
  Returns:
86
- dict: Computed significance of neighborhoods.
84
+ Dict[str, Any]: Computed significance of neighborhoods.
87
85
  """
88
86
  log_header("Running hypergeometric test")
89
87
  # Log neighborhood analysis parameters
@@ -96,6 +94,9 @@ class RISK(NetworkIO, AnnotationsIO):
96
94
  random_seed=random_seed,
97
95
  )
98
96
 
97
+ # Make a copy of the network to avoid modifying the original
98
+ network = copy.deepcopy(network)
99
+
99
100
  # Load neighborhoods based on the network and distance metric
100
101
  neighborhoods = self._load_neighborhoods(
101
102
  network,
@@ -118,9 +119,9 @@ class RISK(NetworkIO, AnnotationsIO):
118
119
  self,
119
120
  network: nx.Graph,
120
121
  annotations: Dict[str, Any],
121
- distance_metric: str = "louvain",
122
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
122
123
  louvain_resolution: float = 0.1,
123
- edge_length_threshold: float = 0.5,
124
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
124
125
  null_distribution: str = "network",
125
126
  random_seed: int = 888,
126
127
  ) -> Dict[str, Any]:
@@ -128,15 +129,19 @@ class RISK(NetworkIO, AnnotationsIO):
128
129
 
129
130
  Args:
130
131
  network (nx.Graph): The network graph.
131
- annotations (dict): The annotations associated with the network.
132
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
132
+ annotations (Dict[str, Any]): The annotations associated with the network.
133
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
134
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
135
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
133
136
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
134
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
137
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
138
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
139
+ Defaults to 0.5.
135
140
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
136
141
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
137
142
 
138
143
  Returns:
139
- dict: Computed significance of neighborhoods.
144
+ Dict[str, Any]: Computed significance of neighborhoods.
140
145
  """
141
146
  log_header("Running Poisson test")
142
147
  # Log neighborhood analysis parameters
@@ -149,6 +154,9 @@ class RISK(NetworkIO, AnnotationsIO):
149
154
  random_seed=random_seed,
150
155
  )
151
156
 
157
+ # Make a copy of the network to avoid modifying the original
158
+ network = copy.deepcopy(network)
159
+
152
160
  # Load neighborhoods based on the network and distance metric
153
161
  neighborhoods = self._load_neighborhoods(
154
162
  network,
@@ -171,9 +179,9 @@ class RISK(NetworkIO, AnnotationsIO):
171
179
  self,
172
180
  network: nx.Graph,
173
181
  annotations: Dict[str, Any],
174
- distance_metric: str = "louvain",
182
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
175
183
  louvain_resolution: float = 0.1,
176
- edge_length_threshold: float = 0.5,
184
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
177
185
  score_metric: str = "sum",
178
186
  null_distribution: str = "network",
179
187
  num_permutations: int = 1000,
@@ -184,10 +192,14 @@ class RISK(NetworkIO, AnnotationsIO):
184
192
 
185
193
  Args:
186
194
  network (nx.Graph): The network graph.
187
- annotations (dict): The annotations associated with the network.
188
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
195
+ annotations (Dict[str, Any]): The annotations associated with the network.
196
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
197
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
198
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
189
199
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
190
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
200
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
201
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
202
+ Defaults to 0.5.
191
203
  score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
192
204
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
193
205
  num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
@@ -195,7 +207,7 @@ class RISK(NetworkIO, AnnotationsIO):
195
207
  max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
196
208
 
197
209
  Returns:
198
- dict: Computed significance of neighborhoods.
210
+ Dict[str, Any]: Computed significance of neighborhoods.
199
211
  """
200
212
  log_header("Running permutation test")
201
213
  # Log neighborhood analysis parameters
@@ -211,6 +223,9 @@ class RISK(NetworkIO, AnnotationsIO):
211
223
  max_workers=max_workers,
212
224
  )
213
225
 
226
+ # Make a copy of the network to avoid modifying the original
227
+ network = copy.deepcopy(network)
228
+
214
229
  # Load neighborhoods based on the network and distance metric
215
230
  neighborhoods = self._load_neighborhoods(
216
231
  network,
@@ -221,10 +236,10 @@ class RISK(NetworkIO, AnnotationsIO):
221
236
  )
222
237
 
223
238
  # Log and display permutation test settings
224
- logger.info(f"Neighborhood scoring metric: '{score_metric}'")
225
- logger.info(f"Null distribution: '{null_distribution}'")
226
- logger.info(f"Number of permutations: {num_permutations}")
227
- logger.info(f"Maximum workers: {max_workers}")
239
+ logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
240
+ logger.debug(f"Null distribution: '{null_distribution}'")
241
+ logger.debug(f"Number of permutations: {num_permutations}")
242
+ logger.debug(f"Maximum workers: {max_workers}")
228
243
  # Run permutation test to compute neighborhood significance
229
244
  neighborhood_significance = compute_permutation_test(
230
245
  neighborhoods=neighborhoods,
@@ -260,7 +275,7 @@ class RISK(NetworkIO, AnnotationsIO):
260
275
  Args:
261
276
  network (nx.Graph): The network graph.
262
277
  annotations (pd.DataFrame): DataFrame containing annotation data for the network.
263
- neighborhoods (dict): Neighborhood enrichment data.
278
+ neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
264
279
  tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
265
280
  pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
266
281
  fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
@@ -290,9 +305,12 @@ class RISK(NetworkIO, AnnotationsIO):
290
305
  max_cluster_size=max_cluster_size,
291
306
  )
292
307
 
293
- logger.info(f"p-value cutoff: {pval_cutoff}")
294
- logger.info(f"FDR BH cutoff: {fdr_cutoff}")
295
- logger.info(
308
+ # Make a copy of the network to avoid modifying the original
309
+ network = copy.deepcopy(network)
310
+
311
+ logger.debug(f"p-value cutoff: {pval_cutoff}")
312
+ logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
313
+ logger.debug(
296
314
  f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
297
315
  )
298
316
  # Calculate significant neighborhoods based on the provided parameters
@@ -314,8 +332,8 @@ class RISK(NetworkIO, AnnotationsIO):
314
332
  )
315
333
 
316
334
  log_header("Finding top annotations")
317
- logger.info(f"Min cluster size: {min_cluster_size}")
318
- logger.info(f"Max cluster size: {max_cluster_size}")
335
+ logger.debug(f"Min cluster size: {min_cluster_size}")
336
+ logger.debug(f"Max cluster size: {max_cluster_size}")
319
337
  # Define top annotations based on processed neighborhoods
320
338
  top_annotations = self._define_top_annotations(
321
339
  network=network,
@@ -360,39 +378,41 @@ class RISK(NetworkIO, AnnotationsIO):
360
378
  def load_plotter(
361
379
  self,
362
380
  graph: NetworkGraph,
363
- figsize: Tuple = (10, 10),
381
+ figsize: Union[List, Tuple, np.ndarray] = (10, 10),
364
382
  background_color: str = "white",
383
+ background_alpha: Union[float, None] = 1.0,
384
+ pad: float = 0.3,
365
385
  ) -> NetworkPlotter:
366
386
  """Get a NetworkPlotter object for plotting.
367
387
 
368
388
  Args:
369
389
  graph (NetworkGraph): The graph to plot.
370
- figsize (tuple, optional): Size of the figure. Defaults to (10, 10).
390
+ figsize (List, Tuple, or np.ndarray, optional): Size of the figure. Defaults to (10, 10).
371
391
  background_color (str, optional): Background color of the plot. Defaults to "white".
392
+ background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
393
+ any existing alpha values found in background_color. Defaults to 1.0.
394
+ pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
372
395
 
373
396
  Returns:
374
397
  NetworkPlotter: A NetworkPlotter object configured with the given parameters.
375
398
  """
376
399
  log_header("Loading plotter")
377
- # Log the plotter settings
378
- params.log_plotter(
379
- figsize=figsize,
380
- background_color=background_color,
381
- )
382
400
 
383
401
  # Initialize and return a NetworkPlotter object
384
402
  return NetworkPlotter(
385
403
  graph,
386
404
  figsize=figsize,
387
405
  background_color=background_color,
406
+ background_alpha=background_alpha,
407
+ pad=pad,
388
408
  )
389
409
 
390
410
  def _load_neighborhoods(
391
411
  self,
392
412
  network: nx.Graph,
393
- distance_metric: str = "louvain",
413
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
394
414
  louvain_resolution: float = 0.1,
395
- edge_length_threshold: float = 0.5,
415
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
396
416
  random_seed: int = 888,
397
417
  ) -> np.ndarray:
398
418
  """Load significant neighborhoods for the network.
@@ -400,9 +420,13 @@ class RISK(NetworkIO, AnnotationsIO):
400
420
  Args:
401
421
  network (nx.Graph): The network graph.
402
422
  annotations (pd.DataFrame): The matrix of annotations associated with the network.
403
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
423
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
424
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
425
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
404
426
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
405
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
427
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
428
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
429
+ Defaults to 0.5.
406
430
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
407
431
 
408
432
  Returns:
@@ -414,9 +438,9 @@ class RISK(NetworkIO, AnnotationsIO):
414
438
  else:
415
439
  for_print_distance_metric = distance_metric
416
440
  # Log and display neighborhood settings
417
- logger.info(f"Distance metric: '{for_print_distance_metric}'")
418
- logger.info(f"Edge length threshold: {edge_length_threshold}")
419
- logger.info(f"Random seed: {random_seed}")
441
+ logger.debug(f"Distance metric: '{for_print_distance_metric}'")
442
+ logger.debug(f"Edge length threshold: {edge_length_threshold}")
443
+ logger.debug(f"Random seed: {random_seed}")
420
444
 
421
445
  # Compute neighborhoods based on the network and distance metric
422
446
  neighborhoods = get_network_neighborhoods(
@@ -442,24 +466,26 @@ class RISK(NetworkIO, AnnotationsIO):
442
466
 
443
467
  Args:
444
468
  network (nx.Graph): The network graph.
445
- annotations (dict): Annotations data for the network.
446
- neighborhoods (dict): Neighborhood enrichment data.
469
+ annotations (Dict[str, Any]): Annotations data for the network.
470
+ neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
447
471
  min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
448
472
  max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
449
473
 
450
474
  Returns:
451
- dict: Top annotations identified within the network.
475
+ Dict[str, Any]: Top annotations identified within the network.
452
476
  """
453
477
  # Extract necessary data from annotations and neighborhoods
454
478
  ordered_annotations = annotations["ordered_annotations"]
455
479
  neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
456
- neighborhoods_binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
480
+ significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
481
+ significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
457
482
  # Call external function to define top annotations
458
483
  return define_top_annotations(
459
484
  network=network,
460
485
  ordered_annotation_labels=ordered_annotations,
461
486
  neighborhood_enrichment_sums=neighborhood_enrichment_sums,
462
- binary_enrichment_matrix=neighborhoods_binary_enrichment_matrix,
487
+ significant_enrichment_matrix=significant_enrichment_matrix,
488
+ significant_binary_enrichment_matrix=significant_binary_enrichment_matrix,
463
489
  min_cluster_size=min_cluster_size,
464
490
  max_cluster_size=max_cluster_size,
465
491
  )
@@ -475,7 +501,7 @@ class RISK(NetworkIO, AnnotationsIO):
475
501
  """Define domains in the network based on enrichment data.
476
502
 
477
503
  Args:
478
- neighborhoods (dict): Enrichment data for neighborhoods.
504
+ neighborhoods (Dict[str, Any]): Enrichment data for neighborhoods.
479
505
  top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
480
506
  linkage_criterion (str): Clustering criterion for defining domains.
481
507
  linkage_method (str): Clustering method to use.
risk/stats/hypergeom.py CHANGED
@@ -20,7 +20,7 @@ def compute_hypergeom_test(
20
20
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
21
21
 
22
22
  Returns:
23
- dict: Dictionary containing depletion and enrichment p-values.
23
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
24
24
  """
25
25
  # Get the total number of nodes in the network
26
26
  total_node_count = neighborhoods.shape[0]
@@ -35,7 +35,7 @@ def compute_permutation_test(
35
35
  max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
36
36
 
37
37
  Returns:
38
- dict: Dictionary containing depletion and enrichment p-values.
38
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
39
39
  """
40
40
  # Ensure that the matrices are in the correct format and free of NaN values
41
41
  neighborhoods = neighborhoods.astype(np.float32)
@@ -133,6 +133,7 @@ def _run_permutation_test(
133
133
  observed_neighborhood_scores,
134
134
  neighborhood_score_func,
135
135
  subset_size + (1 if i < remainder else 0),
136
+ num_permutations,
136
137
  progress_counter,
137
138
  max_workers,
138
139
  rng, # Pass the random number generator to each worker
@@ -144,11 +145,9 @@ def _run_permutation_test(
144
145
  results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
145
146
 
146
147
  # Update progress bar based on progress_counter
147
- # NOTE: Waiting for results to be ready while updating progress bar gives a big improvement
148
- # in performance, especially for large number of permutations and workers
149
148
  while not results.ready():
150
149
  progress.update(progress_counter.value - progress.n)
151
- results.wait(0.05) # Wait for 50ms
150
+ results.wait(0.1) # Wait for 100ms
152
151
  # Ensure progress bar reaches 100%
153
152
  progress.update(total_progress - progress.n)
154
153
 
@@ -167,6 +166,7 @@ def _permutation_process_subset(
167
166
  observed_neighborhood_scores: np.ndarray,
168
167
  neighborhood_score_func: Callable,
169
168
  subset_size: int,
169
+ num_permutations: int,
170
170
  progress_counter: ValueProxy,
171
171
  max_workers: int,
172
172
  rng: np.random.Generator,
@@ -180,6 +180,7 @@ def _permutation_process_subset(
180
180
  observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
181
181
  neighborhood_score_func (Callable): Function to calculate neighborhood scores.
182
182
  subset_size (int): Number of permutations to run in this subset.
183
+ num_permutations (int): Number of total permutations across all subsets.
183
184
  progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
184
185
  max_workers (int): Number of workers for multiprocessing.
185
186
  rng (np.random.Generator): Random number generator object.
@@ -190,11 +191,15 @@ def _permutation_process_subset(
190
191
  # Initialize local count matrices for this worker
191
192
  local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
192
193
  local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
194
+
193
195
  # NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
194
- # This can help prevent oversubscription of CPU resources during multiprocessing, ensuring that each process
195
- # doesn't use more than one CPU core.
196
196
  limits = None if max_workers == 1 else 1
197
197
  with threadpool_limits(limits=limits, user_api="blas"):
198
+ # Initialize a local counter for batched progress updates
199
+ local_progress = 0
200
+ # Calculate the modulo value based on total permutations for 1/100th frequency updates
201
+ modulo_value = max(1, num_permutations // 100)
202
+
198
203
  for _ in range(subset_size):
199
204
  # Permute the annotation matrix using the RNG
200
205
  annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
@@ -212,7 +217,15 @@ def _permutation_process_subset(
212
217
  local_counts_enrichment,
213
218
  permuted_neighborhood_scores >= observed_neighborhood_scores,
214
219
  )
215
- # Update the shared progress counter
216
- progress_counter.value += 1
220
+
221
+ # Update local progress counter
222
+ local_progress += 1
223
+ # Update shared progress counter every 1/100th of total permutations
224
+ if local_progress % modulo_value == 0:
225
+ progress_counter.value += modulo_value
226
+
227
+ # Final progress update for any remaining iterations
228
+ if local_progress % modulo_value != 0:
229
+ progress_counter.value += modulo_value
217
230
 
218
231
  return local_counts_depletion, local_counts_enrichment
risk/stats/poisson.py CHANGED
@@ -3,7 +3,7 @@ risk/stats/poisson
3
3
  ~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Dict, Any
6
+ from typing import Any, Dict
7
7
 
8
8
  import numpy as np
9
9
  from scipy.stats import poisson
@@ -20,7 +20,7 @@ def compute_poisson_test(
20
20
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
21
21
 
22
22
  Returns:
23
- dict: Dictionary containing depletion and enrichment p-values.
23
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
24
24
  """
25
25
  # Matrix multiplication to get the number of annotated nodes in each neighborhood
26
26
  annotated_in_neighborhood = neighborhoods @ annotations
risk/stats/stats.py CHANGED
@@ -3,7 +3,7 @@ risk/stats/stats
3
3
  ~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Union
6
+ from typing import Any, Dict, Union
7
7
 
8
8
  import numpy as np
9
9
  from statsmodels.stats.multitest import fdrcorrection
@@ -15,7 +15,7 @@ def calculate_significance_matrices(
15
15
  tail: str = "right",
16
16
  pval_cutoff: float = 0.05,
17
17
  fdr_cutoff: float = 0.05,
18
- ) -> dict:
18
+ ) -> Dict[str, Any]:
19
19
  """Calculate significance matrices based on p-values and specified tail.
20
20
 
21
21
  Args:
@@ -26,8 +26,8 @@ def calculate_significance_matrices(
26
26
  fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.
27
27
 
28
28
  Returns:
29
- dict: Dictionary containing the enrichment matrix, binary significance matrix,
30
- and the matrix of significant enrichment values.
29
+ Dict[str, Any]: Dictionary containing the enrichment matrix, binary significance matrix,
30
+ and the matrix of significant enrichment values.
31
31
  """
32
32
  if fdr_cutoff < 1.0:
33
33
  # Apply FDR correction to depletion p-values
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
62
62
  log_enrichment_matrix = -np.log10(enrichment_matrix)
63
63
 
64
64
  # Select the appropriate significance matrices based on the specified tail
65
- enrichment_matrix, binary_enrichment_matrix = _select_significance_matrices(
65
+ enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
66
66
  tail,
67
67
  log_depletion_matrix,
68
68
  depletion_alpha_threshold_matrix,
@@ -71,11 +71,13 @@ def calculate_significance_matrices(
71
71
  )
72
72
 
73
73
  # Filter the enrichment matrix using the binary significance matrix
74
- significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
74
+ significant_enrichment_matrix = np.where(
75
+ significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
76
+ )
75
77
 
76
78
  return {
77
79
  "enrichment_matrix": enrichment_matrix,
78
- "binary_enrichment_matrix": binary_enrichment_matrix,
80
+ "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
79
81
  "significant_enrichment_matrix": significant_enrichment_matrix,
80
82
  }
81
83
 
@@ -127,10 +129,10 @@ def _select_significance_matrices(
127
129
 
128
130
  # Create a binary significance matrix where valid indices meet the alpha threshold
129
131
  valid_idxs = ~np.isnan(alpha_threshold_matrix)
130
- binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
131
- binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
132
+ significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
133
+ significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
132
134
 
133
- return enrichment_matrix, binary_enrichment_matrix
135
+ return enrichment_matrix, significant_binary_enrichment_matrix
134
136
 
135
137
 
136
138
  def _compute_threshold_matrix(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.7b11
3
+ Version: 0.0.8
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
709
709
  Requires-Dist: threadpoolctl
710
710
  Requires-Dist: tqdm
711
711
 
712
- <p align="center">
713
- <img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
714
- </p>
712
+ # RISK Network
715
713
 
716
714
  <p align="center">
717
- <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
718
- <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
719
- <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
715
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
720
716
  </p>
721
717
 
722
- ## RISK
718
+ <br>
719
+
720
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
721
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
722
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
723
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
724
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
725
+ ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
726
+
727
+ **RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
723
728
 
724
- #### RISK Infers Spatial Kinships
729
+ ## Documentation and Tutorial
730
+
731
+ - **Documentation**: Comprehensive documentation is available [here](Documentation link).
732
+ - **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
733
+ We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
734
+
735
+ ## Installation
725
736
 
726
- RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
737
+ RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
738
+
739
+ ```bash
740
+ pip install risk-network
741
+ ```
727
742
 
728
743
  ## Features
729
744
 
730
- - Spatial analysis of biological networks
731
- - Functional enrichment detection
732
- - Optimized performance
745
+ - **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
746
+ - **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
747
+ - **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
748
+ - **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
749
+ - **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
750
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
733
751
 
734
- ## Example
752
+ ## Example Usage
735
753
 
736
- *Saccharomyces cerevisiae* proteins oriented by physical interactions discovered through affinity enrichment and mass spectrometry (Michaelis et al., 2023).
754
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
737
755
 
738
- ![PPI Network Demo](https://i.imgur.com/NnyK6nO.png)
756
+ ![RISK Main Figure](https://i.imgur.com/5OP3Hqe.jpeg)
739
757
 
740
- ## Installation
758
+ RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
759
+
760
+ ## Citation
761
+
762
+ If you use RISK in your research, please cite the following:
763
+
764
+ **Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
765
+
766
+ ## Software Architecture and Implementation
741
767
 
742
- Coming soon...
768
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
743
769
 
744
- ## Usage
770
+ ### Supported Data Formats
745
771
 
746
- Coming soon...
772
+ - **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
773
+ - **Visualization Outputs**: SVG, PNG, PDF.
774
+
775
+ ### Clustering Algorithms
776
+
777
+ - **Available Algorithms**:
778
+ - Greedy Modularity
779
+ - Label Propagation
780
+ - Louvain
781
+ - Markov Clustering
782
+ - Spinglass
783
+ - Walktrap
784
+ - **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
785
+
786
+ ### Statistical Tests
787
+
788
+ - **Hypergeometric Test**
789
+ - **Permutation Test** (single- or multi-process modes)
790
+ - **Poisson Test**
791
+
792
+ ## Performance and Efficiency
793
+
794
+ In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
795
+
796
+ ## Contributing
797
+
798
+ We welcome contributions from the community. Please use the following resources:
799
+
800
+ - [Issues Tracker](https://github.com/irahorecka/risk/issues)
801
+ - [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
802
+
803
+ ## Support
804
+
805
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
747
806
 
748
807
  ## License
749
808
 
750
- This project is licensed under the GPL-3.0 license.
809
+ RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
810
+
811
+ ---
812
+
813
+ **Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.