risk-network 0.0.7b12__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/risk.py CHANGED
@@ -3,7 +3,8 @@ risk/risk
3
3
  ~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Any, Dict, Tuple
6
+ import copy
7
+ from typing import Any, Dict, List, Tuple, Union
7
8
 
8
9
  import networkx as nx
9
10
  import numpy as np
@@ -58,9 +59,9 @@ class RISK(NetworkIO, AnnotationsIO):
58
59
  self,
59
60
  network: nx.Graph,
60
61
  annotations: Dict[str, Any],
61
- distance_metric: str = "louvain",
62
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
62
63
  louvain_resolution: float = 0.1,
63
- edge_length_threshold: float = 0.5,
64
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
64
65
  null_distribution: str = "network",
65
66
  random_seed: int = 888,
66
67
  ) -> Dict[str, Any]:
@@ -68,15 +69,19 @@ class RISK(NetworkIO, AnnotationsIO):
68
69
 
69
70
  Args:
70
71
  network (nx.Graph): The network graph.
71
- annotations (dict): The annotations associated with the network.
72
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
72
+ annotations (Dict[str, Any]): The annotations associated with the network.
73
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
74
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
75
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
73
76
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
74
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
77
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
78
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
79
+ Defaults to 0.5.
75
80
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
76
81
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
77
82
 
78
83
  Returns:
79
- dict: Computed significance of neighborhoods.
84
+ Dict[str, Any]: Computed significance of neighborhoods.
80
85
  """
81
86
  log_header("Running hypergeometric test")
82
87
  # Log neighborhood analysis parameters
@@ -89,6 +94,9 @@ class RISK(NetworkIO, AnnotationsIO):
89
94
  random_seed=random_seed,
90
95
  )
91
96
 
97
+ # Make a copy of the network to avoid modifying the original
98
+ network = copy.deepcopy(network)
99
+
92
100
  # Load neighborhoods based on the network and distance metric
93
101
  neighborhoods = self._load_neighborhoods(
94
102
  network,
@@ -111,9 +119,9 @@ class RISK(NetworkIO, AnnotationsIO):
111
119
  self,
112
120
  network: nx.Graph,
113
121
  annotations: Dict[str, Any],
114
- distance_metric: str = "louvain",
122
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
115
123
  louvain_resolution: float = 0.1,
116
- edge_length_threshold: float = 0.5,
124
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
117
125
  null_distribution: str = "network",
118
126
  random_seed: int = 888,
119
127
  ) -> Dict[str, Any]:
@@ -121,15 +129,19 @@ class RISK(NetworkIO, AnnotationsIO):
121
129
 
122
130
  Args:
123
131
  network (nx.Graph): The network graph.
124
- annotations (dict): The annotations associated with the network.
125
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
132
+ annotations (Dict[str, Any]): The annotations associated with the network.
133
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
134
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
135
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
126
136
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
127
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
137
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
138
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
139
+ Defaults to 0.5.
128
140
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
129
141
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
130
142
 
131
143
  Returns:
132
- dict: Computed significance of neighborhoods.
144
+ Dict[str, Any]: Computed significance of neighborhoods.
133
145
  """
134
146
  log_header("Running Poisson test")
135
147
  # Log neighborhood analysis parameters
@@ -142,6 +154,9 @@ class RISK(NetworkIO, AnnotationsIO):
142
154
  random_seed=random_seed,
143
155
  )
144
156
 
157
+ # Make a copy of the network to avoid modifying the original
158
+ network = copy.deepcopy(network)
159
+
145
160
  # Load neighborhoods based on the network and distance metric
146
161
  neighborhoods = self._load_neighborhoods(
147
162
  network,
@@ -164,9 +179,9 @@ class RISK(NetworkIO, AnnotationsIO):
164
179
  self,
165
180
  network: nx.Graph,
166
181
  annotations: Dict[str, Any],
167
- distance_metric: str = "louvain",
182
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
168
183
  louvain_resolution: float = 0.1,
169
- edge_length_threshold: float = 0.5,
184
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
170
185
  score_metric: str = "sum",
171
186
  null_distribution: str = "network",
172
187
  num_permutations: int = 1000,
@@ -177,10 +192,14 @@ class RISK(NetworkIO, AnnotationsIO):
177
192
 
178
193
  Args:
179
194
  network (nx.Graph): The network graph.
180
- annotations (dict): The annotations associated with the network.
181
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
195
+ annotations (Dict[str, Any]): The annotations associated with the network.
196
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
197
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
198
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
182
199
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
183
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
200
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
201
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
202
+ Defaults to 0.5.
184
203
  score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
185
204
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
186
205
  num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
@@ -188,7 +207,7 @@ class RISK(NetworkIO, AnnotationsIO):
188
207
  max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
189
208
 
190
209
  Returns:
191
- dict: Computed significance of neighborhoods.
210
+ Dict[str, Any]: Computed significance of neighborhoods.
192
211
  """
193
212
  log_header("Running permutation test")
194
213
  # Log neighborhood analysis parameters
@@ -204,6 +223,9 @@ class RISK(NetworkIO, AnnotationsIO):
204
223
  max_workers=max_workers,
205
224
  )
206
225
 
226
+ # Make a copy of the network to avoid modifying the original
227
+ network = copy.deepcopy(network)
228
+
207
229
  # Load neighborhoods based on the network and distance metric
208
230
  neighborhoods = self._load_neighborhoods(
209
231
  network,
@@ -253,7 +275,7 @@ class RISK(NetworkIO, AnnotationsIO):
253
275
  Args:
254
276
  network (nx.Graph): The network graph.
255
277
  annotations (pd.DataFrame): DataFrame containing annotation data for the network.
256
- neighborhoods (dict): Neighborhood enrichment data.
278
+ neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
257
279
  tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
258
280
  pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
259
281
  fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
@@ -283,6 +305,9 @@ class RISK(NetworkIO, AnnotationsIO):
283
305
  max_cluster_size=max_cluster_size,
284
306
  )
285
307
 
308
+ # Make a copy of the network to avoid modifying the original
309
+ network = copy.deepcopy(network)
310
+
286
311
  logger.debug(f"p-value cutoff: {pval_cutoff}")
287
312
  logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
288
313
  logger.debug(
@@ -353,39 +378,41 @@ class RISK(NetworkIO, AnnotationsIO):
353
378
  def load_plotter(
354
379
  self,
355
380
  graph: NetworkGraph,
356
- figsize: Tuple = (10, 10),
381
+ figsize: Union[List, Tuple, np.ndarray] = (10, 10),
357
382
  background_color: str = "white",
383
+ background_alpha: Union[float, None] = 1.0,
384
+ pad: float = 0.3,
358
385
  ) -> NetworkPlotter:
359
386
  """Get a NetworkPlotter object for plotting.
360
387
 
361
388
  Args:
362
389
  graph (NetworkGraph): The graph to plot.
363
- figsize (tuple, optional): Size of the figure. Defaults to (10, 10).
390
+ figsize (List, Tuple, or np.ndarray, optional): Size of the figure. Defaults to (10, 10).
364
391
  background_color (str, optional): Background color of the plot. Defaults to "white".
392
+ background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
393
+ any existing alpha values found in background_color. Defaults to 1.0.
394
+ pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
365
395
 
366
396
  Returns:
367
397
  NetworkPlotter: A NetworkPlotter object configured with the given parameters.
368
398
  """
369
399
  log_header("Loading plotter")
370
- # Log the plotter settings
371
- params.log_plotter(
372
- figsize=figsize,
373
- background_color=background_color,
374
- )
375
400
 
376
401
  # Initialize and return a NetworkPlotter object
377
402
  return NetworkPlotter(
378
403
  graph,
379
404
  figsize=figsize,
380
405
  background_color=background_color,
406
+ background_alpha=background_alpha,
407
+ pad=pad,
381
408
  )
382
409
 
383
410
  def _load_neighborhoods(
384
411
  self,
385
412
  network: nx.Graph,
386
- distance_metric: str = "louvain",
413
+ distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
387
414
  louvain_resolution: float = 0.1,
388
- edge_length_threshold: float = 0.5,
415
+ edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
389
416
  random_seed: int = 888,
390
417
  ) -> np.ndarray:
391
418
  """Load significant neighborhoods for the network.
@@ -393,9 +420,13 @@ class RISK(NetworkIO, AnnotationsIO):
393
420
  Args:
394
421
  network (nx.Graph): The network graph.
395
422
  annotations (pd.DataFrame): The matrix of annotations associated with the network.
396
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
423
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
424
+ metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
425
+ 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
397
426
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
398
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
427
+ edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
428
+ Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
429
+ Defaults to 0.5.
399
430
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
400
431
 
401
432
  Returns:
@@ -435,24 +466,26 @@ class RISK(NetworkIO, AnnotationsIO):
435
466
 
436
467
  Args:
437
468
  network (nx.Graph): The network graph.
438
- annotations (dict): Annotations data for the network.
439
- neighborhoods (dict): Neighborhood enrichment data.
469
+ annotations (Dict[str, Any]): Annotations data for the network.
470
+ neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
440
471
  min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
441
472
  max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
442
473
 
443
474
  Returns:
444
- dict: Top annotations identified within the network.
475
+ Dict[str, Any]: Top annotations identified within the network.
445
476
  """
446
477
  # Extract necessary data from annotations and neighborhoods
447
478
  ordered_annotations = annotations["ordered_annotations"]
448
479
  neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
449
- neighborhoods_binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
480
+ significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
481
+ significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
450
482
  # Call external function to define top annotations
451
483
  return define_top_annotations(
452
484
  network=network,
453
485
  ordered_annotation_labels=ordered_annotations,
454
486
  neighborhood_enrichment_sums=neighborhood_enrichment_sums,
455
- binary_enrichment_matrix=neighborhoods_binary_enrichment_matrix,
487
+ significant_enrichment_matrix=significant_enrichment_matrix,
488
+ significant_binary_enrichment_matrix=significant_binary_enrichment_matrix,
456
489
  min_cluster_size=min_cluster_size,
457
490
  max_cluster_size=max_cluster_size,
458
491
  )
@@ -468,7 +501,7 @@ class RISK(NetworkIO, AnnotationsIO):
468
501
  """Define domains in the network based on enrichment data.
469
502
 
470
503
  Args:
471
- neighborhoods (dict): Enrichment data for neighborhoods.
504
+ neighborhoods (Dict[str, Any]): Enrichment data for neighborhoods.
472
505
  top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
473
506
  linkage_criterion (str): Clustering criterion for defining domains.
474
507
  linkage_method (str): Clustering method to use.
risk/stats/hypergeom.py CHANGED
@@ -20,7 +20,7 @@ def compute_hypergeom_test(
20
20
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
21
21
 
22
22
  Returns:
23
- dict: Dictionary containing depletion and enrichment p-values.
23
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
24
24
  """
25
25
  # Get the total number of nodes in the network
26
26
  total_node_count = neighborhoods.shape[0]
@@ -35,7 +35,7 @@ def compute_permutation_test(
35
35
  max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
36
36
 
37
37
  Returns:
38
- dict: Dictionary containing depletion and enrichment p-values.
38
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
39
39
  """
40
40
  # Ensure that the matrices are in the correct format and free of NaN values
41
41
  neighborhoods = neighborhoods.astype(np.float32)
@@ -133,6 +133,7 @@ def _run_permutation_test(
133
133
  observed_neighborhood_scores,
134
134
  neighborhood_score_func,
135
135
  subset_size + (1 if i < remainder else 0),
136
+ num_permutations,
136
137
  progress_counter,
137
138
  max_workers,
138
139
  rng, # Pass the random number generator to each worker
@@ -144,11 +145,9 @@ def _run_permutation_test(
144
145
  results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
145
146
 
146
147
  # Update progress bar based on progress_counter
147
- # NOTE: Waiting for results to be ready while updating progress bar gives a big improvement
148
- # in performance, especially for large number of permutations and workers
149
148
  while not results.ready():
150
149
  progress.update(progress_counter.value - progress.n)
151
- results.wait(0.05) # Wait for 50ms
150
+ results.wait(0.1) # Wait for 100ms
152
151
  # Ensure progress bar reaches 100%
153
152
  progress.update(total_progress - progress.n)
154
153
 
@@ -167,6 +166,7 @@ def _permutation_process_subset(
167
166
  observed_neighborhood_scores: np.ndarray,
168
167
  neighborhood_score_func: Callable,
169
168
  subset_size: int,
169
+ num_permutations: int,
170
170
  progress_counter: ValueProxy,
171
171
  max_workers: int,
172
172
  rng: np.random.Generator,
@@ -180,6 +180,7 @@ def _permutation_process_subset(
180
180
  observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
181
181
  neighborhood_score_func (Callable): Function to calculate neighborhood scores.
182
182
  subset_size (int): Number of permutations to run in this subset.
183
+ num_permutations (int): Number of total permutations across all subsets.
183
184
  progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
184
185
  max_workers (int): Number of workers for multiprocessing.
185
186
  rng (np.random.Generator): Random number generator object.
@@ -190,11 +191,15 @@ def _permutation_process_subset(
190
191
  # Initialize local count matrices for this worker
191
192
  local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
192
193
  local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
194
+
193
195
  # NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
194
- # This can help prevent oversubscription of CPU resources during multiprocessing, ensuring that each process
195
- # doesn't use more than one CPU core.
196
196
  limits = None if max_workers == 1 else 1
197
197
  with threadpool_limits(limits=limits, user_api="blas"):
198
+ # Initialize a local counter for batched progress updates
199
+ local_progress = 0
200
+ # Calculate the modulo value based on total permutations for 1/100th frequency updates
201
+ modulo_value = max(1, num_permutations // 100)
202
+
198
203
  for _ in range(subset_size):
199
204
  # Permute the annotation matrix using the RNG
200
205
  annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
@@ -212,7 +217,15 @@ def _permutation_process_subset(
212
217
  local_counts_enrichment,
213
218
  permuted_neighborhood_scores >= observed_neighborhood_scores,
214
219
  )
215
- # Update the shared progress counter
216
- progress_counter.value += 1
220
+
221
+ # Update local progress counter
222
+ local_progress += 1
223
+ # Update shared progress counter every 1/100th of total permutations
224
+ if local_progress % modulo_value == 0:
225
+ progress_counter.value += modulo_value
226
+
227
+ # Final progress update for any remaining iterations
228
+ if local_progress % modulo_value != 0:
229
+ progress_counter.value += modulo_value
217
230
 
218
231
  return local_counts_depletion, local_counts_enrichment
risk/stats/poisson.py CHANGED
@@ -3,7 +3,7 @@ risk/stats/poisson
3
3
  ~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Dict, Any
6
+ from typing import Any, Dict
7
7
 
8
8
  import numpy as np
9
9
  from scipy.stats import poisson
@@ -20,7 +20,7 @@ def compute_poisson_test(
20
20
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
21
21
 
22
22
  Returns:
23
- dict: Dictionary containing depletion and enrichment p-values.
23
+ Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
24
24
  """
25
25
  # Matrix multiplication to get the number of annotated nodes in each neighborhood
26
26
  annotated_in_neighborhood = neighborhoods @ annotations
risk/stats/stats.py CHANGED
@@ -3,7 +3,7 @@ risk/stats/stats
3
3
  ~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Union
6
+ from typing import Any, Dict, Union
7
7
 
8
8
  import numpy as np
9
9
  from statsmodels.stats.multitest import fdrcorrection
@@ -15,7 +15,7 @@ def calculate_significance_matrices(
15
15
  tail: str = "right",
16
16
  pval_cutoff: float = 0.05,
17
17
  fdr_cutoff: float = 0.05,
18
- ) -> dict:
18
+ ) -> Dict[str, Any]:
19
19
  """Calculate significance matrices based on p-values and specified tail.
20
20
 
21
21
  Args:
@@ -26,8 +26,8 @@ def calculate_significance_matrices(
26
26
  fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.
27
27
 
28
28
  Returns:
29
- dict: Dictionary containing the enrichment matrix, binary significance matrix,
30
- and the matrix of significant enrichment values.
29
+ Dict[str, Any]: Dictionary containing the enrichment matrix, binary significance matrix,
30
+ and the matrix of significant enrichment values.
31
31
  """
32
32
  if fdr_cutoff < 1.0:
33
33
  # Apply FDR correction to depletion p-values
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
62
62
  log_enrichment_matrix = -np.log10(enrichment_matrix)
63
63
 
64
64
  # Select the appropriate significance matrices based on the specified tail
65
- enrichment_matrix, binary_enrichment_matrix = _select_significance_matrices(
65
+ enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
66
66
  tail,
67
67
  log_depletion_matrix,
68
68
  depletion_alpha_threshold_matrix,
@@ -71,11 +71,13 @@ def calculate_significance_matrices(
71
71
  )
72
72
 
73
73
  # Filter the enrichment matrix using the binary significance matrix
74
- significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
74
+ significant_enrichment_matrix = np.where(
75
+ significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
76
+ )
75
77
 
76
78
  return {
77
79
  "enrichment_matrix": enrichment_matrix,
78
- "binary_enrichment_matrix": binary_enrichment_matrix,
80
+ "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
79
81
  "significant_enrichment_matrix": significant_enrichment_matrix,
80
82
  }
81
83
 
@@ -127,10 +129,10 @@ def _select_significance_matrices(
127
129
 
128
130
  # Create a binary significance matrix where valid indices meet the alpha threshold
129
131
  valid_idxs = ~np.isnan(alpha_threshold_matrix)
130
- binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
131
- binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
132
+ significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
133
+ significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
132
134
 
133
- return enrichment_matrix, binary_enrichment_matrix
135
+ return enrichment_matrix, significant_binary_enrichment_matrix
134
136
 
135
137
 
136
138
  def _compute_threshold_matrix(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.7b12
3
+ Version: 0.0.8
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
709
709
  Requires-Dist: threadpoolctl
710
710
  Requires-Dist: tqdm
711
711
 
712
- <p align="center">
713
- <img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
714
- </p>
712
+ # RISK Network
715
713
 
716
714
  <p align="center">
717
- <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
718
- <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
719
- <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
715
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
720
716
  </p>
721
717
 
722
- ## RISK
718
+ <br>
719
+
720
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
721
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
722
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
723
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
724
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
725
+ ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
726
+
727
+ **RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
723
728
 
724
- #### RISK Infers Spatial Kinships
729
+ ## Documentation and Tutorial
730
+
731
+ - **Documentation**: Comprehensive documentation is available [here](Documentation link).
732
+ - **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
733
+ We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
734
+
735
+ ## Installation
725
736
 
726
- RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
737
+ RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
738
+
739
+ ```bash
740
+ pip install risk-network
741
+ ```
727
742
 
728
743
  ## Features
729
744
 
730
- - Spatial analysis of biological networks
731
- - Functional enrichment detection
732
- - Optimized performance
745
+ - **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
746
+ - **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
747
+ - **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
748
+ - **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
749
+ - **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
750
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
733
751
 
734
- ## Example
752
+ ## Example Usage
735
753
 
736
- *Saccharomyces cerevisiae* proteins oriented by physical interactions discovered through affinity enrichment and mass spectrometry (Michaelis et al., 2023).
754
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
737
755
 
738
- ![PPI Network Demo](https://i.imgur.com/NnyK6nO.png)
756
+ ![RISK Main Figure](https://i.imgur.com/5OP3Hqe.jpeg)
739
757
 
740
- ## Installation
758
+ RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
759
+
760
+ ## Citation
761
+
762
+ If you use RISK in your research, please cite the following:
763
+
764
+ **Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
765
+
766
+ ## Software Architecture and Implementation
741
767
 
742
- Coming soon...
768
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
743
769
 
744
- ## Usage
770
+ ### Supported Data Formats
745
771
 
746
- Coming soon...
772
+ - **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
773
+ - **Visualization Outputs**: SVG, PNG, PDF.
774
+
775
+ ### Clustering Algorithms
776
+
777
+ - **Available Algorithms**:
778
+ - Greedy Modularity
779
+ - Label Propagation
780
+ - Louvain
781
+ - Markov Clustering
782
+ - Spinglass
783
+ - Walktrap
784
+ - **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
785
+
786
+ ### Statistical Tests
787
+
788
+ - **Hypergeometric Test**
789
+ - **Permutation Test** (single- or multi-process modes)
790
+ - **Poisson Test**
791
+
792
+ ## Performance and Efficiency
793
+
794
+ In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
795
+
796
+ ## Contributing
797
+
798
+ We welcome contributions from the community. Please use the following resources:
799
+
800
+ - [Issues Tracker](https://github.com/irahorecka/risk/issues)
801
+ - [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
802
+
803
+ ## Support
804
+
805
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
747
806
 
748
807
  ## License
749
808
 
750
- This project is licensed under the GPL-3.0 license.
809
+ RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
810
+
811
+ ---
812
+
813
+ **Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
@@ -0,0 +1,37 @@
1
+ risk/__init__.py,sha256=1uHw76jOGBsjhDxEmv0vYmPZhEY2JBiXt_n6-TI3x5w,105
2
+ risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
+ risk/risk.py,sha256=8GTUpj3TC7XLwEUD22-fDTKXtW7PndzW9TixWaZ23bI,23853
4
+ risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
5
+ risk/annotations/annotations.py,sha256=dHO6kQOQjMA57nYA-yTAU1uB-ieiZ5sknAKvX6vF0Os,13024
6
+ risk/annotations/io.py,sha256=powWzeimVdE0WCwlBCXyu5otMyZZHQujC0DS3m5DC0c,9505
7
+ risk/log/__init__.py,sha256=xKeU9uK1AnVk7Yt9GTp-E-dn7Ealow2igEXZZnQRa2c,135
8
+ risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
9
+ risk/log/params.py,sha256=qSTktJ3OazldTzgtDGZkh0s30vu5kiXPkiNGLdSFDvg,6416
10
+ risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
11
+ risk/neighborhoods/community.py,sha256=MAgIblbuisEPwVU6mFZd4Yd9NUKlaHK99suw51r1Is0,7065
12
+ risk/neighborhoods/domains.py,sha256=3iV0-nRLF2sL9_7epHY5b9AtTU-QQ84hOWO76VwFcrs,11685
13
+ risk/neighborhoods/neighborhoods.py,sha256=cT9CCi1uQLn9Kv9Lxt8AN_4s63SKIlOZspvUZnx27nE,21832
14
+ risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
15
+ risk/network/geometry.py,sha256=gFtYUj9j9aul4paKq_qSGJn39Nazxu_MXv8m-tYYtrk,6840
16
+ risk/network/graph.py,sha256=-tslu8nSbuBaqNGf6TQ8ON7C27v-BLH_37J2aC6Ke14,9602
17
+ risk/network/io.py,sha256=-NJ9Tg1s-DxhlDbwQGO4o87rbMqO4-BzShgnIgFoRRE,22962
18
+ risk/network/plot/__init__.py,sha256=MfmaXJgAZJgXZ2wrhK8pXwzETlcMaLChhWXKAozniAo,98
19
+ risk/network/plot/canvas.py,sha256=TlCpNtvoceizAumNr9I02JcBrBO6FiAFAa2ZC0bx3SU,13356
20
+ risk/network/plot/contour.py,sha256=CwX4i3uE5HL0W4kfx34U7YyoTTqMxyb7xaXKRVoNLzY,15265
21
+ risk/network/plot/labels.py,sha256=fNccRds6seShMFPN6WX_7M1_qnscBkcWEH3QOJAKalk,45502
22
+ risk/network/plot/network.py,sha256=6RURL1OdBFyQ34qNcwM_uH3LSQGYZZ8tZT51dggH1a0,13685
23
+ risk/network/plot/plotter.py,sha256=iTPMiTnTTatM_-q1Ox_bjt5Pvv-Lo8gceiYB6TVzDcw,5770
24
+ risk/network/plot/utils/color.py,sha256=WSs1ge2oZ8yXwyVk2QqBF-avRd0aYT-sYZr9cxxAn7M,19626
25
+ risk/network/plot/utils/layout.py,sha256=RnJq0yODpoheZnDl7KKFPQeXrnrsS3FLIdxupoYVZq4,3553
26
+ risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
27
+ risk/stats/hypergeom.py,sha256=oc39f02ViB1vQ-uaDrxG_tzAT6dxQBRjc88EK2EGn78,2282
28
+ risk/stats/poisson.py,sha256=polLgwS08MTCNzupYdmMUoEUYrJOjAbcYtYwjlfeE5Y,1803
29
+ risk/stats/stats.py,sha256=6iGi0-oN05mTmupg6X_VEBxEQvi2rujNhfPk4aLjwNI,7186
30
+ risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
31
+ risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
32
+ risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
33
+ risk_network-0.0.8.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
34
+ risk_network-0.0.8.dist-info/METADATA,sha256=bFaieAp2hbwf-6YMsIryAIoZ9kUKGqewW1iQUNxfXlI,47495
35
+ risk_network-0.0.8.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
36
+ risk_network-0.0.8.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
37
+ risk_network-0.0.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5