risk-network 0.0.11__py3-none-any.whl → 0.0.12b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +4 -1
  3. risk/annotations/io.py +48 -47
  4. risk/annotations/nltk_setup.py +2 -1
  5. risk/log/__init__.py +1 -1
  6. risk/log/parameters.py +21 -22
  7. risk/neighborhoods/__init__.py +0 -1
  8. risk/neighborhoods/api.py +2 -2
  9. risk/neighborhoods/community.py +33 -4
  10. risk/neighborhoods/domains.py +6 -4
  11. risk/neighborhoods/neighborhoods.py +7 -1
  12. risk/neighborhoods/stats/__init__.py +13 -0
  13. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  14. risk/{stats → neighborhoods/stats}/permutation/permutation.py +7 -4
  15. risk/{stats → neighborhoods/stats}/permutation/test_functions.py +2 -2
  16. risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +21 -13
  17. risk/network/__init__.py +0 -2
  18. risk/network/graph/__init__.py +0 -2
  19. risk/network/graph/api.py +2 -2
  20. risk/network/graph/graph.py +56 -57
  21. risk/{stats/significance.py → network/graph/stats.py} +2 -2
  22. risk/network/graph/summary.py +2 -3
  23. risk/network/io.py +151 -8
  24. risk/network/plotter/__init__.py +0 -2
  25. risk/network/plotter/api.py +1 -1
  26. risk/network/plotter/canvas.py +35 -35
  27. risk/network/plotter/contour.py +11 -12
  28. risk/network/plotter/labels.py +257 -246
  29. risk/network/plotter/plotter.py +2 -4
  30. risk/network/plotter/utils/colors.py +3 -0
  31. risk/risk.py +5 -5
  32. risk_network-0.0.12b1.dist-info/METADATA +122 -0
  33. risk_network-0.0.12b1.dist-info/RECORD +40 -0
  34. {risk_network-0.0.11.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
  35. risk/network/geometry.py +0 -150
  36. risk/stats/__init__.py +0 -15
  37. risk/stats/permutation/__init__.py +0 -6
  38. risk_network-0.0.11.dist-info/METADATA +0 -798
  39. risk_network-0.0.11.dist-info/RECORD +0 -41
  40. {risk_network-0.0.11.dist-info → risk_network-0.0.12b1.dist-info/licenses}/LICENSE +0 -0
  41. {risk_network-0.0.11.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 
  from risk.risk import RISK
 
- __version__ = "0.0.11"
+ __version__ = "0.0.12-beta.1"
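Note that the new in-code version string "0.0.12-beta.1" is an unnormalized spelling of the wheel's version 0.0.12b1; under PEP 440 the two compare equal. A quick sanity check with the third-party packaging library (not part of this diff):

from packaging.version import Version

# "beta" is an alternate spelling of the "b" pre-release marker, and the
# separator before it is dropped during PEP 440 normalization.
v = Version("0.0.12-beta.1")
print(str(v))                     # 0.0.12b1
print(v == Version("0.0.12b1"))   # True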
risk/annotations/__init__.py CHANGED
@@ -3,5 +3,8 @@ risk/annotations
  ~~~~~~~~~~~~~~~~
  """
 
- from risk.annotations.annotations import define_top_annotations, get_weighted_description
+ from risk.annotations.annotations import (
+     define_top_annotations,
+     get_weighted_description,
+ )
  from risk.annotations.io import AnnotationsIO
risk/annotations/io.py CHANGED
@@ -10,7 +10,7 @@ import networkx as nx
  import pandas as pd
 
  from risk.annotations.annotations import load_annotations
- from risk.log import params, logger, log_header
+ from risk.log import log_header, logger, params
 
 
  class AnnotationsIO:
@@ -20,9 +20,6 @@ class AnnotationsIO:
      and to export parameter data to various formats like JSON, CSV, and text files.
      """
 
-     def __init__(self):
-         pass
-
      def load_json_annotation(
          self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
      ) -> Dict[str, Any]:
@@ -42,7 +39,7 @@ class AnnotationsIO:
          params.log_annotations(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the JSON file into a dictionary
          with open(filepath, "r", encoding="utf-8") as file:
@@ -81,7 +78,7 @@ class AnnotationsIO:
          params.log_annotations(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the specified sheet from the Excel file
          annotation = pd.read_excel(filepath, sheet_name=sheet_name)
@@ -123,10 +120,10 @@ class AnnotationsIO:
          params.log_annotations(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the CSV file into a dictionary
-         annotations_input = _load_matrix_file(
+         annotations_input = self._load_matrix_file(
              filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
          )
 
@@ -161,10 +158,10 @@ class AnnotationsIO:
          params.log_annotations(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the TSV file into a dictionary
-         annotations_input = _load_matrix_file(
+         annotations_input = self._load_matrix_file(
              filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
          )
 
@@ -183,6 +180,9 @@ class AnnotationsIO:
 
          Returns:
              Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+
+         Raises:
+             TypeError: If the content is not a dictionary.
          """
          # Ensure the input content is a dictionary
          if not isinstance(content, dict):
@@ -193,48 +193,49 @@ class AnnotationsIO:
          filetype = "Dictionary"
          # Log the loading of the annotations from the dictionary
          params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
-         _log_loading(filetype, "In-memory dictionary")
+         self._log_loading(filetype, "In-memory dictionary")
 
          # Load the annotations as a dictionary from the provided dictionary
          return load_annotations(network, content, min_nodes_per_term)
 
-
- def _load_matrix_file(
-     filepath: str,
-     label_colname: str,
-     nodes_colname: str,
-     delimiter: str = ",",
-     nodes_delimiter: str = ";",
- ) -> Dict[str, Any]:
-     """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
-     Args:
-         filepath (str): Path to the annotation file.
-         label_colname (str): Name of the column containing the labels (e.g., GO terms).
-         nodes_colname (str): Name of the column containing the nodes associated with each label.
-         delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
-         nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
-     Returns:
-         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
-     """
-     # Load the CSV or TSV file into a DataFrame
-     annotation = pd.read_csv(filepath, delimiter=delimiter)
-     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-     annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
-     # Create a dictionary pairing labels with their corresponding list of nodes
-     label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
-     return label_node_dict
-
-
- def _log_loading(filetype: str, filepath: str = "") -> None:
-     """Log information about the network file being loaded.
-
-     Args:
-         filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
-         filepath (str, optional): The path to the file being loaded.
-     """
-     log_header("Loading annotations")
-     logger.debug(f"Filetype: {filetype}")
-     if filepath:
-         logger.debug(f"Filepath: {filepath}")
+     def _load_matrix_file(
+         self,
+         filepath: str,
+         label_colname: str,
+         nodes_colname: str,
+         delimiter: str = ",",
+         nodes_delimiter: str = ";",
+     ) -> Dict[str, Any]:
+         """Load annotations from a CSV or TSV file and convert them to a dictionary.
+
+         Args:
+             filepath (str): Path to the annotation file.
+             label_colname (str): Name of the column containing the labels (e.g., GO terms).
+             nodes_colname (str): Name of the column containing the nodes associated with each label.
+             delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+
+         Returns:
+             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+         """
+         # Load the CSV or TSV file into a DataFrame
+         annotation = pd.read_csv(filepath, delimiter=delimiter)
+         # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+         annotation[nodes_colname] = annotation[nodes_colname].apply(
+             lambda x: x.split(nodes_delimiter)
+         )
+         # Create a dictionary pairing labels with their corresponding list of nodes
+         label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+         return label_node_dict
+
+     def _log_loading(self, filetype: str, filepath: str = "") -> None:
+         """Log information about the network file being loaded.
+
+         Args:
+             filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+             filepath (str, optional): The path to the file being loaded.
+         """
+         log_header("Loading annotations")
+         logger.debug(f"Filetype: {filetype}")
+         if filepath:
+             logger.debug(f"Filepath: {filepath}")
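The substantive change in this file is that the module-level helpers _load_matrix_file and _log_loading become instance methods of AnnotationsIO, so internal calls now go through self. The label-to-nodes mapping the relocated helper builds is unchanged; a small self-contained sketch of that transformation (column names and values are made up for illustration):

import io

import pandas as pd

# Two-column annotation table: a label column and a delimited nodes column.
csv_text = "label,nodes\nGO:0006915,geneA;geneB\nGO:0008150,geneC"
annotation = pd.read_csv(io.StringIO(csv_text), delimiter=",")
# Split the nodes column so each label maps to a list of nodes.
annotation["nodes"] = annotation["nodes"].apply(lambda x: x.split(";"))
label_node_dict = annotation.set_index("label")["nodes"].to_dict()
print(label_node_dict)  # {'GO:0006915': ['geneA', 'geneB'], 'GO:0008150': ['geneC']}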
risk/annotations/nltk_setup.py CHANGED
@@ -8,7 +8,8 @@ import zipfile
  from typing import List, Tuple
 
  import nltk
- from nltk.data import find, path as nltk_data_path
+ from nltk.data import find
+ from nltk.data import path as nltk_data_path
 
  from risk.log import logger
 
risk/log/__init__.py CHANGED
@@ -3,7 +3,7 @@ risk/log
  ~~~~~~~~
  """
 
- from risk.log.console import logger, log_header, set_global_verbosity
+ from risk.log.console import log_header, logger, set_global_verbosity
  from risk.log.parameters import Params
 
  # Initialize the global parameters logger
risk/log/parameters.py CHANGED
@@ -11,7 +11,7 @@ from typing import Any, Dict
 
  import numpy as np
 
- from risk.log.console import logger, log_header
+ from risk.log.console import log_header, logger
 
  # Suppress all warnings - this is to resolve warnings from multiprocessing
  warnings.filterwarnings("ignore")
@@ -137,7 +137,7 @@ class Params:
          Dict[str, Any]: A dictionary containing the processed parameters.
          """
          log_header("Loading parameters")
-         return _convert_ndarray_to_list(
+         return self._convert_ndarray_to_list(
              {
                  "annotations": self.annotations,
                  "datetime": self.datetime,
@@ -148,25 +148,24 @@ class Params:
          }
      )
 
-
- def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
-     """Recursively convert all np.ndarray values in the dictionary to lists.
-
-     Args:
-         d (Dict[str, Any]): The dictionary to process.
-
-     Returns:
-         Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
-     """
-     if isinstance(d, dict):
-         # Recursively process each value in the dictionary
-         return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
-     if isinstance(d, list):
-         # Recursively process each item in the list
-         return [_convert_ndarray_to_list(v) for v in d]
-     if isinstance(d, np.ndarray):
-         # Convert numpy arrays to lists
-         return d.tolist()
-
-     # Return the value unchanged if it's not a dict, List, or ndarray
-     return d
+     def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
+         """Recursively convert all np.ndarray values in the dictionary to lists.
+
+         Args:
+             d (Dict[str, Any]): The dictionary to process.
+
+         Returns:
+             Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
+         """
+         if isinstance(d, dict):
+             # Recursively process each value in the dictionary
+             return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
+         if isinstance(d, list):
+             # Recursively process each item in the list
+             return [self._convert_ndarray_to_list(v) for v in d]
+         if isinstance(d, np.ndarray):
+             # Convert numpy arrays to lists
+             return d.tolist()
+
+         # Return the value unchanged if it's not a dict, List, or ndarray
+         return d
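_convert_ndarray_to_list is now a Params method, but its behavior is unchanged: it walks nested dicts and lists and turns every NumPy array into a plain list so the parameters can be serialized. A standalone sketch of the same recursion (illustrative, outside the class):

import numpy as np


def convert_ndarray_to_list(d):
    # Recursively convert np.ndarray values to lists; leave everything else alone.
    if isinstance(d, dict):
        return {k: convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    return d


params_dict = {"weights": np.array([0.1, 0.9]), "nested": {"ids": [np.array([1, 2])]}}
print(convert_ndarray_to_list(params_dict))
# {'weights': [0.1, 0.9], 'nested': {'ids': [[1, 2]]}}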
risk/neighborhoods/__init__.py CHANGED
@@ -4,5 +4,4 @@ risk/neighborhoods
  """
 
  from risk.neighborhoods.domains import define_domains, trim_domains
- from risk.neighborhoods.api import NeighborhoodsAPI
  from risk.neighborhoods.neighborhoods import process_neighborhoods
risk/neighborhoods/api.py CHANGED
@@ -10,9 +10,9 @@ import networkx as nx
  import numpy as np
  from scipy.sparse import csr_matrix
 
- from risk.log import logger, log_header, params
+ from risk.log import log_header, logger, params
  from risk.neighborhoods.neighborhoods import get_network_neighborhoods
- from risk.stats import (
+ from risk.neighborhoods.stats import (
      compute_binom_test,
      compute_chi2_test,
      compute_hypergeom_test,
risk/neighborhoods/community.py CHANGED
@@ -8,7 +8,7 @@ import igraph as ig
  import markov_clustering as mc
  import networkx as nx
  import numpy as np
- from leidenalg import find_partition, RBConfigurationVertexPartition
+ from leidenalg import RBConfigurationVertexPartition, find_partition
  from networkx.algorithms.community import greedy_modularity_communities
  from scipy.sparse import csr_matrix
 
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix in CSR format.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
 
-     Warning:
-         This function temporarily converts the adjacency matrix to a dense format, which may lead to
-         high memory consumption for large graphs.
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         RuntimeError: If MCL fails to run.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
 
      Returns:
          csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+
+     Raises:
+         ValueError: If the subgraph has no edges after filtering.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Create a subgraph with the shortest edges based on the rank fraction
      subnetwork = _create_percentile_limited_subgraph(
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
      Returns:
          nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
              specified rank fraction.
+
+     Raises:
+         ValueError: If no edges with 'length' attributes are found in the graph.
+         Warning: If the resulting subgraph has no edges after filtering.
      """
      # Step 1: Extract edges with their lengths
      edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
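The new Raises sections document the same failure mode across all of these functions: _create_percentile_limited_subgraph needs edges carrying a "length" attribute and raises ValueError when none are found. A hedged usage sketch of that contract (the graph, its values, and importing this private helper directly are all illustrative assumptions):

import networkx as nx

from risk.neighborhoods.community import _create_percentile_limited_subgraph

# Edges must carry a "length" attribute; the helper keeps only the shortest fraction.
G = nx.Graph()
G.add_edge("a", "b", length=1.0)
G.add_edge("b", "c", length=2.0)
G.add_edge("c", "d", length=3.0)

try:
    subgraph = _create_percentile_limited_subgraph(G, fraction_shortest_edges=0.5)
    print(subgraph.number_of_edges())
except ValueError:
    # Raised when no edges with "length" attributes are present in the graph.
    print("no usable edges in the graph")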
risk/neighborhoods/domains.py CHANGED
@@ -9,19 +9,18 @@ from typing import Tuple, Union
  import numpy as np
  import pandas as pd
  from numpy.linalg import LinAlgError
- from scipy.cluster.hierarchy import linkage, fcluster
+ from scipy.cluster.hierarchy import fcluster, linkage
  from sklearn.metrics import silhouette_score
  from tqdm import tqdm
 
  from risk.annotations import get_weighted_description
  from risk.log import logger
 
-
  # Define constants for clustering
  # fmt: off
  LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
  LINKAGE_METRICS = {
-     "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+     "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
      "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
      "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
  }
@@ -49,6 +48,9 @@ def define_domains(
 
      Returns:
          pd.DataFrame: DataFrame with the primary domain for each node.
+
+     Raises:
+         ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
      """
      try:
          if linkage_criterion == "off":
@@ -242,7 +244,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
      # Evaluating optimal linkage method and metric
      for method, metric in tqdm(
          product(linkage_methods, linkage_metrics),
-         desc="Evaluating optimal linkage method and metric",
+         desc="Evaluating linkage methods and metrics",
          total=total_combinations,
          bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
      ):
risk/neighborhoods/neighborhoods.py CHANGED
@@ -13,6 +13,7 @@ from scipy.sparse import csr_matrix
  from sklearn.exceptions import DataConversionWarning
  from sklearn.metrics.pairwise import cosine_similarity
 
+ from risk.log import logger
  from risk.neighborhoods.community import (
      calculate_greedy_modularity_neighborhoods,
      calculate_label_propagation_neighborhoods,
@@ -22,7 +23,6 @@ from risk.neighborhoods.community import (
      calculate_spinglass_neighborhoods,
      calculate_walktrap_neighborhoods,
  )
- from risk.log import logger
 
  # Suppress DataConversionWarning
  warnings.filterwarnings(action="ignore", category=DataConversionWarning)
@@ -48,6 +48,9 @@ def get_network_neighborhoods(
 
      Returns:
          csr_matrix: The combined neighborhood matrix.
+
+     Raises:
+         ValueError: If the number of distance metrics does not match the number of edge length thresholds.
      """
      # Set random seed for reproducibility
      random.seed(random_seed)
@@ -490,6 +493,9 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
 
      Returns:
          float: The calculated distance threshold value.
+
+     Raises:
+         ValueError: If no significant annotations are found in the median distances.
      """
      # Sort the median distances
      sorted_distances = np.sort(median_distances)
risk/neighborhoods/stats/__init__.py ADDED
@@ -0,0 +1,13 @@
+ """
+ risk/neighborhoods/stats
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ from risk.neighborhoods.stats.permutation import compute_permutation_test
+ from risk.neighborhoods.stats.tests import (
+     compute_binom_test,
+     compute_chi2_test,
+     compute_hypergeom_test,
+     compute_poisson_test,
+     compute_zscore_test,
+ )
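The new risk.neighborhoods.stats package re-exports the statistical tests that previously lived under risk.stats. A minimal sketch of the 0.0.12b1 import surface in use (the toy matrices are made-up values; the keys of the result are expected to be the depletion and enrichment p-values described in the docstrings):

import numpy as np
from scipy.sparse import csr_matrix

# New in 0.0.12b1: imported from risk.neighborhoods.stats (previously risk.stats).
from risk.neighborhoods.stats import compute_hypergeom_test

# Toy binary matrices: 3x3 node-by-node neighborhoods, 3x2 node-by-term annotations.
neighborhoods = csr_matrix(np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]]))
annotations = csr_matrix(np.array([[1, 0], [1, 1], [0, 1]]))

result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
print(sorted(result.keys()))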
risk/neighborhoods/stats/permutation/__init__.py ADDED
@@ -0,0 +1,6 @@
+ """
+ risk/neighborhoods/stats/permutation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ from risk.neighborhoods.stats.permutation.permutation import compute_permutation_test
risk/{stats → neighborhoods/stats}/permutation/permutation.py RENAMED
@@ -1,9 +1,9 @@
  """
- risk/stats/permutation/permutation
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ risk/neighborhoods/stats/permutation/permutation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  """
 
- from multiprocessing import get_context, Manager
+ from multiprocessing import Manager, get_context
  from multiprocessing.managers import ValueProxy
  from typing import Any, Callable, Dict, List, Tuple, Union
 
@@ -12,7 +12,7 @@ from scipy.sparse import csr_matrix
  from threadpoolctl import threadpool_limits
  from tqdm import tqdm
 
- from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
+ from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
 
 
  def compute_permutation_test(
@@ -88,6 +88,9 @@ def _run_permutation_test(
 
      Returns:
          tuple: Depletion and enrichment counts.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Initialize the RNG for reproducibility
      rng = np.random.default_rng(seed=random_seed)
risk/{stats → neighborhoods/stats}/permutation/test_functions.py RENAMED
@@ -1,6 +1,6 @@
  """
- risk/stats/permutation/test_functions
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ risk/neighborhoods/stats/permutation/test_functions
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  """
 
  import numpy as np
risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} RENAMED
@@ -1,17 +1,13 @@
  """
- risk/stats/stat_tests
- ~~~~~~~~~~~~~~~~~~~~~
+ risk/neighborhoods/stats/tests
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  """
 
  from typing import Any, Dict
 
  import numpy as np
  from scipy.sparse import csr_matrix
- from scipy.stats import binom
- from scipy.stats import chi2
- from scipy.stats import hypergeom
- from scipy.stats import norm
- from scipy.stats import poisson
+ from scipy.stats import binom, chi2, hypergeom, norm, poisson
 
 
  def compute_binom_test(
@@ -28,6 +24,9 @@ def compute_binom_test(
 
      Returns:
          Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Get the total number of nodes in the network
      total_nodes = neighborhoods.shape[1]
@@ -70,6 +69,9 @@ def compute_chi2_test(
 
      Returns:
          Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Total number of nodes in the network
      total_node_count = neighborhoods.shape[0]
@@ -120,8 +122,7 @@ def compute_hypergeom_test(
      annotations: csr_matrix,
      null_distribution: str = "network",
  ) -> Dict[str, Any]:
-     """
-     Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+     """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
 
      Args:
          neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
@@ -130,6 +131,9 @@ def compute_hypergeom_test(
 
      Returns:
          Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Get the total number of nodes in the network
      total_nodes = neighborhoods.shape[1]
@@ -174,8 +178,7 @@ def compute_poisson_test(
      annotations: csr_matrix,
      null_distribution: str = "network",
  ) -> Dict[str, Any]:
-     """
-     Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+     """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
 
      Args:
          neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
@@ -184,6 +187,9 @@ def compute_poisson_test(
 
      Returns:
          Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Matrix multiplication to get the number of annotated nodes in each neighborhood
      annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
@@ -214,8 +220,7 @@ def compute_zscore_test(
      annotations: csr_matrix,
      null_distribution: str = "network",
  ) -> Dict[str, Any]:
-     """
-     Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+     """Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
 
      Args:
          neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
@@ -224,6 +229,9 @@ def compute_zscore_test(
 
      Returns:
          Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+
+     Raises:
+         ValueError: If an invalid null_distribution value is provided.
      """
      # Total number of nodes in the network
      total_node_count = neighborhoods.shape[1]
risk/network/__init__.py CHANGED
@@ -2,5 +2,3 @@
  risk/network
  ~~~~~~~~~~~~
  """
-
- from risk.network.io import NetworkIO
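This re-export is removed here, just as the NeighborhoodsAPI and GraphAPI re-exports are dropped from their packages in this release. Code that imported these names from the subpackage root therefore needs the full module path in 0.0.12b1; an illustrative adjustment, assuming NetworkIO still lives in risk/network/io.py:

# 0.0.11 (worked because risk/network/__init__.py re-exported the class):
# from risk.network import NetworkIO

# 0.0.12b1: import directly from the defining module.
from risk.network.io import NetworkIO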
risk/network/graph/__init__.py CHANGED
@@ -2,5 +2,3 @@
  risk/network/graph
  ~~~~~~~~~~~~~~~~~~
  """
-
- from risk.network.graph.api import GraphAPI
risk/network/graph/api.py CHANGED
@@ -10,14 +10,14 @@ import networkx as nx
  import pandas as pd
 
  from risk.annotations import define_top_annotations
- from risk.log import logger, log_header, params
+ from risk.log import log_header, logger, params
  from risk.neighborhoods import (
      define_domains,
      process_neighborhoods,
      trim_domains,
  )
  from risk.network.graph.graph import Graph
- from risk.stats import calculate_significance_matrices
+ from risk.network.graph.stats import calculate_significance_matrices
 
 
  class GraphAPI:
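Taken together with the file renames above, the standalone risk.stats package is gone in 0.0.12b1: the neighborhood-level tests move under risk.neighborhoods.stats and the significance-matrix helper moves under risk.network.graph.stats. A sketch of the corresponding import changes for code that used these internals directly, based only on the paths shown in this diff:

# 0.0.11
# from risk.stats import calculate_significance_matrices, compute_hypergeom_test

# 0.0.12b1
from risk.neighborhoods.stats import compute_hypergeom_test, compute_permutation_test
from risk.network.graph.stats import calculate_significance_matrices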