risk_network-0.0.8b26-py3-none-any.whl → risk_network-0.0.9b26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +74 -47
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +255 -77
  11. risk/neighborhoods/domains.py +62 -31
  12. risk/neighborhoods/neighborhoods.py +156 -160
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +65 -57
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/{graph.py → graph/network.py} +87 -37
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +56 -47
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +7 -4
  23. risk/network/{plot → plotter}/contour.py +22 -19
  24. risk/network/{plot → plotter}/labels.py +69 -74
  25. risk/network/{plot → plotter}/network.py +170 -34
  26. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
  27. risk/network/{plot → plotter}/utils/layout.py +8 -5
  28. risk/risk.py +11 -500
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +27 -17
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +44 -38
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +15 -9
  37. risk/stats/stats.py +15 -13
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/plot/__init__.py +0 -6
  43. risk/network/plot/plotter.py +0 -137
  44. risk_network-0.0.8b26.dist-info/RECORD +0 -37
  45. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  46. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/stats/hypergeom.py CHANGED
@@ -6,44 +6,54 @@ risk/stats/hypergeom
 from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import hypergeom
 
 
 def compute_hypergeom_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Get the total number of nodes in the network
-    total_node_count = neighborhoods.shape[0]
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
 
     if null_distribution == "network":
-        # Case 1: Use all nodes as the background
-        background_population = total_node_count
-        neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations, axis=0, keepdims=True)
+        background_population = total_nodes
     elif null_distribution == "annotations":
-        # Case 2: Only consider nodes with at least one annotation
-        annotated_nodes = np.sum(annotations, axis=1) > 0
-        background_population = np.sum(annotated_nodes)
-        neighborhood_sums = np.sum(neighborhoods[annotated_nodes], axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Matrix multiplication for annotated nodes in each neighborhood
-    annotated_in_neighborhood = neighborhoods.T @ annotations
-    # Calculate depletion and enrichment p-values using the hypergeometric distribution
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
     depletion_pvals = hypergeom.cdf(
         annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
     )
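
Note on the API change above: callers must now pass scipy.sparse.csr_matrix inputs rather than dense arrays. A minimal usage sketch follows; the toy matrices are hypothetical, the square node-by-node shape of `neighborhoods` is an assumption consistent with `shape[1]` serving as the node count, and the result keys are assumed to mirror the `depletion_pvals`/`enrichment_pvals` keys used by the other tests in this diff.

    import numpy as np
    from scipy.sparse import csr_matrix
    from risk.stats.hypergeom import compute_hypergeom_test

    # Toy 4-node network: row i marks the members of node i's neighborhood (hypothetical data)
    neighborhoods = csr_matrix(np.array([
        [1, 1, 0, 0],
        [1, 1, 1, 0],
        [0, 1, 1, 1],
        [0, 0, 1, 1],
    ]))
    # 4 nodes x 3 annotation terms (hypothetical data)
    annotations = csr_matrix(np.array([
        [1, 0, 1],
        [0, 1, 0],
        [1, 1, 0],
        [0, 0, 1],
    ]))

    result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
    print(result["enrichment_pvals"].shape)  # (4, 3): neighborhoods x annotation terms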
risk/stats/permutation/__init__.py CHANGED
@@ -3,4 +3,4 @@ risk/stats/permutation
 ~~~~~~~~~~~~~~~~~~~~~~
 """
 
-from .permutation import compute_permutation_test
+from risk.stats.permutation.permutation import compute_permutation_test
risk/stats/permutation/permutation.py CHANGED
@@ -5,18 +5,19 @@ risk/stats/permutation/permutation
 
 from multiprocessing import get_context, Manager
 from multiprocessing.managers import ValueProxy
-from tqdm import tqdm
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, List, Tuple, Union
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from threadpoolctl import threadpool_limits
+from tqdm import tqdm
 
 from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
 
 
 def compute_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -26,9 +27,9 @@ def compute_permutation_test(
     """Compute permutation test for enrichment and depletion in neighborhoods.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        score_metric (str, optional): Metric to use for scoring ('sum', 'mean', etc.). Defaults to "sum".
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -38,6 +39,7 @@ def compute_permutation_test(
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Ensure that the matrices are in the correct format and free of NaN values
+    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
     annotations = annotations.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
@@ -65,19 +67,19 @@ def compute_permutation_test(
 
 
 def _run_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
     random_seed: int = 888,
     max_workers: int = 4,
 ) -> tuple:
-    """Run a permutation test to calculate enrichment and depletion counts.
+    """Run the permutation test to calculate depletion and enrichment counts.
 
     Args:
-        neighborhoods (np.ndarray): The neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
@@ -93,14 +95,14 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Replace NaNs with zeros in the annotations matrix
-    annotations[np.isnan(annotations)] = 0
+    # Replace NaNs with zeros in the sparse annotations matrix
+    annotations.data[np.isnan(annotations.data)] = 0
     annotation_matrix_obsv = annotations[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
@@ -121,28 +123,35 @@ def _run_permutation_test(
     manager = Manager()
     progress_counter = manager.Value("i", 0)
     total_progress = num_permutations
+
+    # Generate precomputed permutations
+    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
+    # Divide permutations into batches for workers
+    batch_size = subset_size + (1 if remainder > 0 else 0)
+    permutation_batches = [
+        permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
+    ]
+
     # Execute the permutation test using multiprocessing
     with ctx.Pool(max_workers) as pool:
         with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
             # Prepare parameters for multiprocessing
             params_list = [
                 (
+                    permutation_batches[i],  # Pass the batch of precomputed permutations
                     annotations,
-                    np.array(idxs),
                     neighborhoods_matrix_obsv,
                     observed_neighborhood_scores,
                     neighborhood_score_func,
-                    subset_size + (1 if i < remainder else 0),
                     num_permutations,
                     progress_counter,
                     max_workers,
-                    rng,  # Pass the random number generator to each worker
                 )
                 for i in range(max_workers)
             ]
 
             # Start the permutation process in parallel
-            results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
+            results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)
 
             # Update progress bar based on progress_counter
             while not results.ready():
@@ -159,31 +168,27 @@ def _run_permutation_test(
     return counts_depletion, counts_enrichment
 
 
-def _permutation_process_subset(
-    annotation_matrix: np.ndarray,
-    idxs: np.ndarray,
-    neighborhoods_matrix_obsv: np.ndarray,
+def _permutation_process_batch(
+    permutations: Union[List, Tuple, np.ndarray],
+    annotation_matrix: csr_matrix,
+    neighborhoods_matrix_obsv: csr_matrix,
     observed_neighborhood_scores: np.ndarray,
     neighborhood_score_func: Callable,
-    subset_size: int,
     num_permutations: int,
     progress_counter: ValueProxy,
    max_workers: int,
-    rng: np.random.Generator,
 ) -> tuple:
-    """Process a subset of permutations for the permutation test.
+    """Process a batch of permutations in a worker process.
 
     Args:
-        annotation_matrix (np.ndarray): The annotation matrix.
-        idxs (np.ndarray): Indices of valid rows in the matrix.
-        neighborhoods_matrix_obsv (np.ndarray): Observed neighborhoods matrix.
+        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        subset_size (int): Number of permutations to run in this subset.
         num_permutations (int): Number of total permutations across all subsets.
         progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
        max_workers (int): Number of workers for multiprocessing.
-        rng (np.random.Generator): Random number generator object.
 
     Returns:
         tuple: Local counts of depletion and enrichment.
@@ -192,7 +197,9 @@ def _permutation_process_subset(
     local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
     local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
 
-    # NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
+    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
+    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
+    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
     limits = None if max_workers == 1 else 1
     with threadpool_limits(limits=limits, user_api="blas"):
         # Initialize a local counter for batched progress updates
@@ -200,16 +207,16 @@ def _permutation_process_subset(
         # Calculate the modulo value based on total permutations for 1/100th frequency updates
         modulo_value = max(1, num_permutations // 100)
 
-        for _ in range(subset_size):
-            # Permute the annotation matrix using the RNG
-            annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
+        for permuted_idxs in permutations:
+            # Apply precomputed permutation
+            annotation_matrix_permut = annotation_matrix[permuted_idxs]
             # Calculate permuted neighborhood scores
             with np.errstate(invalid="ignore", divide="ignore"):
                 permuted_neighborhood_scores = neighborhood_score_func(
                     neighborhoods_matrix_obsv, annotation_matrix_permut
                 )
 
-            # Update local depletion and enrichment counts based on permuted scores
+            # Update local depletion and enrichment counts
             local_counts_depletion = np.add(
                 local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
             )
@@ -218,9 +225,8 @@ def _permutation_process_subset(
                 permuted_neighborhood_scores >= observed_neighborhood_scores,
             )
 
-            # Update local progress counter
+            # Update progress
             local_progress += 1
-            # Update shared progress counter every 1/100th of total permutations
            if local_progress % modulo_value == 0:
                 progress_counter.value += modulo_value
 
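Precomputing every permutation up front and slicing the list into batches makes the run reproducible for a given seed regardless of worker scheduling, at the cost of holding all index arrays in memory. Below is a standalone sketch of the slicing arithmetic with toy numbers; `subset_size, remainder = divmod(num_permutations, max_workers)` is assumed from surrounding code not shown in this hunk:

    import numpy as np

    num_permutations, max_workers = 10, 4
    rng = np.random.default_rng(888)
    idxs = np.arange(6)  # toy index set

    subset_size, remainder = divmod(num_permutations, max_workers)
    batch_size = subset_size + (1 if remainder > 0 else 0)

    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
    batches = [permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)]
    print([len(b) for b in batches])  # [3, 3, 3, 1] -- trailing batches may be shorter

Since max_workers * batch_size >= num_permutations, every permutation lands in exactly one batch; the trade-off is mildly uneven batch sizes in place of the old per-worker `subset_size + (1 if i < remainder else 0)` split.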
risk/stats/permutation/test_functions.py CHANGED
@@ -4,6 +4,7 @@ risk/stats/permutation/test_functions
 """
 
 import numpy as np
+from scipy.sparse import csr_matrix
 
 # Note: Cython optimizations provided minimal performance benefits.
 # The final version with Cython is archived in the `cython_permutation` branch.
@@ -11,46 +12,53 @@ import numpy as np
 
 
 def compute_neighborhood_score_by_sum(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the sum of attribute values for each neighborhood.
+    """Compute the sum of attribute values for each neighborhood using sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
-        np.ndarray: Sum of attribute values for each neighborhood.
+        np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_sum = np.dot(neighborhoods_matrix, annotation_matrix)
-    return neighborhood_sum
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Convert the result to a dense array for downstream calculations
+    neighborhood_score_dense = neighborhood_score.toarray()
+    return neighborhood_score_dense
 
 
 def compute_neighborhood_score_by_stdev(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the standard deviation of neighborhood scores.
+    """Compute the standard deviation of neighborhood scores for sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-    # Calculate the number of elements in each neighborhood
-    N = np.sum(neighborhoods_matrix, axis=1)
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Calculate the number of elements in each neighborhood (sum of rows)
+    N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
+    # Avoid division by zero by replacing zeros in N with np.nan temporarily
+    N[N == 0] = np.nan
     # Compute the mean of the neighborhood scores
-    M = neighborhood_score / N[:, None]
+    M = neighborhood_score.multiply(1 / N[:, None]).toarray()  # Sparse element-wise division
     # Compute the mean of squares (EXX) directly using squared annotation matrix
-    EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
+    annotation_squared = annotation_matrix.multiply(annotation_matrix)  # Element-wise squaring
+    EXX = (neighborhoods_matrix @ annotation_squared).multiply(1 / N[:, None]).toarray()
     # Calculate variance as EXX - M^2
-    variance = EXX - M**2
+    variance = EXX - np.power(M, 2)
     # Compute the standard deviation as the square root of the variance
     neighborhood_stdev = np.sqrt(variance)
+    # Replace np.nan back with zeros in case N was 0 (no elements in the neighborhood)
+    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
     return neighborhood_stdev
 
 
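The sparse stdev above computes a population standard deviation per neighborhood via the identity Var[X] = E[X^2] - (E[X])^2. A standalone numeric check with toy data, mirroring the sparse operations used in the diff:

    import numpy as np
    from scipy.sparse import csr_matrix

    neigh = csr_matrix(np.array([[1, 1, 0], [0, 1, 1]], dtype=np.float32))  # toy neighborhoods
    annot = csr_matrix(np.array([[2.0], [4.0], [6.0]], dtype=np.float32))   # toy annotation values

    score = (neigh @ annot).toarray()  # summed values per neighborhood
    N = neigh.sum(axis=1).A.flatten()  # members per neighborhood
    M = score / N[:, None]             # per-neighborhood mean
    EXX = (neigh @ annot.multiply(annot)).toarray() / N[:, None]  # mean of squares
    stdev = np.sqrt(EXX - M**2)

    print(stdev.ravel())                           # [1. 1.]
    print(np.std([2.0, 4.0]), np.std([4.0, 6.0]))  # 1.0 1.0 -- population stdev reference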
risk/stats/poisson.py CHANGED
@@ -6,39 +6,45 @@ risk/stats/poisson
 from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import poisson
 
 
 def compute_poisson_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Matrix multiplication to get the number of annotated nodes in each neighborhood
-    annotated_in_neighborhood = neighborhoods @ annotations
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
 
     # Compute lambda_expected based on the chosen null distribution
     if null_distribution == "network":
         # Use the mean across neighborhoods (axis=1)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=1, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
     elif null_distribution == "annotations":
         # Use the mean across annotations (axis=0)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=0, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
     # Compute p-values for enrichment and depletion using Poisson distribution
-    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
-    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
 
     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
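
A side note on the tails above: `1 - poisson.cdf(k - 1, lam)` is the upper-tail probability P(X >= k). `poisson.sf(k - 1, lam)` computes the same quantity without the 1-minus cancellation that loses precision for very small p-values; the diff keeps the explicit form, so this is only an observation. A quick check with toy numbers:

    from scipy.stats import poisson

    k, lam = 7, 4.0
    print(1 - poisson.cdf(k - 1, lam))  # P(X >= 7) ~ 0.1107
    print(poisson.sf(k - 1, lam))       # identical upper tail, numerically safer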
risk/stats/stats.py CHANGED
@@ -44,7 +44,7 @@ def calculate_significance_matrices(
             enrichment_pvals, enrichment_qvals, pval_cutoff=pval_cutoff, fdr_cutoff=fdr_cutoff
         )
         # Compute the enrichment matrix using both q-values and p-values
-        enrichment_matrix = (enrichment_qvals**2) * (enrichment_pvals**0.5)
+        enrichment_matrix = (enrichment_pvals**0.5) * (enrichment_qvals**2)
     else:
         # Compute threshold matrices based on p-value cutoffs only
         depletion_alpha_threshold_matrix = _compute_threshold_matrix(
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
     log_enrichment_matrix = -np.log10(enrichment_matrix)
 
     # Select the appropriate significance matrices based on the specified tail
-    enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
+    significance_matrix, significant_binary_significance_matrix = _select_significance_matrices(
         tail,
         log_depletion_matrix,
         depletion_alpha_threshold_matrix,
@@ -71,14 +71,14 @@ def calculate_significance_matrices(
     )
 
     # Filter the enrichment matrix using the binary significance matrix
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
+        "significance_matrix": significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
     }
 
 
@@ -109,15 +109,15 @@ def _select_significance_matrices(
 
     if tail == "left":
         # Select depletion matrix and corresponding alpha threshold for left-tail analysis
-        enrichment_matrix = -log_depletion_matrix
+        significance_matrix = -log_depletion_matrix
         alpha_threshold_matrix = depletion_alpha_threshold_matrix
     elif tail == "right":
         # Select enrichment matrix and corresponding alpha threshold for right-tail analysis
-        enrichment_matrix = log_enrichment_matrix
+        significance_matrix = log_enrichment_matrix
         alpha_threshold_matrix = enrichment_alpha_threshold_matrix
     elif tail == "both":
         # Select the matrix with the highest absolute values while preserving the sign
-        enrichment_matrix = np.where(
+        significance_matrix = np.where(
             np.abs(log_depletion_matrix) >= np.abs(log_enrichment_matrix),
             -log_depletion_matrix,
             log_enrichment_matrix,
@@ -126,13 +126,15 @@ def _select_significance_matrices(
         alpha_threshold_matrix = np.logical_or(
             depletion_alpha_threshold_matrix, enrichment_alpha_threshold_matrix
         )
+    else:
+        raise ValueError("Invalid value for 'tail'. Must be 'left', 'right', or 'both'.")
 
     # Create a binary significance matrix where valid indices meet the alpha threshold
     valid_idxs = ~np.isnan(alpha_threshold_matrix)
-    significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
-    significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
+    significant_binary_significance_matrix = np.zeros(alpha_threshold_matrix.shape)
+    significant_binary_significance_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _compute_threshold_matrix(
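
The enrichment_* → significance_* key renames above are a breaking change for any caller indexing the dictionary returned by `calculate_significance_matrices`. A hypothetical compatibility shim, not part of the package, that tolerates both versions:

    def get_significance_matrix(matrices: dict):
        """Return the main matrix under either the 0.0.8 or 0.0.9 key name (hypothetical helper)."""
        # 0.0.9b26 renames "enrichment_matrix" to "significance_matrix"
        if "significance_matrix" in matrices:
            return matrices["significance_matrix"]
        return matrices["enrichment_matrix"]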
risk/stats/zscore.py ADDED
@@ -0,0 +1,68 @@
+"""
+risk/stats/zscore
+~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import norm
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense column sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
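
In the new module, under the 'network' null the expected overlap between a neighborhood of size n and an annotation of size K over a background of N nodes is E = n*K/N, with standard deviation sqrt(E * (1 - K/N) * (1 - n/N)), as implemented above. A scalar sketch of that arithmetic with toy numbers:

    import numpy as np
    from scipy.stats import norm

    N, n, K, observed = 100, 20, 30, 12  # toy background, neighborhood size, annotation size, overlap

    expected = n * K / N                                     # 6.0
    std_dev = np.sqrt(expected * (1 - K / N) * (1 - n / N))  # ~1.833
    z = (observed - expected) / std_dev                      # ~3.27
    print(norm.sf(z))   # enrichment p-value (upper tail), ~5.3e-4
    print(norm.cdf(z))  # depletion p-value (lower tail)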
{risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA CHANGED
@@ -1,10 +1,10 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.8b26
+Version: 0.0.9b26
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
-License: GNU GENERAL PUBLIC LICENSE
+License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
 
 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
@@ -695,19 +695,23 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ipywidgets
-Requires-Dist: markov-clustering
+Requires-Dist: leidenalg
+Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
 Requires-Dist: nltk==3.8.1
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas
+Requires-Dist: python-igraph
 Requires-Dist: python-louvain
 Requires-Dist: scikit-learn
 Requires-Dist: scipy
 Requires-Dist: statsmodels
 Requires-Dist: threadpoolctl
 Requires-Dist: tqdm
+Dynamic: author
+Dynamic: requires-python
 
 # RISK Network
 
@@ -724,7 +728,7 @@ Requires-Dist: tqdm
 ![Downloads](https://img.shields.io/pypi/dm/risk-network)
 ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
 
-**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
+**RISK** (Regional Inference of Significant Kinships) is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
 
 ## Documentation and Tutorial