risk-network 0.0.9b28__py3-none-any.whl → 0.0.9b29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.28"
10
+ __version__ = "0.0.9-beta.29"
risk/stats/stat_tests.py CHANGED
@@ -3,6 +3,7 @@ risk/stats/stat_tests
3
3
  ~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
+ from concurrent.futures import ThreadPoolExecutor
6
7
  from typing import Any, Dict
7
8
 
8
9
  import numpy as np
@@ -120,56 +121,62 @@ def compute_hypergeom_test(
120
121
  annotations: csr_matrix,
121
122
  null_distribution: str = "network",
122
123
  ) -> Dict[str, Any]:
123
- """
124
- Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
124
+ """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
125
125
 
126
126
  Args:
127
127
  neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
128
128
  annotations (csr_matrix): Sparse binary matrix representing annotations.
129
- null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
129
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
130
+ Defaults to "network".
130
131
 
131
132
  Returns:
132
133
  Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
133
134
  """
134
- # Total number of nodes
135
+ # Get the total number of nodes in the network
135
136
  total_nodes = neighborhoods.shape[1]
137
+ # Calculate neighborhood and annotation sums
138
+ neighborhood_sums = neighborhoods.sum(axis=0).A1 # A1 returns a 1D array
139
+ annotation_sums = annotations.sum(axis=0).A1
136
140
 
137
- # Compute sums directly using sparse operations
138
- neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()
139
- annotation_sums = annotations.sum(axis=0).A.flatten()
140
-
141
- if null_distribution == "network":
142
- background_population = total_nodes
143
- elif null_distribution == "annotations":
144
- # Boolean mask for nodes with annotations
145
- annotated_nodes = annotations.getnnz(axis=1) > 0
141
+ if null_distribution == "annotations":
142
+ annotated_nodes = annotations.getnnz(axis=1) > 0 # Nodes with any annotation
146
143
  background_population = annotated_nodes.sum()
147
- # Filter neighborhoods and annotations to include only annotated nodes
148
144
  neighborhoods = neighborhoods[annotated_nodes]
149
145
  annotations = annotations[annotated_nodes]
150
- neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()
151
- annotation_sums = annotations.sum(axis=0).A.flatten()
146
+ neighborhood_sums = neighborhoods.sum(axis=0).A1
147
+ annotation_sums = annotations.sum(axis=0).A1
148
+ elif null_distribution == "network":
149
+ background_population = total_nodes
152
150
  else:
153
- raise ValueError(
154
- "Invalid null_distribution value. Choose either 'network' or 'annotations'."
151
+ raise ValueError("Invalid null_distribution value. Choose 'network' or 'annotations'.")
152
+
153
+ # Sparse matrix multiplication for observed counts
154
+ annotated_in_neighborhood = neighborhoods.T @ annotations # Result is sparse
155
+
156
+ def compute_pvals_for_column(col_idx: int):
157
+ """Compute depletion and enrichment p-values for a single annotation column."""
158
+ observed_counts = annotated_in_neighborhood[:, col_idx].toarray().flatten()
159
+ ann_total = annotation_sums[col_idx]
160
+ # Compute depletion and enrichment p-values
161
+ depletion_pvals = hypergeom.cdf(
162
+ observed_counts, background_population, ann_total, neighborhood_sums
155
163
  )
156
-
157
- # Compute annotated nodes in each neighborhood
158
- annotated_in_neighborhood = neighborhoods.T @ annotations # Sparse multiplication
159
- # Convert to dense arrays for vectorized operations
160
- annotated_in_neighborhood = annotated_in_neighborhood.toarray()
161
- # Align shapes for broadcasting
162
- neighborhood_sums = neighborhood_sums[:, np.newaxis]
163
- annotation_sums = annotation_sums[np.newaxis, :]
164
- background_population = np.array([[background_population]])
165
-
166
- # Fully vectorized hypergeometric calculations
167
- depletion_pvals = hypergeom.cdf(
168
- annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
169
- )
170
- enrichment_pvals = hypergeom.sf(
171
- annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
172
- )
164
+ enrichment_pvals = hypergeom.sf(
165
+ observed_counts - 1, background_population, ann_total, neighborhood_sums
166
+ )
167
+ return col_idx, depletion_pvals, enrichment_pvals
168
+
169
+ # Use ThreadPoolExecutor to process columns in parallel
170
+ num_columns = annotations.shape[1]
171
+ depletion_pvals = np.empty((annotated_in_neighborhood.shape[0], num_columns), dtype=np.float64)
172
+ enrichment_pvals = np.empty((annotated_in_neighborhood.shape[0], num_columns), dtype=np.float64)
173
+ with ThreadPoolExecutor() as executor:
174
+ results = executor.map(compute_pvals_for_column, range(num_columns))
175
+
176
+ # Collect results
177
+ for col_idx, dep_pval, enr_pval in results:
178
+ depletion_pvals[:, col_idx] = dep_pval
179
+ enrichment_pvals[:, col_idx] = enr_pval
173
180
 
174
181
  return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
175
182
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: risk-network
3
- Version: 0.0.9b28
3
+ Version: 0.0.9b29
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,4 +1,4 @@
1
- risk/__init__.py,sha256=32Lq_wPcVY8stW7c0jkvgihM15jnYka5Hnw8M9gbjN0,127
1
+ risk/__init__.py,sha256=zxCDSH8dgk6s0jbXYvLcOxAb7IGjNoZoOCoCwQ1Vd7g,127
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
3
  risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
4
4
  risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
@@ -30,12 +30,12 @@ risk/network/plotter/utils/colors.py,sha256=VU1sLPRC99ll6EGK4vRNgLMUXU8lja1vjiXU
30
30
  risk/network/plotter/utils/layout.py,sha256=OPqV8jzV9dpnOhYU4SYMSfsIXalVzESrlBSI_Y43OGU,3640
31
31
  risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
32
32
  risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
33
- risk/stats/stat_tests.py,sha256=qYn85VrNJeIlEptkEUoYsPco4BQ604CLJxXczgekXgc,11986
33
+ risk/stats/stat_tests.py,sha256=kZu48JPDneMwAZGVci2i_oaJUPLUKwiYTT3c4pKhr-0,12490
34
34
  risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
35
35
  risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
36
36
  risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
37
- risk_network-0.0.9b28.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
- risk_network-0.0.9b28.dist-info/METADATA,sha256=v9uuUQ9EwyI5WzIirw_ONry2KPaiHqTiw2TDWX60Y6c,47627
39
- risk_network-0.0.9b28.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
40
- risk_network-0.0.9b28.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
- risk_network-0.0.9b28.dist-info/RECORD,,
37
+ risk_network-0.0.9b29.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
+ risk_network-0.0.9b29.dist-info/METADATA,sha256=K9e3VRGdXrMUzmhtpX0PLQBFCdakxyBgKbW88xpIVYw,47627
39
+ risk_network-0.0.9b29.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
40
+ risk_network-0.0.9b29.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
+ risk_network-0.0.9b29.dist-info/RECORD,,