risk-network 0.0.3b0__cp38-cp38-musllinux_1_2_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +13 -0
- risk/annotations/__init__.py +7 -0
- risk/annotations/annotations.py +259 -0
- risk/annotations/io.py +170 -0
- risk/constants.py +31 -0
- risk/log/__init__.py +9 -0
- risk/log/console.py +16 -0
- risk/log/params.py +198 -0
- risk/neighborhoods/__init__.py +10 -0
- risk/neighborhoods/domains.py +257 -0
- risk/neighborhoods/graph.py +189 -0
- risk/neighborhoods/neighborhoods.py +319 -0
- risk/network/__init__.py +8 -0
- risk/network/geometry.py +165 -0
- risk/network/graph.py +280 -0
- risk/network/io.py +326 -0
- risk/network/plot.py +804 -0
- risk/risk.py +389 -0
- risk/stats/__init__.py +6 -0
- risk/stats/permutation/__init__.py +15 -0
- risk/stats/permutation/_cython/permutation.cpython-38-i386-linux-gnu.so +0 -0
- risk/stats/permutation/_cython/permutation.pyx +82 -0
- risk/stats/permutation/_cython/setup.py +11 -0
- risk/stats/permutation/_python/permutation.py +83 -0
- risk/stats/stats.py +443 -0
- risk_network-0.0.3b0.dist-info/LICENSE +674 -0
- risk_network-0.0.3b0.dist-info/METADATA +745 -0
- risk_network-0.0.3b0.dist-info/RECORD +30 -0
- risk_network-0.0.3b0.dist-info/WHEEL +5 -0
- risk_network-0.0.3b0.dist-info/top_level.txt +1 -0
risk/risk.py
ADDED
@@ -0,0 +1,389 @@
|
|
1
|
+
"""
|
2
|
+
risk/risk
|
3
|
+
~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
import networkx as nx
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
from risk.annotations import AnnotationsIO, define_top_annotations
|
12
|
+
from risk.log import params, print_header
|
13
|
+
from risk.neighborhoods import (
|
14
|
+
define_domains,
|
15
|
+
get_network_neighborhoods,
|
16
|
+
process_neighborhoods,
|
17
|
+
trim_domains_and_top_annotations,
|
18
|
+
)
|
19
|
+
from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
|
20
|
+
from risk.stats import compute_permutation, calculate_significance_matrices
|
21
|
+
|
22
|
+
|
23
|
+
class RISK(NetworkIO, AnnotationsIO):
    """RISK: A class for network analysis and visualization.

    The RISK class integrates functionalities for loading networks, processing annotations,
    and performing network-based statistical analysis, such as neighborhood significance testing.
    """

    def __init__(
        self,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        distance_metric: str = "dijkstra",
        louvain_resolution: float = 0.1,
        min_edges_per_node: int = 0,
        edge_length_threshold: float = 0.5,
        include_edge_weight: bool = True,
        weight_label: str = "weight",
    ):
        """Initialize the RISK class with configuration settings.

        Args:
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            distance_metric (str, optional): Distance metric to use in network analysis. Defaults to "dijkstra".
            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
            edge_length_threshold (float, optional): Edge length threshold for analysis. Defaults to 0.5.
            include_edge_weight (bool, optional): Whether to include edge weights in calculations. Defaults to True.
            weight_label (str, optional): Label for edge weights. Defaults to "weight".
        """
        # Initialize and log network parameters before construction so the run
        # record captures the exact configuration used.
        params.initialize()
        params.log_network(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            min_edges_per_node=min_edges_per_node,
            edge_length_threshold=edge_length_threshold,
            include_edge_weight=include_edge_weight,
            weight_label=weight_label,
        )
        # Initialize parent classes
        NetworkIO.__init__(
            self,
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            distance_metric=distance_metric,
            louvain_resolution=louvain_resolution,
            min_edges_per_node=min_edges_per_node,
            edge_length_threshold=edge_length_threshold,
            include_edge_weight=include_edge_weight,
            weight_label=weight_label,
        )
        AnnotationsIO.__init__(self)

        # Set class attributes (mirrors the NetworkIO configuration so they are
        # accessible directly on the RISK instance)
        self.compute_sphere = compute_sphere
        self.surface_depth = surface_depth
        self.distance_metric = distance_metric
        self.louvain_resolution = louvain_resolution
        self.min_edges_per_node = min_edges_per_node
        self.edge_length_threshold = edge_length_threshold
        self.include_edge_weight = include_edge_weight
        self.weight_label = weight_label

    @property
    def params(self):
        """Access the logged parameters."""
        return params

    def load_neighborhoods(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        score_metric: str = "sum",
        null_distribution: str = "network",
        num_permutations: int = 1000,
        use_cython: bool = True,
        random_seed: int = 888,
        max_workers: int = 1,
    ) -> Dict[str, Any]:
        """Load significant neighborhoods for the network.

        Args:
            network (nx.Graph): The network graph.
            annotations (dict): Annotation data for the network; must contain a "matrix" entry.
            score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
            null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
            num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
            use_cython (bool, optional): Whether to use the compiled Cython permutation backend. Defaults to True.
            random_seed (int, optional): Seed for random number generation. Defaults to 888.
            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.

        Returns:
            dict: Computed significance of neighborhoods.
        """
        print_header("Running permutation test")
        # Log neighborhood analysis parameters
        params.log_neighborhoods(
            score_metric=score_metric,
            null_distribution=null_distribution,
            num_permutations=num_permutations,
            use_cython=use_cython,
            random_seed=random_seed,
            max_workers=max_workers,
        )

        # Display the chosen distance metric; Louvain additionally shows its resolution
        if self.distance_metric == "louvain":
            for_print_distance_metric = f"louvain (resolution={self.louvain_resolution})"
        else:
            for_print_distance_metric = self.distance_metric
        print(f"Distance metric: '{for_print_distance_metric}'")
        # Compute neighborhoods based on the network and distance metric
        neighborhoods = get_network_neighborhoods(
            network,
            self.distance_metric,
            self.edge_length_threshold,
            louvain_resolution=self.louvain_resolution,
            random_seed=random_seed,
        )

        # Log and display permutation test settings
        print(f"Null distribution: '{null_distribution}'")
        print(f"Neighborhood scoring metric: '{score_metric}'")
        print(f"Number of permutations: {num_permutations}")
        # Run the permutation test to compute neighborhood significance
        neighborhood_significance = compute_permutation(
            neighborhoods=neighborhoods,
            annotations=annotations["matrix"],
            score_metric=score_metric,
            null_distribution=null_distribution,
            num_permutations=num_permutations,
            use_cython=use_cython,
            random_seed=random_seed,
            max_workers=max_workers,
        )

        return neighborhood_significance

    def load_graph(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        neighborhoods: Dict[str, Any],
        tail: str = "right",  # OPTIONS: "right" (enrichment), "left" (depletion), "both"
        pval_cutoff: float = 0.01,  # OPTIONS: Any value between 0 to 1
        apply_fdr: bool = False,
        fdr_cutoff: float = 0.9999,  # OPTIONS: Any value between 0 to 1
        impute_depth: int = 1,
        prune_threshold: float = 0.0,
        linkage_criterion: str = "distance",
        linkage_method: str = "average",
        linkage_metric: str = "yule",
        min_cluster_size: int = 5,
        max_cluster_size: int = 1000,
    ) -> NetworkGraph:
        """Load and process the network graph, defining top annotations and domains.

        Args:
            network (nx.Graph): The network graph.
            annotations (dict): Annotation data for the network; must contain "ordered_nodes".
            neighborhoods (dict): Neighborhood enrichment data with "depletion_pvals" and "enrichment_pvals".
            tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
            pval_cutoff (float, optional): P-value cutoff for significance. Defaults to 0.01.
            apply_fdr (bool, optional): Whether to apply FDR correction. Defaults to False.
            fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
            impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
            prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
            linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
            linkage_method (str, optional): Clustering method to use. Defaults to "average".
            linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
            min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
            max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.

        Returns:
            NetworkGraph: A fully initialized and processed NetworkGraph object.
        """
        # Log the parameters and display headers
        print_header("Finding significant neighborhoods")
        params.log_graph(
            tail=tail,
            pval_cutoff=pval_cutoff,
            apply_fdr=apply_fdr,
            fdr_cutoff=fdr_cutoff,
            impute_depth=impute_depth,
            prune_threshold=prune_threshold,
            linkage_criterion=linkage_criterion,
            linkage_method=linkage_method,
            linkage_metric=linkage_metric,
            min_cluster_size=min_cluster_size,
            max_cluster_size=max_cluster_size,
        )

        print(f"P-value cutoff: {pval_cutoff}")
        # BUGFIX: report the actual FDR cutoff value when FDR correction is
        # enabled (previously this printed the boolean `apply_fdr` itself).
        print(f"FDR cutoff: {'N/A' if not apply_fdr else fdr_cutoff}")
        print(
            f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
        )
        # Calculate significant neighborhoods based on the provided parameters
        significant_neighborhoods = calculate_significance_matrices(
            neighborhoods["depletion_pvals"],
            neighborhoods["enrichment_pvals"],
            tail=tail,
            pval_cutoff=pval_cutoff,
            apply_fdr=apply_fdr,
            fdr_cutoff=fdr_cutoff,
        )

        print_header("Processing neighborhoods")
        # Process neighborhoods by imputing and pruning based on the given settings
        processed_neighborhoods = process_neighborhoods(
            network=network,
            neighborhoods=significant_neighborhoods,
            impute_depth=impute_depth,
            prune_threshold=prune_threshold,
        )

        print_header("Finding top annotations")
        print(f"Min cluster size: {min_cluster_size}")
        print(f"Max cluster size: {max_cluster_size}")
        # Define top annotations based on processed neighborhoods
        top_annotations = self._define_top_annotations(
            network=network,
            annotations=annotations,
            neighborhoods=processed_neighborhoods,
            min_cluster_size=min_cluster_size,
            max_cluster_size=max_cluster_size,
        )

        print_header("Optimizing distance threshold for domains")
        # Define domains in the network using the specified clustering settings
        domains = self._define_domains(
            neighborhoods=processed_neighborhoods,
            top_annotations=top_annotations,
            linkage_criterion=linkage_criterion,
            linkage_method=linkage_method,
            linkage_metric=linkage_metric,
        )
        # Trim domains and top annotations based on cluster size constraints
        top_annotations, domains, trimmed_domains = trim_domains_and_top_annotations(
            domains=domains,
            top_annotations=top_annotations,
            min_cluster_size=min_cluster_size,
            max_cluster_size=max_cluster_size,
        )

        # Prepare node mapping and enrichment sums for the final NetworkGraph object
        ordered_nodes = annotations["ordered_nodes"]
        node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
        node_enrichment_sums = processed_neighborhoods["node_enrichment_sums"]

        # Return the fully initialized NetworkGraph object
        return NetworkGraph(
            network=network,
            top_annotations=top_annotations,
            domains=domains,
            trimmed_domains=trimmed_domains,
            node_label_to_id_map=node_label_to_id,
            node_enrichment_sums=node_enrichment_sums,
        )

    def load_plotter(
        self,
        graph: NetworkGraph,
        figsize: tuple = (10, 10),
        background_color: str = "white",
        plot_outline: bool = True,
        outline_color: str = "black",
        outline_scale: float = 1.00,
    ) -> NetworkPlotter:
        """Get a NetworkPlotter object for plotting.

        Args:
            graph (NetworkGraph): The graph to plot.
            figsize (tuple, optional): Size of the figure. Defaults to (10, 10).
            background_color (str, optional): Background color of the plot. Defaults to "white".
            plot_outline (bool, optional): Whether to plot the network outline. Defaults to True.
            outline_color (str, optional): Color of the outline. Defaults to "black".
            outline_scale (float, optional): Scaling factor for the outline. Defaults to 1.00.

        Returns:
            NetworkPlotter: A NetworkPlotter object configured with the given parameters.
        """
        print_header("Loading plotter")
        # Log the plotter settings
        params.log_plotter(
            figsize=figsize,
            background_color=background_color,
            plot_outline=plot_outline,
            outline_color=outline_color,
            outline_scale=outline_scale,
        )
        # Initialize and return a NetworkPlotter object
        return NetworkPlotter(
            graph,
            figsize=figsize,
            background_color=background_color,
            plot_outline=plot_outline,
            outline_color=outline_color,
            outline_scale=outline_scale,
        )

    def _define_top_annotations(
        self,
        network: nx.Graph,
        annotations: Dict[str, Any],
        neighborhoods: Dict[str, Any],
        min_cluster_size: int = 5,
        max_cluster_size: int = 1000,
    ) -> pd.DataFrame:
        """Define top annotations for the network.

        Args:
            network (nx.Graph): The network graph.
            annotations (dict): Annotations data for the network; must contain "ordered_annotations".
            neighborhoods (dict): Neighborhood enrichment data with
                "neighborhood_enrichment_counts" and "binary_enrichment_matrix".
            min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
            max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.

        Returns:
            pd.DataFrame: Top annotations identified within the network.
        """
        # Extract necessary data from annotations and neighborhoods
        ordered_annotations = annotations["ordered_annotations"]
        neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
        neighborhoods_binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
        # Call external function to define top annotations
        return define_top_annotations(
            network=network,
            ordered_annotation_labels=ordered_annotations,
            neighborhood_enrichment_sums=neighborhood_enrichment_sums,
            binary_enrichment_matrix=neighborhoods_binary_enrichment_matrix,
            min_cluster_size=min_cluster_size,
            max_cluster_size=max_cluster_size,
        )

    def _define_domains(
        self,
        neighborhoods: Dict[str, Any],
        top_annotations: pd.DataFrame,
        linkage_criterion: str,
        linkage_method: str,
        linkage_metric: str,
    ) -> pd.DataFrame:
        """Define domains in the network based on enrichment data.

        Args:
            neighborhoods (dict): Enrichment data for neighborhoods; must contain
                "significant_enrichment_matrix".
            top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
            linkage_criterion (str): Clustering criterion for defining domains.
            linkage_method (str): Clustering method to use.
            linkage_metric (str): Metric to use for calculating distances.

        Returns:
            pd.DataFrame: Matrix of defined domains.
        """
        # Extract the significant enrichment matrix from the neighborhoods data
        significant_neighborhoods_enrichment = neighborhoods["significant_enrichment_matrix"]
        # Call external function to define domains based on the extracted data
        return define_domains(
            top_annotations=top_annotations,
            significant_neighborhoods_enrichment=significant_neighborhoods_enrichment,
            linkage_criterion=linkage_criterion,
            linkage_method=linkage_method,
            linkage_metric=linkage_metric,
        )
risk/stats/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/permutation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from risk.stats.permutation._cython.permutation import (
|
7
|
+
compute_neighborhood_score_by_sum_cython,
|
8
|
+
compute_neighborhood_score_by_stdev_cython,
|
9
|
+
compute_neighborhood_score_by_z_score_cython,
|
10
|
+
)
|
11
|
+
from risk.stats.permutation._python.permutation import (
|
12
|
+
compute_neighborhood_score_by_sum_python,
|
13
|
+
compute_neighborhood_score_by_stdev_python,
|
14
|
+
compute_neighborhood_score_by_z_score_python,
|
15
|
+
)
|
Binary file
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# cython: language_level=3
|
2
|
+
import numpy as np
|
3
|
+
cimport numpy as np
|
4
|
+
cimport cython
|
5
|
+
from threadpoolctl import threadpool_limits
|
6
|
+
|
7
|
+
|
8
|
+
@cython.boundscheck(False)  # Disable bounds checking for entire function
@cython.wraparound(False)  # Disable negative index wrapping for entire function
def compute_neighborhood_score_by_sum_cython(
    np.ndarray[np.float32_t, ndim=2] neighborhoods,
    np.ndarray[np.float32_t, ndim=2] annotation_matrix,
):
    """Compute the sum of annotation values for each neighborhood.

    Args:
        neighborhoods: Binary float32 matrix of neighborhood membership
            (neighborhoods x nodes).
        annotation_matrix: Float32 matrix of annotation values
            (nodes x annotations).

    Returns:
        np.ndarray: Per-neighborhood sums of annotation values
            (neighborhoods x annotations).
    """
    cdef np.float32_t[:, :] neighborhood_score
    # Limit the number of threads used by np.dot
    # (keeps BLAS single-threaded so the caller can parallelize at a higher level)
    with threadpool_limits(limits=1, user_api='blas'):
        # Dot product of membership with values yields the per-neighborhood sums
        neighborhood_score = np.dot(neighborhoods, annotation_matrix)

    # Convert the memoryview back to an ndarray for the caller
    return np.asarray(neighborhood_score)
+
|
22
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def compute_neighborhood_score_by_stdev_cython(
    np.ndarray[np.float32_t, ndim=2] neighborhoods,
    np.ndarray[np.float32_t, ndim=2] annotation_matrix,
):
    """Compute the standard deviation of annotation values within each neighborhood.

    Uses the identity Var(X) = E[X^2] - E[X]^2 over each neighborhood's members.

    Args:
        neighborhoods: Binary float32 matrix of neighborhood membership
            (neighborhoods x nodes).
        annotation_matrix: Float32 matrix of annotation values
            (nodes x annotations).

    Returns:
        np.ndarray: Per-neighborhood standard deviations
            (neighborhoods x annotations).
            NOTE(review): an empty neighborhood (row sum 0) produces a
            division by zero here and yields NaN/inf — confirm callers
            never pass empty neighborhoods.
    """
    cdef np.ndarray[np.float32_t, ndim=2] neighborhood_score
    cdef np.ndarray[np.float32_t, ndim=2] EXX
    # Perform dot product directly using the inputs with limited threads
    with threadpool_limits(limits=1, user_api='blas'):
        neighborhood_score = np.dot(neighborhoods, annotation_matrix)

    # Sum across rows for neighborhoods to get N, reshape for broadcasting
    cdef np.ndarray[np.float32_t, ndim=1] N = np.sum(neighborhoods, axis=1)
    cdef np.ndarray[np.float32_t, ndim=2] N_reshaped = N[:, None]
    # Mean of the dot product (E[X] per neighborhood)
    cdef np.ndarray[np.float32_t, ndim=2] M = neighborhood_score / N_reshaped
    # Compute the mean of squares (EXX) with limited threads
    with threadpool_limits(limits=1, user_api='blas'):
        EXX = np.dot(neighborhoods, np.power(annotation_matrix, 2)) / N_reshaped

    # Variance computation: Var(X) = E[X^2] - E[X]^2
    cdef np.ndarray[np.float32_t, ndim=2] variance = EXX - M**2
    # Standard deviation computation
    cdef np.ndarray[np.float32_t, ndim=2] stdev = np.sqrt(variance)

    return stdev
+
|
51
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def compute_neighborhood_score_by_z_score_cython(
    np.ndarray[np.float32_t, ndim=2] neighborhoods,
    np.ndarray[np.float32_t, ndim=2] annotation_matrix,
):
    """Compute Z-scores (mean / standard deviation) for each neighborhood.

    Args:
        neighborhoods: Binary float32 matrix of neighborhood membership
            (neighborhoods x nodes).
        annotation_matrix: Float32 matrix of annotation values
            (nodes x annotations).

    Returns:
        np.ndarray: Per-neighborhood Z-scores (neighborhoods x annotations);
            entries whose division produced NaN are reset to 0.
            NOTE(review): unlike the pure-Python variant, this version does
            not apply an N < 3 minimum-size mask — confirm this difference
            is intentional.
    """
    cdef np.ndarray[np.float32_t, ndim=2] neighborhood_score
    cdef np.ndarray[np.float32_t, ndim=2] EXX
    # Perform dot product directly using the inputs with limited threads
    with threadpool_limits(limits=1, user_api='blas'):
        neighborhood_score = np.dot(neighborhoods, annotation_matrix)

    # Sum across rows for neighborhoods to get N, reshape for broadcasting
    cdef np.ndarray[np.float32_t, ndim=1] N = np.sum(neighborhoods, axis=1)
    cdef np.ndarray[np.float32_t, ndim=2] N_reshaped = N[:, None]
    # Mean of the dot product (E[X] per neighborhood)
    cdef np.ndarray[np.float32_t, ndim=2] M = neighborhood_score / N_reshaped
    # Compute the mean of squares (EXX) with limited threads
    with threadpool_limits(limits=1, user_api='blas'):
        EXX = np.dot(neighborhoods, np.power(annotation_matrix, 2)) / N_reshaped

    # Variance computation: Var(X) = E[X^2] - E[X]^2
    cdef np.ndarray[np.float32_t, ndim=2] variance = EXX - M**2
    # Standard deviation computation
    cdef np.ndarray[np.float32_t, ndim=2] stdev = np.sqrt(variance)
    # Z-score computation with error handling
    with np.errstate(divide='ignore', invalid='ignore'):
        neighborhood_score = np.divide(M, stdev)
    # Handle divisions by zero or stdev == 0
    neighborhood_score[np.isnan(neighborhood_score)] = 0  # Assuming requirement to reset NaN results to 0

    return neighborhood_score
@@ -0,0 +1,11 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/permutation/_cython/setup
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
# setup.py
|
7
|
+
from setuptools import setup
|
8
|
+
from Cython.Build import cythonize
|
9
|
+
import numpy as np
|
10
|
+
|
11
|
+
setup(ext_modules=cythonize("permutation.pyx"), include_dirs=[np.get_include()])
|
@@ -0,0 +1,83 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/permutation/_python/permutation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
|
9
|
+
def compute_neighborhood_score_by_sum_python(
    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
) -> np.ndarray:
    """Compute the sum of attribute values for each neighborhood.

    Args:
        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
        annotation_matrix (np.ndarray): Matrix representing annotation values.

    Returns:
        np.ndarray: Sum of attribute values for each neighborhood.
    """
    # A matrix product of the binary membership matrix with the annotation
    # values yields, per neighborhood, the sum of its members' values.
    return neighborhoods_matrix @ annotation_matrix
|
26
|
+
def compute_neighborhood_score_by_stdev_python(
    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
) -> np.ndarray:
    """Compute the standard deviation of neighborhood scores.

    Args:
        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
        annotation_matrix (np.ndarray): Matrix representing annotation values.

    Returns:
        np.ndarray: Standard deviation of the neighborhood scores.
    """
    # Per-neighborhood sums of annotation values and of their squares
    totals = np.dot(neighborhoods_matrix, annotation_matrix)
    squared_totals = np.dot(neighborhoods_matrix, np.power(annotation_matrix, 2))
    # Neighborhood sizes, shaped (n, 1) so they broadcast across annotation columns
    sizes = np.sum(neighborhoods_matrix, axis=1)[:, None]
    # E[X] and E[X^2] per neighborhood
    mean = totals / sizes
    mean_of_squares = squared_totals / sizes
    # Var(X) = E[X^2] - E[X]^2; the standard deviation is its square root
    return np.sqrt(mean_of_squares - np.power(mean, 2))
+
|
54
|
+
def compute_neighborhood_score_by_z_score_python(
    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
) -> np.ndarray:
    """Compute Z-scores for neighborhood scores.

    Args:
        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
        annotation_matrix (np.ndarray): Matrix representing annotation values.

    Returns:
        np.ndarray: Z-scores for each neighborhood.
    """
    # Per-neighborhood sums of annotation values
    totals = np.dot(neighborhoods_matrix, annotation_matrix)
    # Member counts per neighborhood, broadcast to the full score shape
    counts = np.dot(neighborhoods_matrix, np.ones(annotation_matrix.shape))
    # E[X] and E[X^2] per neighborhood
    mean = totals / counts
    mean_of_squares = np.dot(neighborhoods_matrix, np.power(annotation_matrix, 2)) / counts
    # std = sqrt(E[X^2] - E[X]^2)
    std = np.sqrt(mean_of_squares - np.power(mean, 2))
    # Z = mean / std, masking entries with zero spread or too few members
    with np.errstate(divide="ignore", invalid="ignore"):
        z_scores = np.divide(mean, std)
        z_scores[std == 0] = np.nan  # Handle division by zero
        z_scores[counts < 3] = np.nan  # Apply threshold for minimum number of elements

    return z_scores