risk-network 0.0.4b1__py3-none-any.whl → 0.0.4b3__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -4
- risk/annotations/annotations.py +1 -1
- risk/neighborhoods/neighborhoods.py +5 -1
- risk/network/geometry.py +2 -2
- risk/network/io.py +45 -30
- risk/network/plot.py +43 -8
- risk/risk.py +171 -19
- risk/stats/__init__.py +4 -1
- risk/stats/fisher_exact.py +132 -0
- risk/stats/hypergeom.py +131 -0
- risk/stats/permutation/__init__.py +6 -0
- risk/stats/permutation/permutation.py +212 -0
- risk/stats/{permutation.py → permutation/test_functions.py} +12 -39
- risk/stats/stats.py +1 -212
- {risk_network-0.0.4b1.dist-info → risk_network-0.0.4b3.dist-info}/METADATA +2 -2
- risk_network-0.0.4b3.dist-info/RECORD +30 -0
- {risk_network-0.0.4b1.dist-info → risk_network-0.0.4b3.dist-info}/WHEEL +1 -1
- risk_network-0.0.4b1.dist-info/RECORD +0 -26
- {risk_network-0.0.4b1.dist-info → risk_network-0.0.4b3.dist-info}/LICENSE +0 -0
- {risk_network-0.0.4b1.dist-info → risk_network-0.0.4b3.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -197,7 +197,7 @@ def _simplify_word_list(words: List[str], threshold: float = 0.80) -> List[str]:
|
|
197
197
|
word_counts = Counter(words)
|
198
198
|
filtered_words = []
|
199
199
|
used_words = set()
|
200
|
-
|
200
|
+
# Iterate through the words to find similar words
|
201
201
|
for word in word_counts:
|
202
202
|
if word in used_words:
|
203
203
|
continue
|
@@ -321,7 +321,11 @@ def _calculate_threshold(average_distances: list, distance_threshold: float) ->
|
|
321
321
|
rank_percentiles = np.linspace(0, 1, len(sorted_distances))
|
322
322
|
# Interpolating the ranks to 1000 evenly spaced percentiles
|
323
323
|
interpolated_percentiles = np.linspace(0, 1, 1000)
|
324
|
-
|
324
|
+
try:
|
325
|
+
smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
|
326
|
+
except ValueError as e:
|
327
|
+
raise ValueError("No significant annotations found.") from e
|
328
|
+
|
325
329
|
# Determine the index corresponding to the distance threshold
|
326
330
|
threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
|
327
331
|
# Return the smoothed distance at the calculated index
|
risk/network/geometry.py
CHANGED
@@ -7,13 +7,13 @@ import networkx as nx
|
|
7
7
|
import numpy as np
|
8
8
|
|
9
9
|
|
10
|
-
def
|
10
|
+
def assign_edge_lengths(
|
11
11
|
G: nx.Graph,
|
12
12
|
compute_sphere: bool = True,
|
13
13
|
surface_depth: float = 0.0,
|
14
14
|
include_edge_weight: bool = False,
|
15
15
|
) -> nx.Graph:
|
16
|
-
"""
|
16
|
+
"""Assign edge lengths in the graph, optionally mapping nodes to a sphere and including edge weights.
|
17
17
|
|
18
18
|
Args:
|
19
19
|
G (nx.Graph): The input graph.
|
risk/network/io.py
CHANGED
@@ -6,6 +6,7 @@ This file contains the code for the RISK class and command-line access.
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
import json
|
9
|
+
import os
|
9
10
|
import pickle
|
10
11
|
import shutil
|
11
12
|
import zipfile
|
@@ -14,7 +15,7 @@ from xml.dom import minidom
|
|
14
15
|
import networkx as nx
|
15
16
|
import pandas as pd
|
16
17
|
|
17
|
-
from risk.network.geometry import
|
18
|
+
from risk.network.geometry import assign_edge_lengths
|
18
19
|
from risk.log import params, print_header
|
19
20
|
|
20
21
|
|
@@ -215,14 +216,20 @@ class NetworkIO:
|
|
215
216
|
params.log_network(filetype=filetype, filepath=str(filepath))
|
216
217
|
self._log_loading(filetype, filepath=filepath)
|
217
218
|
cys_files = []
|
219
|
+
tmp_dir = ".tmp_cytoscape"
|
218
220
|
# Try / finally to remove unzipped files
|
219
221
|
try:
|
220
|
-
#
|
222
|
+
# Create the temporary directory if it doesn't exist
|
223
|
+
if not os.path.exists(tmp_dir):
|
224
|
+
os.makedirs(tmp_dir)
|
225
|
+
|
226
|
+
# Unzip CYS file into the temporary directory
|
221
227
|
with zipfile.ZipFile(filepath, "r") as zip_ref:
|
222
228
|
cys_files = zip_ref.namelist()
|
223
|
-
zip_ref.extractall(
|
229
|
+
zip_ref.extractall(tmp_dir)
|
230
|
+
|
224
231
|
# Get first view and network instances
|
225
|
-
cys_view_files = [cf for cf in cys_files if "/views/" in cf]
|
232
|
+
cys_view_files = [os.path.join(tmp_dir, cf) for cf in cys_files if "/views/" in cf]
|
226
233
|
cys_view_file = (
|
227
234
|
cys_view_files[0]
|
228
235
|
if not view_name
|
@@ -244,7 +251,7 @@ class NetworkIO:
|
|
244
251
|
# Read the node attributes (from /tables/)
|
245
252
|
attribute_metadata_keywords = ["/tables/", "SHARED_ATTRS", "edge.cytable"]
|
246
253
|
attribute_metadata = [
|
247
|
-
cf
|
254
|
+
os.path.join(tmp_dir, cf)
|
248
255
|
for cf in cys_files
|
249
256
|
if all(keyword in cf for keyword in attribute_metadata_keywords)
|
250
257
|
][0]
|
@@ -291,10 +298,9 @@ class NetworkIO:
|
|
291
298
|
return self._initialize_graph(G)
|
292
299
|
|
293
300
|
finally:
|
294
|
-
# Remove
|
295
|
-
|
296
|
-
|
297
|
-
shutil.rmtree(dirname)
|
301
|
+
# Remove the temporary directory and its contents
|
302
|
+
if os.path.exists(tmp_dir):
|
303
|
+
shutil.rmtree(tmp_dir)
|
298
304
|
|
299
305
|
@classmethod
|
300
306
|
def load_cytoscape_json_network(
|
@@ -402,12 +408,13 @@ class NetworkIO:
|
|
402
408
|
Returns:
|
403
409
|
nx.Graph: The processed and validated graph.
|
404
410
|
"""
|
411
|
+
self._validate_nodes(G)
|
412
|
+
self._assign_edge_weights(G)
|
413
|
+
self._assign_edge_lengths(G)
|
414
|
+
self._remove_invalid_graph_properties(G)
|
405
415
|
# IMPORTANT: This is where the graph node labels are converted to integers
|
416
|
+
# Make sure to perform this step after all other processing
|
406
417
|
G = nx.relabel_nodes(G, {node: idx for idx, node in enumerate(G.nodes)})
|
407
|
-
self._remove_invalid_graph_properties(G)
|
408
|
-
self._validate_edges(G)
|
409
|
-
self._validate_nodes(G)
|
410
|
-
self._process_graph(G)
|
411
418
|
return G
|
412
419
|
|
413
420
|
def _remove_invalid_graph_properties(self, G: nx.Graph) -> None:
|
@@ -416,18 +423,26 @@ class NetworkIO:
|
|
416
423
|
Args:
|
417
424
|
G (nx.Graph): A NetworkX graph object.
|
418
425
|
"""
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
426
|
+
# First, Remove self-loop edges to ensure correct edge count
|
427
|
+
G.remove_edges_from(list(nx.selfloop_edges(G)))
|
428
|
+
# Then, iteratively remove nodes with fewer edges than the specified threshold
|
429
|
+
while True:
|
430
|
+
nodes_to_remove = [
|
431
|
+
node for node in G.nodes() if G.degree(node) < self.min_edges_per_node
|
432
|
+
]
|
433
|
+
if not nodes_to_remove:
|
434
|
+
break # Exit loop if no more nodes to remove
|
435
|
+
|
436
|
+
# Remove the nodes and their associated edges
|
437
|
+
G.remove_nodes_from(nodes_to_remove)
|
438
|
+
|
439
|
+
# Optionally: Remove any isolated nodes if needed
|
440
|
+
isolated_nodes = list(nx.isolates(G))
|
441
|
+
if isolated_nodes:
|
442
|
+
G.remove_nodes_from(isolated_nodes)
|
443
|
+
|
444
|
+
def _assign_edge_weights(self, G: nx.Graph) -> None:
|
445
|
+
"""Assign weights to the edges in the graph.
|
431
446
|
|
432
447
|
Args:
|
433
448
|
G (nx.Graph): A NetworkX graph object.
|
@@ -456,13 +471,13 @@ class NetworkIO:
|
|
456
471
|
), f"Node {node} is missing 'x' or 'y' position attributes."
|
457
472
|
assert "label" in attrs, f"Node {node} is missing a 'label' attribute."
|
458
473
|
|
459
|
-
def
|
474
|
+
def _assign_edge_lengths(self, G: nx.Graph) -> None:
|
460
475
|
"""Prepare the network by adjusting surface depth and calculating edge lengths.
|
461
476
|
|
462
477
|
Args:
|
463
478
|
G (nx.Graph): The input network graph.
|
464
479
|
"""
|
465
|
-
|
480
|
+
assign_edge_lengths(
|
466
481
|
G,
|
467
482
|
compute_sphere=self.compute_sphere,
|
468
483
|
surface_depth=self.surface_depth,
|
@@ -484,9 +499,9 @@ class NetworkIO:
|
|
484
499
|
print(f"Filetype: {filetype}")
|
485
500
|
if filepath:
|
486
501
|
print(f"Filepath: {filepath}")
|
487
|
-
print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
|
488
|
-
if self.compute_sphere:
|
489
|
-
print(f"Surface depth: {self.surface_depth}")
|
490
502
|
print(f"Edge weight: {'Included' if self.include_edge_weight else 'Excluded'}")
|
491
503
|
if self.include_edge_weight:
|
492
504
|
print(f"Weight label: {self.weight_label}")
|
505
|
+
print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
|
506
|
+
if self.compute_sphere:
|
507
|
+
print(f"Surface depth: {self.surface_depth}")
|
risk/network/plot.py
CHANGED
@@ -400,8 +400,10 @@ class NetworkPlotter:
|
|
400
400
|
fontcolor: Union[str, np.ndarray] = "black",
|
401
401
|
arrow_linewidth: float = 1,
|
402
402
|
arrow_color: Union[str, np.ndarray] = "black",
|
403
|
+
max_labels: Union[int, None] = None,
|
403
404
|
max_words: int = 10,
|
404
405
|
min_words: int = 1,
|
406
|
+
words_to_omit: Union[List[str], None] = None,
|
405
407
|
) -> None:
|
406
408
|
"""Annotate the network graph with labels for different domains, positioned around the network for clarity.
|
407
409
|
|
@@ -413,8 +415,10 @@ class NetworkPlotter:
|
|
413
415
|
fontcolor (str or np.ndarray, optional): Color of the label text. Can be a string or RGBA array. Defaults to "black".
|
414
416
|
arrow_linewidth (float, optional): Line width of the arrows pointing to centroids. Defaults to 1.
|
415
417
|
arrow_color (str or np.ndarray, optional): Color of the arrows. Can be a string or RGBA array. Defaults to "black".
|
418
|
+
max_labels (int, optional): Maximum number of labels to plot. Defaults to None (no limit).
|
416
419
|
max_words (int, optional): Maximum number of words in a label. Defaults to 10.
|
417
420
|
min_words (int, optional): Minimum number of words required to display a label. Defaults to 1.
|
421
|
+
words_to_omit (List[str], optional): List of words to omit from the labels. Defaults to None.
|
418
422
|
"""
|
419
423
|
# Log the plotting parameters
|
420
424
|
params.log_plotter(
|
@@ -425,14 +429,20 @@ class NetworkPlotter:
|
|
425
429
|
label_fontcolor="custom" if isinstance(fontcolor, np.ndarray) else fontcolor,
|
426
430
|
label_arrow_linewidth=arrow_linewidth,
|
427
431
|
label_arrow_color="custom" if isinstance(arrow_color, np.ndarray) else arrow_color,
|
432
|
+
label_max_labels=max_labels,
|
428
433
|
label_max_words=max_words,
|
429
434
|
label_min_words=min_words,
|
435
|
+
label_words_to_omit=words_to_omit, # Log words_to_omit parameter
|
430
436
|
)
|
437
|
+
|
431
438
|
# Convert color strings to RGBA arrays if necessary
|
432
439
|
if isinstance(fontcolor, str):
|
433
|
-
fontcolor = self.
|
440
|
+
fontcolor = self.get_annotated_label_colors(color=fontcolor)
|
434
441
|
if isinstance(arrow_color, str):
|
435
|
-
arrow_color = self.
|
442
|
+
arrow_color = self.get_annotated_label_colors(color=arrow_color)
|
443
|
+
# Normalize words_to_omit to lowercase
|
444
|
+
if words_to_omit:
|
445
|
+
words_to_omit = set(word.lower() for word in words_to_omit)
|
436
446
|
|
437
447
|
# Calculate the center and radius of the network
|
438
448
|
domain_centroids = {}
|
@@ -443,18 +453,45 @@ class NetworkPlotter:
|
|
443
453
|
# Initialize empty lists to collect valid indices
|
444
454
|
valid_indices = []
|
445
455
|
filtered_domain_centroids = {}
|
456
|
+
filtered_domain_terms = {}
|
446
457
|
# Loop through domain_centroids with index
|
447
458
|
for idx, (domain, centroid) in enumerate(domain_centroids.items()):
|
459
|
+
# Process the domain term
|
460
|
+
terms = self.graph.trimmed_domain_to_term[domain].split(" ")
|
461
|
+
# Remove words_to_omit
|
462
|
+
if words_to_omit:
|
463
|
+
terms = [term for term in terms if term.lower() not in words_to_omit]
|
464
|
+
# Trim to max_words
|
465
|
+
terms = terms[:max_words]
|
448
466
|
# Check if the domain passes the word count condition
|
449
|
-
if len(
|
467
|
+
if len(terms) >= min_words:
|
450
468
|
# Add to filtered_domain_centroids
|
451
469
|
filtered_domain_centroids[domain] = centroid
|
470
|
+
# Store the trimmed terms
|
471
|
+
filtered_domain_terms[domain] = " ".join(terms)
|
452
472
|
# Keep track of the valid index
|
453
473
|
valid_indices.append(idx)
|
454
474
|
|
455
|
-
#
|
456
|
-
|
457
|
-
|
475
|
+
# If max_labels is specified and less than the available labels
|
476
|
+
if max_labels is not None and max_labels < len(filtered_domain_centroids):
|
477
|
+
step = len(filtered_domain_centroids) / max_labels
|
478
|
+
selected_indices = [int(i * step) for i in range(max_labels)]
|
479
|
+
filtered_domain_centroids = {
|
480
|
+
k: v
|
481
|
+
for i, (k, v) in enumerate(filtered_domain_centroids.items())
|
482
|
+
if i in selected_indices
|
483
|
+
}
|
484
|
+
filtered_domain_terms = {
|
485
|
+
k: v
|
486
|
+
for i, (k, v) in enumerate(filtered_domain_terms.items())
|
487
|
+
if i in selected_indices
|
488
|
+
}
|
489
|
+
fontcolor = fontcolor[selected_indices]
|
490
|
+
arrow_color = arrow_color[selected_indices]
|
491
|
+
|
492
|
+
# Update the terms in the graph after omitting words and filtering
|
493
|
+
for domain, terms in filtered_domain_terms.items():
|
494
|
+
self.graph.trimmed_domain_to_term[domain] = terms
|
458
495
|
|
459
496
|
# Calculate the bounding box around the network
|
460
497
|
center, radius = _calculate_bounding_box(
|
@@ -531,12 +568,10 @@ class NetworkPlotter:
|
|
531
568
|
|
532
569
|
# Calculate the centroid of the provided nodes
|
533
570
|
centroid = self._calculate_domain_centroid(node_ids)
|
534
|
-
|
535
571
|
# Calculate the bounding box around the network
|
536
572
|
center, radius = _calculate_bounding_box(
|
537
573
|
self.graph.node_coordinates, radius_margin=perimeter_scale
|
538
574
|
)
|
539
|
-
|
540
575
|
# Convert radial position to radians, adjusting for a 90-degree rotation
|
541
576
|
radial_radians = np.deg2rad(radial_position - 90)
|
542
577
|
label_position = (
|
risk/risk.py
CHANGED
@@ -6,6 +6,7 @@ risk/risk
|
|
6
6
|
from typing import Any, Dict
|
7
7
|
|
8
8
|
import networkx as nx
|
9
|
+
import numpy as np
|
9
10
|
import pandas as pd
|
10
11
|
|
11
12
|
from risk.annotations import AnnotationsIO, define_top_annotations
|
@@ -17,7 +18,12 @@ from risk.neighborhoods import (
|
|
17
18
|
trim_domains_and_top_annotations,
|
18
19
|
)
|
19
20
|
from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
|
20
|
-
from risk.stats import
|
21
|
+
from risk.stats import (
|
22
|
+
calculate_significance_matrices,
|
23
|
+
compute_fisher_exact_test,
|
24
|
+
compute_hypergeom_test,
|
25
|
+
compute_permutation_test,
|
26
|
+
)
|
21
27
|
|
22
28
|
|
23
29
|
class RISK(NetworkIO, AnnotationsIO):
|
@@ -39,7 +45,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
39
45
|
"""Access the logged parameters."""
|
40
46
|
return params
|
41
47
|
|
42
|
-
def
|
48
|
+
def load_neighborhoods_by_permutation(
|
43
49
|
self,
|
44
50
|
network: nx.Graph,
|
45
51
|
annotations: Dict[str, Any],
|
@@ -52,7 +58,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
52
58
|
random_seed: int = 888,
|
53
59
|
max_workers: int = 1,
|
54
60
|
) -> Dict[str, Any]:
|
55
|
-
"""Load significant neighborhoods for the network.
|
61
|
+
"""Load significant neighborhoods for the network using the permutation test.
|
56
62
|
|
57
63
|
Args:
|
58
64
|
network (nx.Graph): The network graph.
|
@@ -75,6 +81,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
75
81
|
distance_metric=distance_metric,
|
76
82
|
louvain_resolution=louvain_resolution,
|
77
83
|
edge_length_threshold=edge_length_threshold,
|
84
|
+
statistical_test_function="permutation",
|
78
85
|
score_metric=score_metric,
|
79
86
|
null_distribution=null_distribution,
|
80
87
|
num_permutations=num_permutations,
|
@@ -82,30 +89,22 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
82
89
|
max_workers=max_workers,
|
83
90
|
)
|
84
91
|
|
85
|
-
#
|
86
|
-
|
87
|
-
for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
|
88
|
-
else:
|
89
|
-
for_print_distance_metric = distance_metric
|
90
|
-
print(f"Distance metric: '{for_print_distance_metric}'")
|
91
|
-
print(f"Edge length threshold: {edge_length_threshold}")
|
92
|
-
# Compute neighborhoods based on the network and distance metric
|
93
|
-
neighborhoods = get_network_neighborhoods(
|
92
|
+
# Load neighborhoods based on the network and distance metric
|
93
|
+
neighborhoods = self._load_neighborhoods(
|
94
94
|
network,
|
95
95
|
distance_metric,
|
96
|
-
edge_length_threshold,
|
97
96
|
louvain_resolution=louvain_resolution,
|
97
|
+
edge_length_threshold=edge_length_threshold,
|
98
98
|
random_seed=random_seed,
|
99
99
|
)
|
100
100
|
|
101
101
|
# Log and display permutation test settings
|
102
|
-
print(f"Null distribution: '{null_distribution}'")
|
103
102
|
print(f"Neighborhood scoring metric: '{score_metric}'")
|
103
|
+
print(f"Null distribution: '{null_distribution}'")
|
104
104
|
print(f"Number of permutations: {num_permutations}")
|
105
|
-
print(f"Random seed: {random_seed}")
|
106
105
|
print(f"Maximum workers: {max_workers}")
|
107
|
-
# Run
|
108
|
-
neighborhood_significance =
|
106
|
+
# Run permutation test to compute neighborhood significance
|
107
|
+
neighborhood_significance = compute_permutation_test(
|
109
108
|
neighborhoods=neighborhoods,
|
110
109
|
annotations=annotations["matrix"],
|
111
110
|
score_metric=score_metric,
|
@@ -117,6 +116,116 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
117
116
|
|
118
117
|
return neighborhood_significance
|
119
118
|
|
119
|
+
def load_neighborhoods_by_fisher_exact(
|
120
|
+
self,
|
121
|
+
network: nx.Graph,
|
122
|
+
annotations: Dict[str, Any],
|
123
|
+
distance_metric: str = "dijkstra",
|
124
|
+
louvain_resolution: float = 0.1,
|
125
|
+
edge_length_threshold: float = 0.5,
|
126
|
+
random_seed: int = 888,
|
127
|
+
max_workers: int = 1,
|
128
|
+
) -> Dict[str, Any]:
|
129
|
+
"""Load significant neighborhoods for the network using the Fisher's exact test.
|
130
|
+
|
131
|
+
Args:
|
132
|
+
network (nx.Graph): The network graph.
|
133
|
+
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
134
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
|
135
|
+
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
136
|
+
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
137
|
+
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
138
|
+
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
139
|
+
|
140
|
+
Returns:
|
141
|
+
dict: Computed significance of neighborhoods.
|
142
|
+
"""
|
143
|
+
print_header("Running Fisher's exact test")
|
144
|
+
# Log neighborhood analysis parameters
|
145
|
+
params.log_neighborhoods(
|
146
|
+
distance_metric=distance_metric,
|
147
|
+
louvain_resolution=louvain_resolution,
|
148
|
+
edge_length_threshold=edge_length_threshold,
|
149
|
+
statistical_test_function="fisher_exact",
|
150
|
+
random_seed=random_seed,
|
151
|
+
max_workers=max_workers,
|
152
|
+
)
|
153
|
+
|
154
|
+
# Load neighborhoods based on the network and distance metric
|
155
|
+
neighborhoods = self._load_neighborhoods(
|
156
|
+
network,
|
157
|
+
distance_metric,
|
158
|
+
louvain_resolution=louvain_resolution,
|
159
|
+
edge_length_threshold=edge_length_threshold,
|
160
|
+
random_seed=random_seed,
|
161
|
+
)
|
162
|
+
|
163
|
+
# Log and display Fisher's exact test settings
|
164
|
+
print(f"Maximum workers: {max_workers}")
|
165
|
+
# Run Fisher's exact test to compute neighborhood significance
|
166
|
+
neighborhood_significance = compute_fisher_exact_test(
|
167
|
+
neighborhoods=neighborhoods,
|
168
|
+
annotations=annotations["matrix"],
|
169
|
+
max_workers=max_workers,
|
170
|
+
)
|
171
|
+
|
172
|
+
return neighborhood_significance
|
173
|
+
|
174
|
+
def load_neighborhoods_by_hypergeom(
|
175
|
+
self,
|
176
|
+
network: nx.Graph,
|
177
|
+
annotations: Dict[str, Any],
|
178
|
+
distance_metric: str = "dijkstra",
|
179
|
+
louvain_resolution: float = 0.1,
|
180
|
+
edge_length_threshold: float = 0.5,
|
181
|
+
random_seed: int = 888,
|
182
|
+
max_workers: int = 1,
|
183
|
+
) -> Dict[str, Any]:
|
184
|
+
"""Load significant neighborhoods for the network using the hypergeometric test.
|
185
|
+
|
186
|
+
Args:
|
187
|
+
network (nx.Graph): The network graph.
|
188
|
+
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
189
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
|
190
|
+
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
191
|
+
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
192
|
+
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
193
|
+
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
194
|
+
|
195
|
+
Returns:
|
196
|
+
dict: Computed significance of neighborhoods.
|
197
|
+
"""
|
198
|
+
print_header("Running hypergeometric test")
|
199
|
+
# Log neighborhood analysis parameters
|
200
|
+
params.log_neighborhoods(
|
201
|
+
distance_metric=distance_metric,
|
202
|
+
louvain_resolution=louvain_resolution,
|
203
|
+
edge_length_threshold=edge_length_threshold,
|
204
|
+
statistical_test_function="hypergeom",
|
205
|
+
random_seed=random_seed,
|
206
|
+
max_workers=max_workers,
|
207
|
+
)
|
208
|
+
|
209
|
+
# Load neighborhoods based on the network and distance metric
|
210
|
+
neighborhoods = self._load_neighborhoods(
|
211
|
+
network,
|
212
|
+
distance_metric,
|
213
|
+
louvain_resolution=louvain_resolution,
|
214
|
+
edge_length_threshold=edge_length_threshold,
|
215
|
+
random_seed=random_seed,
|
216
|
+
)
|
217
|
+
|
218
|
+
# Log and display hypergeometric test settings
|
219
|
+
print(f"Maximum workers: {max_workers}")
|
220
|
+
# Run hypergeometric test to compute neighborhood significance
|
221
|
+
neighborhood_significance = compute_hypergeom_test(
|
222
|
+
neighborhoods=neighborhoods,
|
223
|
+
annotations=annotations["matrix"],
|
224
|
+
max_workers=max_workers,
|
225
|
+
)
|
226
|
+
|
227
|
+
return neighborhood_significance
|
228
|
+
|
120
229
|
def load_graph(
|
121
230
|
self,
|
122
231
|
network: nx.Graph,
|
@@ -140,7 +249,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
140
249
|
annotations (pd.DataFrame): DataFrame containing annotation data for the network.
|
141
250
|
neighborhoods (dict): Neighborhood enrichment data.
|
142
251
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
143
|
-
pval_cutoff (float, optional):
|
252
|
+
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
144
253
|
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
145
254
|
impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
|
146
255
|
prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
|
@@ -168,7 +277,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
168
277
|
max_cluster_size=max_cluster_size,
|
169
278
|
)
|
170
279
|
|
171
|
-
print(f"
|
280
|
+
print(f"p-value cutoff: {pval_cutoff}")
|
172
281
|
print(f"FDR BH cutoff: {fdr_cutoff}")
|
173
282
|
print(
|
174
283
|
f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
|
@@ -266,6 +375,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
266
375
|
outline_color=outline_color,
|
267
376
|
outline_scale=outline_scale,
|
268
377
|
)
|
378
|
+
|
269
379
|
# Initialize and return a NetworkPlotter object
|
270
380
|
return NetworkPlotter(
|
271
381
|
graph,
|
@@ -276,6 +386,48 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
276
386
|
outline_scale=outline_scale,
|
277
387
|
)
|
278
388
|
|
389
|
+
def _load_neighborhoods(
|
390
|
+
self,
|
391
|
+
network: nx.Graph,
|
392
|
+
distance_metric: str = "dijkstra",
|
393
|
+
louvain_resolution: float = 0.1,
|
394
|
+
edge_length_threshold: float = 0.5,
|
395
|
+
random_seed: int = 888,
|
396
|
+
) -> np.ndarray:
|
397
|
+
"""Load significant neighborhoods for the network.
|
398
|
+
|
399
|
+
Args:
|
400
|
+
network (nx.Graph): The network graph.
|
401
|
+
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
402
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
|
403
|
+
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
404
|
+
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
405
|
+
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
406
|
+
|
407
|
+
Returns:
|
408
|
+
np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
|
409
|
+
"""
|
410
|
+
# Display the chosen distance metric
|
411
|
+
if distance_metric == "louvain":
|
412
|
+
for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
|
413
|
+
else:
|
414
|
+
for_print_distance_metric = distance_metric
|
415
|
+
# Log and display neighborhood settings
|
416
|
+
print(f"Distance metric: '{for_print_distance_metric}'")
|
417
|
+
print(f"Edge length threshold: {edge_length_threshold}")
|
418
|
+
print(f"Random seed: {random_seed}")
|
419
|
+
|
420
|
+
# Compute neighborhoods based on the network and distance metric
|
421
|
+
neighborhoods = get_network_neighborhoods(
|
422
|
+
network,
|
423
|
+
distance_metric,
|
424
|
+
edge_length_threshold,
|
425
|
+
louvain_resolution=louvain_resolution,
|
426
|
+
random_seed=random_seed,
|
427
|
+
)
|
428
|
+
|
429
|
+
return neighborhoods
|
430
|
+
|
279
431
|
def _define_top_annotations(
|
280
432
|
self,
|
281
433
|
network: nx.Graph,
|
risk/stats/__init__.py
CHANGED
@@ -3,4 +3,7 @@ risk/stats
|
|
3
3
|
~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from .stats import calculate_significance_matrices
|
6
|
+
from .stats import calculate_significance_matrices
|
7
|
+
from .fisher_exact import compute_fisher_exact_test
|
8
|
+
from .hypergeom import compute_hypergeom_test
|
9
|
+
from .permutation import compute_permutation_test
|