risk-network 0.0.3b0__tar.gz → 0.0.3b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/MANIFEST.in +1 -2
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/PKG-INFO +14 -8
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/README.md +12 -6
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/pyproject.toml +2 -2
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/__init__.py +1 -1
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/annotations/annotations.py +9 -9
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/annotations/io.py +62 -49
- risk_network-0.0.3b0/risk/neighborhoods/graph.py → risk_network-0.0.3b2/risk/neighborhoods/community.py +2 -2
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/neighborhoods/neighborhoods.py +1 -1
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/network/io.py +2 -2
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/network/plot.py +33 -42
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/risk.py +1 -8
- {risk_network-0.0.3b0/risk/stats/permutation/_python → risk_network-0.0.3b2/risk/stats}/permutation.py +25 -20
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/stats/stats.py +74 -146
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk_network.egg-info/PKG-INFO +14 -8
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk_network.egg-info/SOURCES.txt +2 -5
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk_network.egg-info/requires.txt +1 -1
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/setup.py +3 -15
- risk_network-0.0.3b0/risk/stats/permutation/__init__.py +0 -15
- risk_network-0.0.3b0/risk/stats/permutation/_cython/permutation.pyx +0 -82
- risk_network-0.0.3b0/risk/stats/permutation/_cython/setup.py +0 -11
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/LICENSE +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/annotations/__init__.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/constants.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/log/__init__.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/log/console.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/log/params.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/neighborhoods/domains.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/network/__init__.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/network/geometry.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/network/graph.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk/stats/__init__.py +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.3b0 → risk_network-0.0.3b2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.3b0
|
3
|
+
Version: 0.0.3b2
|
4
4
|
Summary: A Python package for biological network analysis
|
5
5
|
Author: Ira Horecka
|
6
6
|
Author-email: Ira Horecka <ira89@icloud.com>
|
@@ -694,13 +694,13 @@ Classifier: Development Status :: 4 - Beta
|
|
694
694
|
Requires-Python: >=3.7
|
695
695
|
Description-Content-Type: text/markdown
|
696
696
|
License-File: LICENSE
|
697
|
-
Requires-Dist: cython
|
698
697
|
Requires-Dist: ipywidgets
|
699
698
|
Requires-Dist: markov_clustering
|
700
699
|
Requires-Dist: matplotlib
|
701
700
|
Requires-Dist: networkx
|
702
701
|
Requires-Dist: nltk==3.8.1
|
703
702
|
Requires-Dist: numpy
|
703
|
+
Requires-Dist: openpyxl
|
704
704
|
Requires-Dist: pandas
|
705
705
|
Requires-Dist: python-louvain
|
706
706
|
Requires-Dist: scikit-learn
|
@@ -709,15 +709,21 @@ Requires-Dist: statsmodels
|
|
709
709
|
Requires-Dist: threadpoolctl
|
710
710
|
Requires-Dist: tqdm
|
711
711
|
|
712
|
-
|
713
|
-
|
714
|
-
<
|
712
|
+
<p align="center">
|
713
|
+
<img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="400" />
|
714
|
+
<img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="400" />
|
715
|
+
</p>
|
715
716
|
|
716
|
-
<p align="
|
717
|
-
|
718
|
-
|
717
|
+
<p align="center">
|
718
|
+
<a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
|
719
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.7+-blue.svg" alt="Python 3.7+"></a>
|
720
|
+
<a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
|
719
721
|
</p>
|
720
722
|
|
723
|
+
## RISK
|
724
|
+
|
725
|
+
#### Regional Inference of Significant Kinships
|
726
|
+
|
721
727
|
RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
|
722
728
|
|
723
729
|
## Features
|
@@ -1,12 +1,18 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
<
|
1
|
+
<p align="center">
|
2
|
+
<img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="400" />
|
3
|
+
<img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="400" />
|
4
|
+
</p>
|
4
5
|
|
5
|
-
<p align="
|
6
|
-
|
7
|
-
|
6
|
+
<p align="center">
|
7
|
+
<a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
|
8
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.7+-blue.svg" alt="Python 3.7+"></a>
|
9
|
+
<a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
|
8
10
|
</p>
|
9
11
|
|
12
|
+
## RISK
|
13
|
+
|
14
|
+
#### Regional Inference of Significant Kinships
|
15
|
+
|
10
16
|
RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
|
11
17
|
|
12
18
|
## Features
|
@@ -1,5 +1,5 @@
|
|
1
1
|
[build-system]
|
2
|
-
requires = ["setuptools", "wheel", "
|
2
|
+
requires = ["setuptools", "wheel", "numpy"]
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
|
5
5
|
[project]
|
@@ -26,13 +26,13 @@ classifiers = [
|
|
26
26
|
"Development Status :: 4 - Beta",
|
27
27
|
]
|
28
28
|
dependencies = [
|
29
|
-
"cython",
|
30
29
|
"ipywidgets",
|
31
30
|
"markov_clustering",
|
32
31
|
"matplotlib",
|
33
32
|
"networkx",
|
34
33
|
"nltk==3.8.1",
|
35
34
|
"numpy",
|
35
|
+
"openpyxl",
|
36
36
|
"pandas",
|
37
37
|
"python-louvain",
|
38
38
|
"scikit-learn",
|
@@ -139,15 +139,15 @@ def define_top_annotations(
|
|
139
139
|
size_connected_components <= max_cluster_size,
|
140
140
|
)
|
141
141
|
)
|
142
|
-
annotations_enrichment_matrix.loc[
|
143
|
-
|
144
|
-
|
145
|
-
annotations_enrichment_matrix.at[
|
146
|
-
|
147
|
-
|
148
|
-
annotations_enrichment_matrix.loc[
|
149
|
-
|
150
|
-
|
142
|
+
annotations_enrichment_matrix.loc[attribute, "num connected components"] = (
|
143
|
+
num_connected_components
|
144
|
+
)
|
145
|
+
annotations_enrichment_matrix.at[attribute, "size connected components"] = (
|
146
|
+
size_connected_components
|
147
|
+
)
|
148
|
+
annotations_enrichment_matrix.loc[attribute, "num large connected components"] = (
|
149
|
+
num_large_connected_components
|
150
|
+
)
|
151
151
|
|
152
152
|
# Filter out attributes with more than one connected component
|
153
153
|
annotations_enrichment_matrix.loc[
|
@@ -45,66 +45,70 @@ class AnnotationsIO:
|
|
45
45
|
# Process the JSON data and return it in the context of the network
|
46
46
|
return load_annotations(network, annotations_input)
|
47
47
|
|
48
|
-
def
|
48
|
+
def load_excel_annotation(
|
49
49
|
self,
|
50
50
|
filepath: str,
|
51
51
|
network: nx.Graph,
|
52
52
|
label_colname: str = "label",
|
53
53
|
nodes_colname: str = "nodes",
|
54
|
-
|
54
|
+
sheet_name: str = "Sheet1",
|
55
|
+
nodes_delimiter: str = ";",
|
55
56
|
) -> Dict[str, Any]:
|
56
|
-
"""Load annotations from
|
57
|
+
"""Load annotations from an Excel file and associate them with the network.
|
57
58
|
|
58
59
|
Args:
|
59
|
-
filepath (str): Path to the
|
60
|
-
network (
|
61
|
-
label_colname (str): Name of the column containing the labels.
|
62
|
-
nodes_colname (str): Name of the column containing the nodes.
|
63
|
-
|
60
|
+
filepath (str): Path to the Excel annotations file.
|
61
|
+
network (nx.Graph): The NetworkX graph to which the annotations are related.
|
62
|
+
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
63
|
+
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
64
|
+
sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
|
65
|
+
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
64
66
|
|
65
67
|
Returns:
|
66
|
-
|
68
|
+
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
69
|
+
linked to the provided network.
|
67
70
|
"""
|
68
|
-
filetype = "
|
71
|
+
filetype = "Excel"
|
69
72
|
params.log_annotations(filepath=filepath, filetype=filetype)
|
70
73
|
_log_loading(filetype, filepath=filepath)
|
71
|
-
# Load the
|
72
|
-
|
73
|
-
#
|
74
|
-
|
74
|
+
# Load the specified sheet from the Excel file
|
75
|
+
df = pd.read_excel(filepath, sheet_name=sheet_name)
|
76
|
+
# Split the nodes column by the specified nodes_delimiter
|
77
|
+
df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
|
78
|
+
# Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
|
79
|
+
label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
|
80
|
+
return load_annotations(network, label_node_dict)
|
75
81
|
|
76
|
-
def
|
82
|
+
def load_csv_annotation(
|
77
83
|
self,
|
78
84
|
filepath: str,
|
79
85
|
network: nx.Graph,
|
80
86
|
label_colname: str = "label",
|
81
87
|
nodes_colname: str = "nodes",
|
82
|
-
|
83
|
-
delimiter: str = ";",
|
88
|
+
nodes_delimiter: str = ";",
|
84
89
|
) -> Dict[str, Any]:
|
85
|
-
"""Load annotations from
|
90
|
+
"""Load annotations from a CSV file and associate them with the network.
|
86
91
|
|
87
92
|
Args:
|
88
|
-
filepath (str): Path to the
|
89
|
-
network (
|
90
|
-
label_colname (str): Name of the column containing the labels.
|
91
|
-
nodes_colname (str): Name of the column containing the nodes.
|
92
|
-
|
93
|
-
delimiter (str): Delimiter used to parse the nodes column (default is ';').
|
93
|
+
filepath (str): Path to the CSV annotations file.
|
94
|
+
network (nx.Graph): The NetworkX graph to which the annotations are related.
|
95
|
+
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
96
|
+
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
97
|
+
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
94
98
|
|
95
99
|
Returns:
|
96
|
-
|
100
|
+
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
101
|
+
linked to the provided network.
|
97
102
|
"""
|
98
|
-
filetype = "
|
103
|
+
filetype = "CSV"
|
99
104
|
params.log_annotations(filepath=filepath, filetype=filetype)
|
100
105
|
_log_loading(filetype, filepath=filepath)
|
101
|
-
# Load the
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
#
|
106
|
-
|
107
|
-
return load_annotations(network, label_node_dict)
|
106
|
+
# Load the CSV file into a dictionary
|
107
|
+
annotations_input = _load_matrix_file(
|
108
|
+
filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
|
109
|
+
)
|
110
|
+
# Process and return the annotations in the context of the network
|
111
|
+
return load_annotations(network, annotations_input)
|
108
112
|
|
109
113
|
def load_tsv_annotation(
|
110
114
|
self,
|
@@ -112,47 +116,56 @@ class AnnotationsIO:
|
|
112
116
|
network: nx.Graph,
|
113
117
|
label_colname: str = "label",
|
114
118
|
nodes_colname: str = "nodes",
|
119
|
+
nodes_delimiter: str = ";",
|
115
120
|
) -> Dict[str, Any]:
|
116
|
-
"""Load annotations from a TSV file and
|
121
|
+
"""Load annotations from a TSV file and associate them with the network.
|
117
122
|
|
118
123
|
Args:
|
119
124
|
filepath (str): Path to the TSV annotations file.
|
120
|
-
network (
|
121
|
-
label_colname (str): Name of the column containing the labels.
|
122
|
-
nodes_colname (str): Name of the column containing the nodes.
|
125
|
+
network (nx.Graph): The NetworkX graph to which the annotations are related.
|
126
|
+
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
127
|
+
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
128
|
+
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
123
129
|
|
124
130
|
Returns:
|
125
|
-
|
131
|
+
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
132
|
+
linked to the provided network.
|
126
133
|
"""
|
127
134
|
filetype = "TSV"
|
128
135
|
params.log_annotations(filepath=filepath, filetype=filetype)
|
129
136
|
_log_loading(filetype, filepath=filepath)
|
130
|
-
# Load the TSV file
|
137
|
+
# Load the TSV file into a dictionary
|
131
138
|
annotations_input = _load_matrix_file(
|
132
|
-
filepath, label_colname, nodes_colname, delimiter="\t"
|
139
|
+
filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
|
133
140
|
)
|
134
141
|
# Process and return the annotations in the context of the network
|
135
142
|
return load_annotations(network, annotations_input)
|
136
143
|
|
137
144
|
|
138
145
|
def _load_matrix_file(
|
139
|
-
filepath: str,
|
146
|
+
filepath: str,
|
147
|
+
label_colname: str,
|
148
|
+
nodes_colname: str,
|
149
|
+
delimiter: str = ",",
|
150
|
+
nodes_delimiter: str = ";",
|
140
151
|
) -> Dict[str, Any]:
|
141
152
|
"""Load annotations from a CSV or TSV file and convert them to a dictionary.
|
142
153
|
|
143
154
|
Args:
|
144
155
|
filepath (str): Path to the annotation file.
|
145
|
-
label_colname (str): Name of the column containing the labels.
|
146
|
-
nodes_colname (str): Name of the column containing the nodes.
|
147
|
-
delimiter (str): Delimiter used to
|
156
|
+
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
157
|
+
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
158
|
+
delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
|
159
|
+
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
148
160
|
|
149
161
|
Returns:
|
150
|
-
|
162
|
+
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
|
151
163
|
"""
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
164
|
+
# Load the CSV or TSV file into a DataFrame
|
165
|
+
df = pd.read_csv(filepath, delimiter=delimiter)
|
166
|
+
# Split the nodes column by the nodes_delimiter to handle multiple nodes per label
|
167
|
+
df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
|
168
|
+
# Create a dictionary pairing labels with their corresponding list of nodes
|
156
169
|
label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
|
157
170
|
return label_node_dict
|
158
171
|
|
@@ -10,7 +10,7 @@ import networkx as nx
|
|
10
10
|
import numpy as np
|
11
11
|
from sklearn.exceptions import DataConversionWarning
|
12
12
|
|
13
|
-
from risk.neighborhoods.
|
13
|
+
from risk.neighborhoods.community import (
|
14
14
|
calculate_dijkstra_neighborhoods,
|
15
15
|
calculate_label_propagation_neighborhoods,
|
16
16
|
calculate_louvain_neighborhoods,
|
@@ -317,10 +317,10 @@ class NetworkIO:
|
|
317
317
|
print(f"Filetype: {filetype}")
|
318
318
|
if filepath:
|
319
319
|
print(f"Filepath: {filepath}")
|
320
|
-
print(f"
|
320
|
+
print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
|
321
321
|
if self.compute_sphere:
|
322
322
|
print(f"Surface depth: {self.surface_depth}")
|
323
323
|
print(f"Edge length threshold: {self.edge_length_threshold}")
|
324
|
-
print(f"
|
324
|
+
print(f"Edge weight: {'Included' if self.include_edge_weight else 'Excluded'}")
|
325
325
|
if self.include_edge_weight:
|
326
326
|
print(f"Weight label: {self.weight_label}")
|
@@ -45,21 +45,24 @@ class NetworkPlotter:
|
|
45
45
|
outline_scale (float, optional): Outline scaling factor for the perimeter diameter. Defaults to 1.0.
|
46
46
|
"""
|
47
47
|
self.network_graph = network_graph
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
# Initialize the plot with the specified parameters
|
49
|
+
self.ax = self._initialize_plot(
|
50
|
+
network_graph, figsize, background_color, plot_outline, outline_color, outline_scale
|
51
|
+
)
|
51
52
|
|
52
53
|
def _initialize_plot(
|
53
54
|
self,
|
55
|
+
network_graph: NetworkGraph,
|
54
56
|
figsize: tuple,
|
55
57
|
background_color: str,
|
56
58
|
plot_outline: bool,
|
57
59
|
outline_color: str,
|
58
60
|
outline_scale: float,
|
59
|
-
) ->
|
61
|
+
) -> plt.Axes:
|
60
62
|
"""Set up the plot with figure size, optional circle perimeter, and background color.
|
61
63
|
|
62
64
|
Args:
|
65
|
+
network_graph (NetworkGraph): The network data and attributes to be visualized.
|
63
66
|
figsize (tuple): Size of the figure in inches (width, height).
|
64
67
|
background_color (str): Background color of the plot.
|
65
68
|
plot_outline (bool): Whether to plot the network perimeter circle.
|
@@ -67,10 +70,10 @@ class NetworkPlotter:
|
|
67
70
|
outline_scale (float): Outline scaling factor for the perimeter diameter.
|
68
71
|
|
69
72
|
Returns:
|
70
|
-
|
73
|
+
plt.Axes: The axis object for the plot.
|
71
74
|
"""
|
72
75
|
# Extract node coordinates from the network graph
|
73
|
-
node_coordinates =
|
76
|
+
node_coordinates = network_graph.node_coordinates
|
74
77
|
# Calculate the center and radius of the bounding box around the network
|
75
78
|
center, radius = _calculate_bounding_box(node_coordinates)
|
76
79
|
# Scale the radius by the outline_scale factor
|
@@ -107,9 +110,7 @@ class NetworkPlotter:
|
|
107
110
|
ax.set_yticks([])
|
108
111
|
ax.patch.set_visible(False) # Hide the axis background
|
109
112
|
|
110
|
-
|
111
|
-
self.ax = ax
|
112
|
-
return fig, ax
|
113
|
+
return ax
|
113
114
|
|
114
115
|
def plot_network(
|
115
116
|
self,
|
@@ -436,7 +437,12 @@ class NetworkPlotter:
|
|
436
437
|
arrow_color = self.get_annotated_contour_colors(color=arrow_color)
|
437
438
|
|
438
439
|
# Calculate the center and radius of the network
|
439
|
-
domain_centroids =
|
440
|
+
domain_centroids = {}
|
441
|
+
for domain, nodes in self.network_graph.domain_to_nodes.items():
|
442
|
+
if nodes: # Skip if the domain has no nodes
|
443
|
+
domain_centroids[domain] = self._calculate_domain_centroid(nodes)
|
444
|
+
|
445
|
+
# Calculate the bounding box around the network
|
440
446
|
center, radius = _calculate_bounding_box(
|
441
447
|
self.network_graph.node_coordinates, radius_margin=perimeter_scale
|
442
448
|
)
|
@@ -467,31 +473,26 @@ class NetworkPlotter:
|
|
467
473
|
arrowprops=dict(arrowstyle="->", color=arrow_color[idx], linewidth=arrow_linewidth),
|
468
474
|
)
|
469
475
|
|
470
|
-
def
|
471
|
-
"""Calculate the most centrally located node
|
476
|
+
def _calculate_domain_centroid(self, nodes: list) -> tuple:
|
477
|
+
"""Calculate the most centrally located node in a domain.
|
478
|
+
|
479
|
+
Args:
|
480
|
+
nodes (list): List of node labels to include in the subnetwork.
|
472
481
|
|
473
482
|
Returns:
|
474
|
-
|
483
|
+
tuple: A tuple containing the domain's central node coordinates.
|
475
484
|
"""
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
# Sum the distances for each node to all other nodes in the domain
|
488
|
-
sum_distances = np.sum(distances_matrix, axis=1)
|
489
|
-
# Identify the node with the smallest total distance to others (the centroid)
|
490
|
-
central_node_idx = np.argmin(sum_distances)
|
491
|
-
# Map the domain to the coordinates of its central node
|
492
|
-
domain_central_nodes[domain] = node_positions[central_node_idx]
|
493
|
-
|
494
|
-
return domain_central_nodes
|
485
|
+
# Extract positions of all nodes in the domain
|
486
|
+
node_positions = self.network_graph.node_coordinates[nodes, :]
|
487
|
+
# Calculate the pairwise distance matrix between all nodes in the domain
|
488
|
+
distances_matrix = np.linalg.norm(node_positions[:, np.newaxis] - node_positions, axis=2)
|
489
|
+
# Sum the distances for each node to all other nodes in the domain
|
490
|
+
sum_distances = np.sum(distances_matrix, axis=1)
|
491
|
+
# Identify the node with the smallest total distance to others (the centroid)
|
492
|
+
central_node_idx = np.argmin(sum_distances)
|
493
|
+
# Get the coordinates of the domain's central node
|
494
|
+
domain_central_node = node_positions[central_node_idx]
|
495
|
+
return domain_central_node
|
495
496
|
|
496
497
|
def get_annotated_node_colors(
|
497
498
|
self, nonenriched_color: str = "white", random_seed: int = 888, **kwargs
|
@@ -604,16 +605,6 @@ class NetworkPlotter:
|
|
604
605
|
|
605
606
|
return np.array(annotated_colors)
|
606
607
|
|
607
|
-
@staticmethod
|
608
|
-
def close(*args, **kwargs) -> None:
|
609
|
-
"""Close the current plot.
|
610
|
-
|
611
|
-
Args:
|
612
|
-
*args: Positional arguments passed to `plt.close`.
|
613
|
-
**kwargs: Keyword arguments passed to `plt.close`.
|
614
|
-
"""
|
615
|
-
plt.close(*args, **kwargs)
|
616
|
-
|
617
608
|
@staticmethod
|
618
609
|
def savefig(*args, **kwargs) -> None:
|
619
610
|
"""Save the current plot to a file.
|
@@ -98,7 +98,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
98
98
|
score_metric: str = "sum",
|
99
99
|
null_distribution: str = "network",
|
100
100
|
num_permutations: int = 1000,
|
101
|
-
use_cython=True,
|
102
101
|
random_seed: int = 888,
|
103
102
|
max_workers: int = 1,
|
104
103
|
) -> Dict[str, Any]:
|
@@ -122,7 +121,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
122
121
|
score_metric=score_metric,
|
123
122
|
null_distribution=null_distribution,
|
124
123
|
num_permutations=num_permutations,
|
125
|
-
use_cython=use_cython,
|
126
124
|
random_seed=random_seed,
|
127
125
|
max_workers=max_workers,
|
128
126
|
)
|
@@ -153,7 +151,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
153
151
|
score_metric=score_metric,
|
154
152
|
null_distribution=null_distribution,
|
155
153
|
num_permutations=num_permutations,
|
156
|
-
use_cython=use_cython,
|
157
154
|
random_seed=random_seed,
|
158
155
|
max_workers=max_workers,
|
159
156
|
)
|
@@ -167,7 +164,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
167
164
|
neighborhoods: Dict[str, Any],
|
168
165
|
tail: str = "right", # OPTIONS: "right" (enrichment), "left" (depletion), "both"
|
169
166
|
pval_cutoff: float = 0.01, # OPTIONS: Any value between 0 to 1
|
170
|
-
apply_fdr: bool = False,
|
171
167
|
fdr_cutoff: float = 0.9999, # OPTIONS: Any value between 0 to 1
|
172
168
|
impute_depth: int = 1,
|
173
169
|
prune_threshold: float = 0.0,
|
@@ -185,7 +181,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
185
181
|
neighborhoods (dict): Neighborhood enrichment data.
|
186
182
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
187
183
|
pval_cutoff (float, optional): P-value cutoff for significance. Defaults to 0.01.
|
188
|
-
apply_fdr (bool, optional): Whether to apply FDR correction. Defaults to False.
|
189
184
|
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
190
185
|
impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
|
191
186
|
prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
|
@@ -203,7 +198,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
203
198
|
params.log_graph(
|
204
199
|
tail=tail,
|
205
200
|
pval_cutoff=pval_cutoff,
|
206
|
-
apply_fdr=apply_fdr,
|
207
201
|
fdr_cutoff=fdr_cutoff,
|
208
202
|
impute_depth=impute_depth,
|
209
203
|
prune_threshold=prune_threshold,
|
@@ -215,7 +209,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
215
209
|
)
|
216
210
|
|
217
211
|
print(f"P-value cutoff: {pval_cutoff}")
|
218
|
-
print(f"FDR cutoff: {
|
212
|
+
print(f"FDR BH cutoff: {fdr_cutoff}")
|
219
213
|
print(
|
220
214
|
f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
|
221
215
|
)
|
@@ -225,7 +219,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
225
219
|
neighborhoods["enrichment_pvals"],
|
226
220
|
tail=tail,
|
227
221
|
pval_cutoff=pval_cutoff,
|
228
|
-
apply_fdr=apply_fdr,
|
229
222
|
fdr_cutoff=fdr_cutoff,
|
230
223
|
)
|
231
224
|
|
@@ -1,12 +1,15 @@
|
|
1
1
|
"""
|
2
|
-
risk/stats/permutation
|
3
|
-
|
2
|
+
risk/stats/permutation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
|
8
|
+
# Note: Cython optimizations provided minimal performance benefits.
|
9
|
+
# The final version with Cython is archived in the `cython_permutation` branch.
|
8
10
|
|
9
|
-
|
11
|
+
|
12
|
+
def compute_neighborhood_score_by_sum(
|
10
13
|
neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
|
11
14
|
) -> np.ndarray:
|
12
15
|
"""Compute the sum of attribute values for each neighborhood.
|
@@ -18,12 +21,12 @@ def compute_neighborhood_score_by_sum_python(
|
|
18
21
|
Returns:
|
19
22
|
np.ndarray: Sum of attribute values for each neighborhood.
|
20
23
|
"""
|
21
|
-
#
|
24
|
+
# Calculate the neighborhood score as the dot product of neighborhoods and annotations
|
22
25
|
neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
|
23
26
|
return neighborhood_score
|
24
27
|
|
25
28
|
|
26
|
-
def
|
29
|
+
def compute_neighborhood_score_by_stdev(
|
27
30
|
neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
|
28
31
|
) -> np.ndarray:
|
29
32
|
"""Compute the standard deviation of neighborhood scores.
|
@@ -37,21 +40,20 @@ def compute_neighborhood_score_by_stdev_python(
|
|
37
40
|
"""
|
38
41
|
# Calculate the neighborhood score as the dot product of neighborhoods and annotations
|
39
42
|
neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
|
40
|
-
# Calculate the number of elements in each neighborhood
|
43
|
+
# Calculate the number of elements in each neighborhood
|
41
44
|
N = np.sum(neighborhoods_matrix, axis=1)
|
42
|
-
N_reshaped = N[:, None]
|
43
45
|
# Compute the mean of the neighborhood scores
|
44
|
-
M = neighborhood_score /
|
45
|
-
# Compute the mean of squares (EXX)
|
46
|
-
EXX = np.dot(neighborhoods_matrix,
|
46
|
+
M = neighborhood_score / N[:, None]
|
47
|
+
# Compute the mean of squares (EXX) directly using squared annotation matrix
|
48
|
+
EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
|
47
49
|
# Calculate variance as EXX - M^2
|
48
|
-
variance = EXX -
|
50
|
+
variance = EXX - M**2
|
49
51
|
# Compute the standard deviation as the square root of the variance
|
50
52
|
stdev = np.sqrt(variance)
|
51
53
|
return stdev
|
52
54
|
|
53
55
|
|
54
|
-
def
|
56
|
+
def compute_neighborhood_score_by_z_score(
|
55
57
|
neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
|
56
58
|
) -> np.ndarray:
|
57
59
|
"""Compute Z-scores for neighborhood scores.
|
@@ -66,18 +68,21 @@ def compute_neighborhood_score_by_z_score_python(
|
|
66
68
|
# Calculate the neighborhood score as the dot product of neighborhoods and annotations
|
67
69
|
neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
|
68
70
|
# Calculate the number of elements in each neighborhood
|
69
|
-
N = np.dot(
|
71
|
+
N = np.dot(
|
72
|
+
neighborhoods_matrix, np.ones(annotation_matrix.shape[1], dtype=annotation_matrix.dtype)
|
73
|
+
)
|
70
74
|
# Compute the mean of the neighborhood scores
|
71
75
|
M = neighborhood_score / N
|
72
|
-
# Compute the mean of squares (EXX)
|
73
|
-
EXX = np.dot(neighborhoods_matrix,
|
74
|
-
EEX = np.power(M, 2)
|
76
|
+
# Compute the mean of squares (EXX)
|
77
|
+
EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N
|
75
78
|
# Calculate the standard deviation for each neighborhood
|
76
|
-
|
79
|
+
variance = EXX - M**2
|
80
|
+
std = np.sqrt(variance)
|
77
81
|
# Calculate Z-scores, handling cases where std is 0 or N is less than 3
|
78
82
|
with np.errstate(divide="ignore", invalid="ignore"):
|
79
|
-
z_scores =
|
80
|
-
z_scores[std == 0
|
81
|
-
|
83
|
+
z_scores = M / std
|
84
|
+
z_scores[(std == 0) | (N < 3)] = (
|
85
|
+
np.nan
|
86
|
+
) # Handle division by zero and apply minimum threshold
|
82
87
|
|
83
88
|
return z_scores
|