risk_network-0.0.8b26-py3-none-any.whl → risk_network-0.0.9b26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +74 -47
- risk/annotations/io.py +47 -31
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +17 -42
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +255 -77
- risk/neighborhoods/domains.py +62 -31
- risk/neighborhoods/neighborhoods.py +156 -160
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +65 -57
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/{graph.py → graph/network.py} +87 -37
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +56 -47
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +7 -4
- risk/network/{plot → plotter}/contour.py +22 -19
- risk/network/{plot → plotter}/labels.py +69 -74
- risk/network/{plot → plotter}/network.py +170 -34
- risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
- risk/network/{plot → plotter}/utils/layout.py +8 -5
- risk/risk.py +11 -500
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +27 -17
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +44 -38
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +15 -9
- risk/stats/stats.py +15 -13
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/plotter.py +0 -137
- risk_network-0.0.8b26.dist-info/RECORD +0 -37
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/network/geometry.py
CHANGED
@@ -27,55 +27,50 @@ def assign_edge_lengths(
         nx.Graph: The graph with applied edge lengths.
     """

-    def …
-    …
-    …
-        """Compute the distance between two coordinate vectors.
-
-        Args:
-            u_coords (np.ndarray): Coordinates of the first point.
-            v_coords (np.ndarray): Coordinates of the second point.
-            is_sphere (bool, optional): If True, compute spherical distance. Defaults to False.
-
-        Returns:
-            float: The computed distance between the two points.
-        """
+    def compute_distance_vectorized(coords, is_sphere):
+        """Compute distances between pairs of coordinates."""
+        u_coords, v_coords = coords[:, 0, :], coords[:, 1, :]
         if is_sphere:
-            …
-            …
-            …
-            …
-            …
-            …
-            …
+            u_norm = np.linalg.norm(u_coords, axis=1, keepdims=True)
+            v_norm = np.linalg.norm(v_coords, axis=1, keepdims=True)
+            u_coords /= u_norm
+            v_coords /= v_norm
+            dot_products = np.einsum("ij,ij->i", u_coords, v_coords)
+            return np.arccos(np.clip(dot_products, -1.0, 1.0))
+
+        return np.linalg.norm(u_coords - v_coords, axis=1)

-    # Normalize graph coordinates
+    # Normalize graph coordinates and weights
     _normalize_graph_coordinates(G)
-    # Normalize weights
     _normalize_weights(G)
-    # …
+    # Map nodes to sphere and adjust depth if required
     if compute_sphere:
-        # Map to sphere and adjust depth
         _map_to_sphere(G)
         G_depth = _create_depth(copy.deepcopy(G), surface_depth=surface_depth)
     else:
-        # Calculate edge lengths directly on the plane
         G_depth = copy.deepcopy(G)

-    …
+    # Precompute edge coordinate arrays for vectorized computation
+    edge_data = []
+    for u, v in G_depth.edges:
         u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
         v_coords = np.array([G_depth.nodes[v]["x"], G_depth.nodes[v]["y"]])
         if compute_sphere:
             u_coords = np.append(u_coords, G_depth.nodes[u].get("z", 0))
             v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
-        …
-        …
-        …
+        edge_data.append([u_coords, v_coords, (u, v)])
+
+    # Convert to numpy for faster processing
+    edge_coords = np.array([(e[0], e[1]) for e in edge_data])
+    edge_indices = [e[2] for e in edge_data]
+    # Compute distances in bulk
+    distances = compute_distance_vectorized(edge_coords, compute_sphere)
+    # Assign distances back to the graph
+    for (u, v), distance in zip(edge_indices, distances):
         if include_edge_weight:
-            …
-            G.edges[u, v]["length"] = distance / np.sqrt(…
+            weight = G.edges[u, v].get("normalized_weight", 0) + 1e-6
+            G.edges[u, v]["length"] = distance / np.sqrt(weight)
         else:
-            # Use calculated distance directly
             G.edges[u, v]["length"] = distance

     return G
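The rewrite above trades a per-edge helper call for one vectorized pass: every edge's endpoints are stacked into a single `(n_edges, 2, dim)` array, and `compute_distance_vectorized` computes all lengths at once. A minimal standalone sketch of the same idea on toy data (not the package's code):

```python
import numpy as np

# 1000 edges, 2 endpoints each, 3D coordinates
rng = np.random.default_rng(0)
edge_coords = rng.normal(size=(1000, 2, 3))
u, v = edge_coords[:, 0, :], edge_coords[:, 1, :]

# Planar case: one vectorized norm over all endpoint differences
euclidean = np.linalg.norm(u - v, axis=1)

# Spherical case: project endpoints onto the unit sphere, take the arc length
u_unit = u / np.linalg.norm(u, axis=1, keepdims=True)
v_unit = v / np.linalg.norm(v, axis=1, keepdims=True)
dots = np.einsum("ij,ij->i", u_unit, v_unit)      # row-wise dot products
spherical = np.arccos(np.clip(dots, -1.0, 1.0))   # clip guards float rounding

# Matches the naive per-edge computation
assert np.isclose(euclidean[0], np.linalg.norm(u[0] - v[0]))
```

The `np.clip` is not cosmetic: rounding can push a dot product of unit vectors just past ±1.0, where `arccos` would return NaN.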
@@ -87,23 +82,23 @@ def _map_to_sphere(G: nx.Graph) -> None:
     Args:
         G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
     """
-    # Extract x, y coordinates
-    …
-    …
-    …
-    …
+    # Extract x, y coordinates as a NumPy array
+    nodes = list(G.nodes)
+    xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in nodes])
+    # Normalize coordinates between [0, 1]
+    min_vals = xy_coords.min(axis=0)
+    max_vals = xy_coords.max(axis=0)
     normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
-    # …
+    # Convert normalized coordinates to spherical coordinates
     theta = normalized_xy[:, 0] * np.pi * 2
     phi = normalized_xy[:, 1] * np.pi
-    # …
-    …
-    …
-    …
-    …
-    …
-    …
-        G.nodes[node]["z"] = z
+    # Compute 3D Cartesian coordinates
+    x = np.sin(phi) * np.cos(theta)
+    y = np.sin(phi) * np.sin(theta)
+    z = np.cos(phi)
+    # Assign coordinates back to graph nodes in bulk
+    xyz_coords = {node: {"x": x[i], "y": y[i], "z": z[i]} for i, node in enumerate(nodes)}
+    nx.set_node_attributes(G, xyz_coords)


 def _normalize_graph_coordinates(G: nx.Graph) -> None:
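`_map_to_sphere` now computes all node positions as arrays and writes them back with a single dict-of-dicts `nx.set_node_attributes` call instead of looping over nodes. The angular mapping itself (the `theta`/`phi` context lines) is unchanged; a toy sketch:

```python
import numpy as np

# Normalized (x, y) in [0, 1]^2 become spherical angles, then unit-sphere points
xy = np.array([[0.0, 0.5], [0.25, 0.5], [0.5, 0.5]])
theta = xy[:, 0] * np.pi * 2   # longitude in [0, 2*pi)
phi = xy[:, 1] * np.pi         # colatitude in [0, pi]

x = np.sin(phi) * np.cos(theta)
y = np.sin(phi) * np.sin(theta)
z = np.cos(phi)

# Every mapped point lies on the unit sphere
assert np.allclose(x**2 + y**2 + z**2, 1.0)
```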
@@ -151,18 +146,31 @@ def _create_depth(G: nx.Graph, surface_depth: float = 0.0) -> nx.Graph:
         nx.Graph: The graph with adjusted 'z' attribute for each node.
     """
     if surface_depth >= 1.0:
-        surface_depth …
-
-    # Compute subclusters as connected components
-    …
-    …
-    …
-    # …
-    …
-    …
+        surface_depth -= 1e-6  # Cap the surface depth to prevent a value of 1.0
+
+    # Compute subclusters as connected components
+    connected_components = list(nx.connected_components(G))
+    subcluster_strengths = {}
+    max_strength = 0
+    # Precompute strengths and track the maximum strength
+    for component in connected_components:
+        size = len(component)
+        max_strength = max(max_strength, size)
+        for node in component:
+            subcluster_strengths[node] = size
+
+    # Avoid repeated lookups and computations by pre-fetching node data
+    nodes = list(G.nodes(data=True))
+    node_updates = {}
+    for node, attrs in nodes:
+        strength = subcluster_strengths[node]
         normalized_surface_depth = (strength / max_strength) * surface_depth
-        x, y, z = …
+        x, y, z = attrs["x"], attrs["y"], attrs["z"]
         norm = np.sqrt(x**2 + y**2 + z**2)
-        …
+        adjusted_z = z - (z / norm) * normalized_surface_depth
+        node_updates[node] = {"z": adjusted_z}
+
+    # Batch update node attributes
+    nx.set_node_attributes(G, node_updates)

     return G
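The new `_create_depth` body precomputes connected-component sizes once and batch-writes the adjusted `z` values rather than recomputing per node. A toy run of the same adjustment (all names local to this sketch): nodes in larger components sink deeper below the sphere surface along their radial direction.

```python
import networkx as nx
import numpy as np

G = nx.Graph([(0, 1), (1, 2), (3, 4)])         # components of size 3 and 2
for node in G:
    G.nodes[node].update(x=0.0, y=0.0, z=1.0)  # place all nodes at a pole

surface_depth = 0.2
sizes = {n: len(c) for c in nx.connected_components(G) for n in c}
max_size = max(sizes.values())

node_updates = {}
for node, attrs in G.nodes(data=True):
    depth = (sizes[node] / max_size) * surface_depth
    x, y, z = attrs["x"], attrs["y"], attrs["z"]
    norm = np.sqrt(x**2 + y**2 + z**2)
    node_updates[node] = {"z": z - (z / norm) * depth}  # sink along the radial axis
nx.set_node_attributes(G, node_updates)

assert np.isclose(G.nodes[0]["z"], 0.8)              # size-3 component: full depth
assert np.isclose(G.nodes[3]["z"], 1 - 0.2 * 2 / 3)  # size-2 component: scaled depth
```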
risk/network/graph/api.py
ADDED
@@ -0,0 +1,194 @@
+"""
+risk/network/graph/api
+~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+import copy
+from typing import Any, Dict
+
+import networkx as nx
+import pandas as pd
+
+from risk.annotations import define_top_annotations
+from risk.log import logger, log_header, params
+from risk.neighborhoods import (
+    define_domains,
+    process_neighborhoods,
+    trim_domains,
+)
+from risk.network.graph.network import NetworkGraph
+from risk.stats import calculate_significance_matrices
+
+
+class GraphAPI:
+    """Handles the loading of network graphs and associated data.
+
+    The GraphAPI class provides methods to load and process network graphs, annotations, and neighborhoods.
+    """
+
+    def __init__() -> None:
+        pass
+
+    def load_graph(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        neighborhoods: Dict[str, Any],
+        tail: str = "right",
+        pval_cutoff: float = 0.01,
+        fdr_cutoff: float = 0.9999,
+        impute_depth: int = 0,
+        prune_threshold: float = 0.0,
+        linkage_criterion: str = "distance",
+        linkage_method: str = "average",
+        linkage_metric: str = "yule",
+        min_cluster_size: int = 5,
+        max_cluster_size: int = 1000,
+    ) -> NetworkGraph:
+        """Load and process the network graph, defining top annotations and domains.
+
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (Dict[str, Any]): The annotations associated with the network.
+            neighborhoods (Dict[str, Any]): Neighborhood significance data.
+            tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
+            pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
+            fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
+            impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
+            prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
+            linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
+            linkage_method (str, optional): Clustering method to use. Defaults to "average".
+            linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
+            min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
+            max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
+
+        Returns:
+            NetworkGraph: A fully initialized and processed NetworkGraph object.
+        """
+        # Log the parameters and display headers
+        log_header("Finding significant neighborhoods")
+        params.log_graph(
+            tail=tail,
+            pval_cutoff=pval_cutoff,
+            fdr_cutoff=fdr_cutoff,
+            impute_depth=impute_depth,
+            prune_threshold=prune_threshold,
+            linkage_criterion=linkage_criterion,
+            linkage_method=linkage_method,
+            linkage_metric=linkage_metric,
+            min_cluster_size=min_cluster_size,
+            max_cluster_size=max_cluster_size,
+        )
+
+        # Make a copy of the network to avoid modifying the original
+        network = copy.deepcopy(network)
+
+        logger.debug(f"p-value cutoff: {pval_cutoff}")
+        logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
+        logger.debug(
+            f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
+        )
+        # Calculate significant neighborhoods based on the provided parameters
+        significant_neighborhoods = calculate_significance_matrices(
+            neighborhoods["depletion_pvals"],
+            neighborhoods["enrichment_pvals"],
+            tail=tail,
+            pval_cutoff=pval_cutoff,
+            fdr_cutoff=fdr_cutoff,
+        )
+
+        log_header("Processing neighborhoods")
+        # Process neighborhoods by imputing and pruning based on the given settings
+        processed_neighborhoods = process_neighborhoods(
+            network=network,
+            neighborhoods=significant_neighborhoods,
+            impute_depth=impute_depth,
+            prune_threshold=prune_threshold,
+        )
+
+        log_header("Finding top annotations")
+        logger.debug(f"Min cluster size: {min_cluster_size}")
+        logger.debug(f"Max cluster size: {max_cluster_size}")
+        # Define top annotations based on processed neighborhoods
+        top_annotations = self._define_top_annotations(
+            network=network,
+            annotations=annotations,
+            neighborhoods=processed_neighborhoods,
+            min_cluster_size=min_cluster_size,
+            max_cluster_size=max_cluster_size,
+        )
+
+        log_header("Optimizing distance threshold for domains")
+        # Extract the significant significance matrix from the neighborhoods data
+        significant_neighborhoods_significance = processed_neighborhoods[
+            "significant_significance_matrix"
+        ]
+        # Define domains in the network using the specified clustering settings
+        domains = define_domains(
+            top_annotations=top_annotations,
+            significant_neighborhoods_significance=significant_neighborhoods_significance,
+            linkage_criterion=linkage_criterion,
+            linkage_method=linkage_method,
+            linkage_metric=linkage_metric,
+        )
+        # Trim domains and top annotations based on cluster size constraints
+        domains, trimmed_domains = trim_domains(
+            domains=domains,
+            top_annotations=top_annotations,
+            min_cluster_size=min_cluster_size,
+            max_cluster_size=max_cluster_size,
+        )
+
+        # Prepare node mapping and significance sums for the final NetworkGraph object
+        ordered_nodes = annotations["ordered_nodes"]
+        node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
+        node_significance_sums = processed_neighborhoods["node_significance_sums"]
+
+        # Return the fully initialized NetworkGraph object
+        return NetworkGraph(
+            network=network,
+            annotations=annotations,
+            neighborhoods=neighborhoods,
+            domains=domains,
+            trimmed_domains=trimmed_domains,
+            node_label_to_node_id_map=node_label_to_id,
+            node_significance_sums=node_significance_sums,
+        )
+
+    def _define_top_annotations(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        neighborhoods: Dict[str, Any],
+        min_cluster_size: int = 5,
+        max_cluster_size: int = 1000,
+    ) -> pd.DataFrame:
+        """Define top annotations for the network.
+
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (Dict[str, Any]): Annotations data for the network.
+            neighborhoods (Dict[str, Any]): Neighborhood significance data.
+            min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
+            max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
+
+        Returns:
+            Dict[str, Any]: Top annotations identified within the network.
+        """
+        # Extract necessary data from annotations and neighborhoods
+        ordered_annotations = annotations["ordered_annotations"]
+        neighborhood_significance_sums = neighborhoods["neighborhood_significance_counts"]
+        significant_significance_matrix = neighborhoods["significant_significance_matrix"]
+        significant_binary_significance_matrix = neighborhoods[
+            "significant_binary_significance_matrix"
+        ]
+        # Call external function to define top annotations
+        return define_top_annotations(
+            network=network,
+            ordered_annotation_labels=ordered_annotations,
+            neighborhood_significance_sums=neighborhood_significance_sums,
+            significant_significance_matrix=significant_significance_matrix,
+            significant_binary_significance_matrix=significant_binary_significance_matrix,
+            min_cluster_size=min_cluster_size,
+            max_cluster_size=max_cluster_size,
+        )
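Elsewhere in this release, risk/risk.py shrinks from roughly 500 lines to a stub (+11 −500 above) while per-stage modules appear (risk/neighborhoods/api.py, risk/network/graph/api.py, risk/network/plotter/api.py), which suggests the top-level class is now composed from these API classes. A hypothetical sketch of that wiring, with `GraphAPI` stubbed so it runs standalone; only the `load_graph` name and its keyword arguments come from the diff, everything else is an assumption:

```python
import networkx as nx


class GraphAPI:
    """Stub standing in for risk.network.graph.api.GraphAPI (demo only)."""

    def load_graph(self, network: nx.Graph, tail: str = "right",
                   pval_cutoff: float = 0.01, fdr_cutoff: float = 0.9999,
                   **kwargs) -> str:
        # The real method scores neighborhoods, clusters domains, and
        # returns a NetworkGraph; here we just report what we were given.
        return f"processed {network.number_of_nodes()} nodes (tail={tail!r})"


class RISK(GraphAPI):
    """Hypothetical facade that inherits each stage's public methods."""


risk = RISK()
print(risk.load_graph(nx.karate_club_graph(), tail="right", pval_cutoff=0.01))
```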
risk/network/{graph.py → graph/network.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/network/graph
-~~~~~~~~~~~~~~~~~~
+risk/network/graph/network
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

 from collections import defaultdict
@@ -10,60 +10,93 @@ import networkx as nx
 import numpy as np
 import pandas as pd

+from risk.network.graph.summary import AnalysisSummary
+

 class NetworkGraph:
     """A class to represent a network graph and process its nodes and edges.

     The NetworkGraph class provides functionality to handle and manipulate a network graph,
-    including managing domains, annotations, and node …
+    including managing domains, annotations, and node significance data. It also includes methods
     for transforming and mapping graph coordinates, as well as generating colors based on node
-    …
+    significance.
     """

     def __init__(
         self,
         network: nx.Graph,
-        …
+        annotations: Dict[str, Any],
+        neighborhoods: Dict[str, Any],
         domains: pd.DataFrame,
         trimmed_domains: pd.DataFrame,
         node_label_to_node_id_map: Dict[str, Any],
-        …
+        node_significance_sums: np.ndarray,
     ):
         """Initialize the NetworkGraph object.

         Args:
             network (nx.Graph): The network graph.
-            …
+            annotations (Dict[str, Any]): The annotations associated with the network.
+            neighborhoods (Dict[str, Any]): Neighborhood significance data.
             domains (pd.DataFrame): DataFrame containing domain data for the network nodes.
             trimmed_domains (pd.DataFrame): DataFrame containing trimmed domain data for the network nodes.
             node_label_to_node_id_map (Dict[str, Any]): A dictionary mapping node labels to their corresponding IDs.
-            …
+            node_significance_sums (np.ndarray): Array containing the significant sums for the nodes.
         """
-        self.…
+        # Initialize self.network downstream of the other attributes
+        # All public attributes can be accessed after initialization
         self.domain_id_to_node_ids_map = self._create_domain_id_to_node_ids_map(domains)
-        self.domains = domains
         self.domain_id_to_domain_terms_map = self._create_domain_id_to_domain_terms_map(
             trimmed_domains
         )
         self.domain_id_to_domain_info_map = self._create_domain_id_to_domain_info_map(
             trimmed_domains
         )
-        self.…
-        …
-        self.node_id_to_domain_ids_and_enrichments_map = (
-            self._create_node_id_to_domain_ids_and_enrichments(domains)
+        self.node_id_to_domain_ids_and_significance_map = (
+            self._create_node_id_to_domain_ids_and_significances(domains)
         )
         self.node_id_to_node_label_map = {v: k for k, v in node_label_to_node_id_map.items()}
-        self.…
-            zip(node_label_to_node_id_map.keys(), …
+        self.node_label_to_significance_map = dict(
+            zip(node_label_to_node_id_map.keys(), node_significance_sums)
         )
+        self.node_significance_sums = node_significance_sums
         self.node_label_to_node_id_map = node_label_to_node_id_map
+
         # NOTE: Below this point, instance attributes (i.e., self) will be used!
         self.domain_id_to_node_labels_map = self._create_domain_id_to_node_labels_map()
         # Unfold the network's 3D coordinates to 2D and extract node coordinates
         self.network = _unfold_sphere_to_plane(network)
         self.node_coordinates = _extract_node_coordinates(self.network)

+        # NOTE: Only after the above attributes are initialized, we can create the summary
+        self.summary = AnalysisSummary(annotations, neighborhoods, self)
+
+    def pop(self, domain_id: str) -> None:
+        """Remove domain ID from instance domain ID mappings. This can be useful for cleaning up
+        domain-specific mappings based on a given criterion, as domain attributes are stored and
+        accessed only in dictionaries modified by this method.
+
+        Args:
+            key (str): The domain ID key to be removed from each mapping.
+        """
+        # Define the domain mappings to be updated
+        domain_mappings = [
+            self.domain_id_to_node_ids_map,
+            self.domain_id_to_domain_terms_map,
+            self.domain_id_to_domain_info_map,
+            self.domain_id_to_node_labels_map,
+        ]
+        # Remove the specified domain_id key from each mapping if it exists
+        for mapping in domain_mappings:
+            if domain_id in mapping:
+                mapping.pop(domain_id)
+
+        # Remove the domain_id from the node_id_to_domain_ids_and_significance_map
+        for _, domain_info in self.node_id_to_domain_ids_and_significance_map.items():
+            if domain_id in domain_info["domains"]:
+                domain_info["domains"].remove(domain_id)
+                domain_info["significances"].pop(domain_id)
+
     @staticmethod
     def _create_domain_id_to_node_ids_map(domains: pd.DataFrame) -> Dict[int, Any]:
         """Create a mapping from domains to the list of node IDs belonging to each domain.
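The new `pop` method prunes one domain from every instance mapping in a single call. A toy reproduction of its effect using plain dicts (attribute names mirror the diff above):

```python
# Domain-level and node-level mappings, as NetworkGraph stores them
domain_id_to_node_ids_map = {1: [0, 2], 2: [1]}
node_id_to_domain_ids_and_significance_map = {
    0: {"domains": [1], "significances": {1: 0.8}},
    1: {"domains": [2], "significances": {2: 0.5}},
}

domain_id = 1
# Drop the domain-level entry from each domain mapping if present
for mapping in [domain_id_to_node_ids_map]:
    if domain_id in mapping:
        mapping.pop(domain_id)

# Scrub node-level references to the removed domain
for domain_info in node_id_to_domain_ids_and_significance_map.values():
    if domain_id in domain_info["domains"]:
        domain_info["domains"].remove(domain_id)
        domain_info["significances"].pop(domain_id)

assert 1 not in domain_id_to_node_ids_map
assert node_id_to_domain_ids_and_significance_map[0] == {"domains": [], "significances": {}}
```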
@@ -103,25 +136,42 @@ class NetworkGraph:
     def _create_domain_id_to_domain_info_map(
         trimmed_domains: pd.DataFrame,
     ) -> Dict[int, Dict[str, Any]]:
-        """Create a mapping from domain IDs to their corresponding full description and …
+        """Create a mapping from domain IDs to their corresponding full description and significance score,
+        with scores sorted in descending order.

         Args:
-            trimmed_domains (pd.DataFrame): DataFrame containing domain IDs, full descriptions, and …
+            trimmed_domains (pd.DataFrame): DataFrame containing domain IDs, full descriptions, and significance scores.

         Returns:
-            Dict[int, Dict[str, Any]]: A dictionary mapping domain IDs (int) to a dictionary with 'full_descriptions' and …
+            Dict[int, Dict[str, Any]]: A dictionary mapping domain IDs (int) to a dictionary with 'full_descriptions' and
+            'significance_scores', both sorted by significance score in descending order.
         """
-        …
-        …
-        …
-        …
+        # Initialize an empty dictionary to store full descriptions and significance scores of domains
+        domain_info_map = {}
+        # Domain IDs are the index of the DataFrame (it's common for some IDs to be missing)
+        for domain_id in trimmed_domains.index:
+            # Sort full_descriptions and significance_scores by significance_scores in descending order
+            descriptions_and_scores = sorted(
+                zip(
+                    trimmed_domains.at[domain_id, "full_descriptions"],
+                    trimmed_domains.at[domain_id, "significance_scores"],
+                ),
+                key=lambda x: x[1],  # Sort by significance score
+                reverse=True,  # Descending order
+            )
+            # Unzip the sorted tuples back into separate lists
+            sorted_descriptions, sorted_scores = zip(*descriptions_and_scores)
+            # Assign to the domain info map
+            domain_info_map[int(domain_id)] = {
+                "full_descriptions": list(sorted_descriptions),
+                "significance_scores": list(sorted_scores),
             }
-        …
-        …
+
+        return domain_info_map

     @staticmethod
-    def …
-        """Creates a dictionary mapping each node ID to its corresponding domain IDs and …
+    def _create_node_id_to_domain_ids_and_significances(domains: pd.DataFrame) -> Dict[int, Dict]:
+        """Creates a dictionary mapping each node ID to its corresponding domain IDs and significance values.

         Args:
             domains (pd.DataFrame): A DataFrame containing domain information for each node. Assumes the last
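The sort-then-unzip idiom in `_create_domain_id_to_domain_info_map` above keeps the two per-domain lists in lockstep while ordering both by score. A standalone illustration on toy data:

```python
# Parallel lists of descriptions and significance scores for one domain
descriptions = ["ribosome biogenesis", "rRNA processing", "translation"]
scores = [0.7, 0.9, 0.2]

# Sort pairs by score (descending), then split back into parallel sequences
pairs = sorted(zip(descriptions, scores), key=lambda x: x[1], reverse=True)
sorted_descriptions, sorted_scores = zip(*pairs)

assert list(sorted_descriptions) == ["rRNA processing", "ribosome biogenesis", "translation"]
assert list(sorted_scores) == [0.9, 0.7, 0.2]
```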
@@ -129,28 +179,28 @@ class NetworkGraph:

         Returns:
             Dict[int, Dict]: A dictionary where the key is the node ID (index of the DataFrame), and the value is another dictionary
-            with 'domain' (a list of domain IDs with non-zero …
-            (a dict of domain IDs and their corresponding …
+            with 'domain' (a list of domain IDs with non-zero significance) and 'significance'
+            (a dict of domain IDs and their corresponding significance values).
         """
         # Initialize an empty dictionary to store the result
-        …
+        node_id_to_domain_ids_and_significances = {}
         # Get the list of domain columns (excluding 'all domains' and 'primary domain')
         domain_columns = domains.columns[
             :-2
         ]  # The last two columns are 'all domains' and 'primary domain'
         # Iterate over each row in the dataframe
         for idx, row in domains.iterrows():
-            # Get the domains (column names) where the …
+            # Get the domains (column names) where the significance score is greater than 0
             all_domains = domain_columns[row[domain_columns] > 0].tolist()
-            # Get the …
-            …
+            # Get the significance values for those domains
+            significance_values = row[all_domains].to_dict()
             # Store the result in the dictionary with index as the key
-            …
-                "domains": all_domains,  # The column names where …
-                "…
+            node_id_to_domain_ids_and_significances[idx] = {
+                "domains": all_domains,  # The column names where significance > 0
+                "significances": significance_values,  # The actual significance values for those columns
             }

-        return …
+        return node_id_to_domain_ids_and_significances

     def _create_domain_id_to_node_labels_map(self) -> Dict[int, List[str]]:
         """Create a map from domain IDs to node labels.
|