risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +10 -0
- risk/annotations/annotations.py +354 -0
- risk/annotations/io.py +241 -0
- risk/annotations/nltk_setup.py +86 -0
- risk/log/__init__.py +11 -0
- risk/log/console.py +141 -0
- risk/log/parameters.py +171 -0
- risk/neighborhoods/__init__.py +7 -0
- risk/neighborhoods/api.py +442 -0
- risk/neighborhoods/community.py +441 -0
- risk/neighborhoods/domains.py +360 -0
- risk/neighborhoods/neighborhoods.py +514 -0
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/neighborhoods/stats/permutation/permutation.py +240 -0
- risk/neighborhoods/stats/permutation/test_functions.py +70 -0
- risk/neighborhoods/stats/tests.py +275 -0
- risk/network/__init__.py +4 -0
- risk/network/graph/__init__.py +4 -0
- risk/network/graph/api.py +200 -0
- risk/network/graph/graph.py +268 -0
- risk/network/graph/stats.py +166 -0
- risk/network/graph/summary.py +253 -0
- risk/network/io.py +693 -0
- risk/network/plotter/__init__.py +4 -0
- risk/network/plotter/api.py +54 -0
- risk/network/plotter/canvas.py +291 -0
- risk/network/plotter/contour.py +329 -0
- risk/network/plotter/labels.py +935 -0
- risk/network/plotter/network.py +294 -0
- risk/network/plotter/plotter.py +141 -0
- risk/network/plotter/utils/colors.py +419 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk_network-0.0.12b1.dist-info/METADATA +122 -0
- risk_network-0.0.12b1.dist-info/RECORD +40 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
- risk_network-0.0.12b0.dist-info/METADATA +0 -796
- risk_network-0.0.12b0.dist-info/RECORD +0 -7
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
risk/network/io.py
ADDED
@@ -0,0 +1,693 @@
|
|
1
|
+
"""
|
2
|
+
risk/network/io
|
3
|
+
~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
import copy
|
7
|
+
import json
|
8
|
+
import os
|
9
|
+
import pickle
|
10
|
+
import shutil
|
11
|
+
import zipfile
|
12
|
+
from xml.dom import minidom
|
13
|
+
|
14
|
+
import networkx as nx
|
15
|
+
import numpy as np
|
16
|
+
import pandas as pd
|
17
|
+
|
18
|
+
from risk.log import log_header, logger, params
|
19
|
+
|
20
|
+
|
21
|
+
class NetworkIO:
    """Load network data from several sources and prepare it for analysis.

    Supported inputs are GPickle files, in-memory NetworkX graphs, Cytoscape session
    archives and Cytoscape JSON (.cyjs) files. Every loader passes the raw graph to
    `_initialize_graph`, which validates node attributes, assigns edge weights and
    lengths, prunes invalid nodes/edges and finally relabels the nodes to integers.
    """

    def __init__(
        self,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        min_edges_per_node: int = 0,
    ):
        """Initialize the NetworkIO class.

        Args:
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
        """
        self.compute_sphere = compute_sphere
        self.surface_depth = surface_depth
        self.min_edges_per_node = min_edges_per_node
        # Record the chosen settings in the shared parameter log
        params.log_network(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            min_edges_per_node=min_edges_per_node,
        )

    def load_gpickle_network(
        self,
        filepath: str,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        min_edges_per_node: int = 0,
    ) -> nx.Graph:
        """Load a network from a GPickle file.

        A fresh NetworkIO is built from the arguments given here; the settings stored
        on the instance this method is called on are not consulted.

        Args:
            filepath (str): Path to the GPickle file.
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.

        Returns:
            nx.Graph: Loaded and processed network.
        """
        networkio = NetworkIO(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            min_edges_per_node=min_edges_per_node,
        )
        return networkio._load_gpickle_network(filepath=filepath)

    def _load_gpickle_network(self, filepath: str) -> nx.Graph:
        """Private method to load a network from a GPickle file.

        Args:
            filepath (str): Path to the GPickle file.

        Returns:
            nx.Graph: Loaded and processed network.
        """
        filetype = "GPickle"
        # Log the loading of the GPickle file (path stringified like the other loaders)
        params.log_network(filetype=filetype, filepath=str(filepath))
        self._log_loading(filetype, filepath=filepath)

        # SECURITY: unpickling can execute arbitrary code. Only load files from trusted sources.
        with open(filepath, "rb") as f:
            G = pickle.load(f)

        # Initialize the graph
        return self._initialize_graph(G)

    def load_networkx_network(
        self,
        network: nx.Graph,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        min_edges_per_node: int = 0,
    ) -> nx.Graph:
        """Load a NetworkX graph.

        A fresh NetworkIO is built from the arguments given here; the settings stored
        on the instance this method is called on are not consulted.

        Args:
            network (nx.Graph): A NetworkX graph object.
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.

        Returns:
            nx.Graph: Loaded and processed network.
        """
        networkio = NetworkIO(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            min_edges_per_node=min_edges_per_node,
        )
        return networkio._load_networkx_network(network=network)

    def _load_networkx_network(self, network: nx.Graph) -> nx.Graph:
        """Private method to load a NetworkX graph.

        Args:
            network (nx.Graph): A NetworkX graph object.

        Returns:
            nx.Graph: Processed network.
        """
        filetype = "NetworkX"
        # Log the loading of the NetworkX graph
        params.log_network(filetype=filetype)
        self._log_loading(filetype)

        # Important: work on a deep copy so the caller's graph is never modified
        network_copy = copy.deepcopy(network)
        # Initialize the graph
        return self._initialize_graph(network_copy)

    def load_cytoscape_network(
        self,
        filepath: str,
        source_label: str = "source",
        target_label: str = "target",
        view_name: str = "",
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        min_edges_per_node: int = 0,
    ) -> nx.Graph:
        """Load a network from a Cytoscape file.

        A fresh NetworkIO is built from the arguments given here; the settings stored
        on the instance this method is called on are not consulted.

        Args:
            filepath (str): Path to the Cytoscape file.
            source_label (str, optional): Source node label. Defaults to "source".
            target_label (str, optional): Target node label. Defaults to "target".
            view_name (str, optional): Specific view name to load. Defaults to "".
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.

        Returns:
            nx.Graph: Loaded and processed network.
        """
        networkio = NetworkIO(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            min_edges_per_node=min_edges_per_node,
        )
        return networkio._load_cytoscape_network(
            filepath=filepath,
            source_label=source_label,
            target_label=target_label,
            view_name=view_name,
        )

    def _load_cytoscape_network(
        self,
        filepath: str,
        source_label: str = "source",
        target_label: str = "target",
        view_name: str = "",
    ) -> nx.Graph:
        """Private method to load a network from a Cytoscape file.

        The archive is unpacked into a private temporary directory that is removed
        again before this method returns, whether or not loading succeeds.

        Args:
            filepath (str): Path to the Cytoscape file.
            source_label (str, optional): Source node label. Defaults to "source".
            target_label (str, optional): Target node label. Defaults to "target".
            view_name (str, optional): Specific view name to load. Defaults to "".

        Returns:
            nx.Graph: Loaded and processed network.

        Raises:
            ValueError: If no view file, no view matching `view_name`, or no matching
                attribute metadata file is found.
            KeyError: If the source or target label is not found in the attribute table,
                or if a node of the edge table has no position in the selected view.
        """
        # Local import: the module header does not import tempfile
        import tempfile

        filetype = "Cytoscape"
        # Log the loading of the Cytoscape file
        params.log_network(filetype=filetype, filepath=str(filepath))
        self._log_loading(filetype, filepath=filepath)

        # A unique temporary directory avoids clashes between concurrent loads and never
        # touches a user directory; the context manager removes it on exit
        with tempfile.TemporaryDirectory(prefix="risk_cytoscape_") as tmp_dir:
            # Unzip the archive into the temporary directory
            with zipfile.ZipFile(filepath, "r") as zip_ref:
                cys_files = zip_ref.namelist()
                zip_ref.extractall(tmp_dir)

            # Everything needed is read into memory before the directory is removed
            cys_view_file = self._select_cytoscape_view_file(tmp_dir, cys_files, view_name)
            node_x_positions, node_y_positions = self._parse_cytoscape_node_positions(
                cys_view_file
            )
            attribute_table = self._read_cytoscape_edge_table(
                tmp_dir, cys_files, source_label, target_label
            )

        # Create a graph; adding an edge also adds both of its end nodes
        G = nx.Graph()
        G.add_edges_from(attribute_table.itertuples(index=False, name=None))

        # Fail with a descriptive message when the view lacks a position for some node
        nodes_without_position = [node for node in G.nodes() if node not in node_x_positions]
        if nodes_without_position:
            raise KeyError(
                f"No position found in the Cytoscape view for node(s): "
                f"{', '.join(map(str, nodes_without_position))}."
            )

        # Add node attributes
        for node in G.nodes():
            G.nodes[node]["label"] = node
            G.nodes[node]["x"] = node_x_positions[node]
            G.nodes[node]["y"] = node_y_positions[node]

        # Initialize the graph
        return self._initialize_graph(G)

    @staticmethod
    def _select_cytoscape_view_file(tmp_dir: str, cys_files: list, view_name: str) -> str:
        """Pick the extracted view file to parse: the first one, or the one named `view_name`.

        Args:
            tmp_dir (str): Directory the archive was extracted into.
            cys_files (list): Archive member names.
            view_name (str): View to select; an empty string selects the first view.

        Returns:
            str: Path of the selected view file inside `tmp_dir`.

        Raises:
            ValueError: If the archive has no view files or none matches `view_name`.
        """
        cys_view_files = [os.path.join(tmp_dir, cf) for cf in cys_files if "/views/" in cf]
        if not cys_view_files:
            raise ValueError("No view files found in the Cytoscape file.")
        if not view_name:
            return cys_view_files[0]

        wanted_suffix = view_name + ".xgmml"
        for cys_view_file in cys_view_files:
            if cys_view_file.endswith(wanted_suffix):
                return cys_view_file
        raise ValueError(f"View '{view_name}' was not found in the Cytoscape file.")

    @staticmethod
    def _parse_cytoscape_node_positions(cys_view_file: str) -> tuple:
        """Read the x/y position of every node from a view file.

        Args:
            cys_view_file (str): Path to the extracted view (XML) file.

        Returns:
            tuple: Two dicts (x positions, y positions) keyed by the node's 'label' attribute.
        """
        node_x_positions = {}
        node_y_positions = {}
        cys_view_dom = minidom.parse(cys_view_file)
        for node in cys_view_dom.getElementsByTagName("node"):
            # Node ID is found in 'label'
            node_id = str(node.attributes["label"].value)
            for child in node.childNodes:
                # Only element children can be <graphics>; text nodes have no tagName
                if child.nodeType == minidom.Node.ELEMENT_NODE and child.tagName == "graphics":
                    node_x_positions[node_id] = float(child.attributes["x"].value)
                    node_y_positions[node_id] = float(child.attributes["y"].value)
        return node_x_positions, node_y_positions

    @staticmethod
    def _read_cytoscape_edge_table(
        tmp_dir: str, cys_files: list, source_label: str, target_label: str
    ) -> pd.DataFrame:
        """Read the edge attribute table and reduce it to its source and target columns.

        Args:
            tmp_dir (str): Directory the archive was extracted into.
            cys_files (list): Archive member names.
            source_label (str): Column holding the source node of each edge.
            target_label (str): Column holding the target node of each edge.

        Returns:
            pd.DataFrame: Two-column table (source, target) without missing values.

        Raises:
            ValueError: If no matching attribute metadata file is found.
            KeyError: If the source or target label is not a column of the table.
        """
        # The table file is the first member whose name contains all of these keywords
        attribute_metadata_keywords = ("/tables/", "SHARED_ATTRS", "edge.cytable")
        attribute_metadata = next(
            (
                os.path.join(tmp_dir, cf)
                for cf in cys_files
                if all(keyword in cf for keyword in attribute_metadata_keywords)
            ),
            None,  # Default if no file matches
        )
        if not attribute_metadata:
            raise ValueError("No matching attribute metadata file found.")

        attribute_table = pd.read_csv(
            attribute_metadata,
            sep=",",
            header=None,
            skiprows=1,  # The first line of the file is not part of the table
            dtype=str,  # Keep every cell as text
            engine="c",
            low_memory=False,
        )
        # The first remaining row holds the column names
        attribute_table.columns = attribute_table.iloc[0]
        # Skip the first four rows; only the rows after them are treated as edge records
        attribute_table = attribute_table.iloc[4:, :]
        try:
            # Attempt to filter the attribute_table with the given labels
            attribute_table = attribute_table[[source_label, target_label]]
        except KeyError as e:
            # Find which key(s) caused the issue
            missing_keys = [
                key for key in [source_label, target_label] if key not in attribute_table.columns
            ]
            # str() guards against non-string (e.g. NaN) header cells breaking the join
            available_columns = ", ".join(map(str, attribute_table.columns))
            raise KeyError(
                f"The column(s) '{', '.join(missing_keys)}' do not exist in the table. "
                f"Available columns are: {available_columns}."
            ) from e

        return attribute_table.dropna().reset_index(drop=True)

    def load_cytoscape_json_network(
        self,
        filepath: str,
        source_label: str = "source",
        target_label: str = "target",
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        min_edges_per_node: int = 0,
    ) -> nx.Graph:
        """Load a network from a Cytoscape JSON (.cyjs) file.

        A fresh NetworkIO is built from the arguments given here; the settings stored
        on the instance this method is called on are not consulted.

        Args:
            filepath (str): Path to the Cytoscape JSON file.
            source_label (str, optional): Source node label. Default is "source".
            target_label (str, optional): Target node label. Default is "target".
            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.

        Returns:
            NetworkX graph: Loaded and processed network.
        """
        networkio = NetworkIO(
            compute_sphere=compute_sphere,
            surface_depth=surface_depth,
            min_edges_per_node=min_edges_per_node,
        )
        return networkio._load_cytoscape_json_network(
            filepath=filepath,
            source_label=source_label,
            target_label=target_label,
        )

    def _load_cytoscape_json_network(
        self,
        filepath: str,
        source_label: str = "source",
        target_label: str = "target",
    ) -> nx.Graph:
        """Private method to load a network from a Cytoscape JSON (.cyjs) file.

        Args:
            filepath (str): Path to the Cytoscape JSON file.
            source_label (str, optional): Source node label. Default is "source".
            target_label (str, optional): Target node label. Default is "target".

        Returns:
            NetworkX graph: Loaded and processed network.
        """
        filetype = "Cytoscape JSON"
        # Log the loading of the Cytoscape JSON file
        params.log_network(filetype=filetype, filepath=str(filepath))
        self._log_loading(filetype, filepath=filepath)

        # Read as UTF-8 explicitly so the result does not depend on the platform locale
        with open(filepath, "r", encoding="utf-8") as f:
            cyjs_data = json.load(f)

        # Store node positions for later use
        node_x_positions = {}
        node_y_positions = {}
        for node in cyjs_data["elements"]["nodes"]:
            node_data = node["data"]
            # Use the original node ID if available, otherwise use the default ID
            node_id = node_data.get("id_original", node_data.get("id"))
            node_x_positions[node_id] = node["position"]["x"]
            node_y_positions[node_id] = node["position"]["y"]

        # Create a graph; adding an edge also adds both of its end nodes
        G = nx.Graph()
        for edge in cyjs_data["elements"]["edges"]:
            edge_data = edge["data"]
            # Use the original source and target labels if available, otherwise fall back to default labels
            source = edge_data.get(f"{source_label}_original", edge_data.get(source_label))
            target = edge_data.get(f"{target_label}_original", edge_data.get(target_label))
            G.add_edge(source, target)

        # Add node attributes; nodes without a stored position fall back to 0
        for node in G.nodes():
            G.nodes[node]["label"] = node
            G.nodes[node]["x"] = node_x_positions.get(node, 0)
            G.nodes[node]["y"] = node_y_positions.get(node, 0)

        # Initialize the graph
        return self._initialize_graph(G)

    def _initialize_graph(self, G: nx.Graph) -> nx.Graph:
        """Initialize the graph by processing and validating its nodes and edges.

        Args:
            G (nx.Graph): The input NetworkX graph.

        Returns:
            nx.Graph: The processed and validated graph.
        """
        self._validate_nodes(G)
        self._assign_edge_weights(G)
        self._assign_edge_lengths(G)
        self._remove_invalid_graph_properties(G)
        # IMPORTANT: This is where the graph node labels are converted to integers
        # Make sure to perform this step after all other processing
        G = nx.convert_node_labels_to_integers(G)
        return G

    def _remove_invalid_graph_properties(self, G: nx.Graph) -> None:
        """Remove invalid properties from the graph, including self-loops, nodes with fewer edges than
        the threshold, and isolated nodes.

        Args:
            G (nx.Graph): A NetworkX graph object. Modified in place.
        """
        # Count the number of nodes and edges before cleaning
        num_initial_nodes = G.number_of_nodes()
        num_initial_edges = G.number_of_edges()
        # Remove self-loops; materialize the edge list first so the graph is not
        # mutated while it is being iterated
        G.remove_edges_from(list(nx.selfloop_edges(G)))
        # Removing a node lowers its neighbors' degrees, so repeat until nothing changes
        while True:
            nodes_to_remove = [
                node for node, degree in G.degree() if degree < self.min_edges_per_node
            ]
            if not nodes_to_remove:
                break  # Exit loop if no nodes meet the condition
            G.remove_nodes_from(nodes_to_remove)

        # Remove isolated nodes
        isolates = list(nx.isolates(G))
        G.remove_nodes_from(isolates)

        # Log the number of nodes and edges before and after cleaning
        num_final_nodes = G.number_of_nodes()
        num_final_edges = G.number_of_edges()
        logger.debug(f"Initial node count: {num_initial_nodes}")
        logger.debug(f"Final node count: {num_final_nodes}")
        logger.debug(f"Initial edge count: {num_initial_edges}")
        logger.debug(f"Final edge count: {num_final_edges}")

    def _assign_edge_weights(self, G: nx.Graph) -> None:
        """Assign default edge weights to the graph.

        Every edge receives the same weight; any existing 'weight' value is overwritten.

        Args:
            G (nx.Graph): A NetworkX graph object. Modified in place.
        """
        # Set default weight for all edges in bulk
        default_weight = 1
        nx.set_edge_attributes(G, default_weight, "weight")

    def _validate_nodes(self, G: nx.Graph) -> None:
        """Validate the graph structure and attributes with attribute fallback for positions and labels.

        Nodes lacking 'x' or 'y' take both from the first two entries of 'pos'. Nodes
        lacking 'label' take it from 'name', then 'id', then the stringified node key.

        Args:
            G (nx.Graph): A NetworkX graph object. Modified in place.

        Raises:
            ValueError: If a node is missing 'x', 'y', and a valid 'pos' attribute.
        """
        # Retrieve all relevant attributes in bulk
        pos_attrs = nx.get_node_attributes(G, "pos")
        name_attrs = nx.get_node_attributes(G, "name")
        id_attrs = nx.get_node_attributes(G, "id")
        # Dictionaries to hold missing or fallback attributes
        x_attrs = {}
        y_attrs = {}
        label_attrs = {}
        nodes_with_missing_labels = []

        # Iterate through nodes to validate and assign missing attributes
        for node in G.nodes:
            attrs = G.nodes[node]
            # Validate and assign 'x' and 'y' attributes
            if "x" not in attrs or "y" not in attrs:
                pos = pos_attrs.get(node)
                has_valid_pos = (
                    node in pos_attrs
                    and isinstance(pos, (list, tuple, np.ndarray))
                    and len(pos) >= 2
                )
                if not has_valid_pos:
                    raise ValueError(
                        f"Node {node} is missing 'x', 'y', and a valid 'pos' attribute."
                    )
                x_attrs[node], y_attrs[node] = pos[:2]

            # Validate and assign 'label' attribute
            if "label" not in attrs:
                if node in name_attrs:
                    label_attrs[node] = name_attrs[node]
                elif node in id_attrs:
                    label_attrs[node] = id_attrs[node]
                else:
                    # Assign node ID as label and log the missing label
                    label_attrs[node] = str(node)
                    nodes_with_missing_labels.append(node)

        # Batch update attributes in the graph
        nx.set_node_attributes(G, x_attrs, "x")
        nx.set_node_attributes(G, y_attrs, "y")
        nx.set_node_attributes(G, label_attrs, "label")

        # Log a warning if any labels were missing
        if nodes_with_missing_labels:
            total_nodes = G.number_of_nodes()
            fraction_missing_labels = len(nodes_with_missing_labels) / total_nodes
            logger.warning(
                f"{len(nodes_with_missing_labels)} out of {total_nodes} nodes "
                f"({fraction_missing_labels:.2%}) were missing 'label' attributes and were assigned node IDs."
            )

    def _assign_edge_lengths(self, G: nx.Graph) -> None:
        """Prepare the network by adjusting surface depth and calculating edge lengths.

        Args:
            G (nx.Graph): The input network graph. Modified in place.
        """
        G_transformed = self._prepare_graph_for_edge_length_assignment(
            G,
            compute_sphere=self.compute_sphere,
            surface_depth=self.surface_depth,
        )
        self._calculate_and_set_edge_lengths(G_transformed, self.compute_sphere)

    def _prepare_graph_for_edge_length_assignment(
        self,
        G: nx.Graph,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
    ) -> nx.Graph:
        """Prepare the graph by normalizing coordinates and optionally mapping nodes to a sphere.

        Args:
            G (nx.Graph): The input graph. Modified in place.
            compute_sphere (bool): Whether to map nodes to a sphere. Defaults to True.
            surface_depth (float): The surface depth for mapping to a sphere. Defaults to 0.0.

        Returns:
            nx.Graph: The graph with transformed coordinates (the same object as `G`).
        """
        self._normalize_graph_coordinates(G)
        if not compute_sphere:
            return G

        self._map_to_sphere(G)
        return self._create_depth(G, surface_depth=surface_depth)

    def _calculate_and_set_edge_lengths(self, G: nx.Graph, compute_sphere: bool) -> None:
        """Compute and assign edge lengths in the graph.

        The 'length' of an edge is the angle between its end points (sphere) or their
        Euclidean distance (plane). A missing 'z' coordinate is treated as 0.

        Args:
            G (nx.Graph): The input graph. Modified in place.
            compute_sphere (bool): Whether to compute spherical distances.
        """
        edges = list(G.edges)
        if not edges:
            # Nothing to measure; an empty array could not be indexed as (edge, end, axis)
            return

        def node_xyz(node):
            """Return the (x, y, z) coordinates of a node, with z defaulting to 0."""
            attrs = G.nodes[node]
            return (attrs["x"], attrs["y"], attrs.get("z", 0))

        # Shape (number of edges, 2 end points, 3 axes); float so division is well defined
        edge_coords = np.array([[node_xyz(u), node_xyz(v)] for u, v in edges], dtype=float)
        u_coords = edge_coords[:, 0, :]
        v_coords = edge_coords[:, 1, :]

        if compute_sphere:
            # Angle between unit vectors; clip guards arccos against rounding just outside [-1, 1]
            u_unit = u_coords / np.linalg.norm(u_coords, axis=1, keepdims=True)
            v_unit = v_coords / np.linalg.norm(v_coords, axis=1, keepdims=True)
            dot_products = np.einsum("ij,ij->i", u_unit, v_unit)
            distances = np.arccos(np.clip(dot_products, -1.0, 1.0))
        else:
            distances = np.linalg.norm(u_coords - v_coords, axis=1)

        # Assign Euclidean or spherical distances to edges
        for (u, v), distance in zip(edges, distances):
            G.edges[u, v]["length"] = distance

    @staticmethod
    def _min_max_scale(coords: np.ndarray) -> np.ndarray:
        """Scale each column of `coords` to the [0, 1] range.

        A column whose values are all equal is mapped to 0 instead of NaN.

        Args:
            coords (np.ndarray): Non-empty 2D array with one row per node.

        Returns:
            np.ndarray: Array of the same shape with every column scaled to [0, 1].
        """
        min_vals = coords.min(axis=0)
        value_range = coords.max(axis=0) - min_vals
        # Avoid 0 / 0 for constant columns; (value - min) is 0 there anyway
        value_range[value_range == 0] = 1
        return (coords - min_vals) / value_range

    def _map_to_sphere(self, G: nx.Graph) -> None:
        """Map the x and y coordinates of graph nodes onto a 3D sphere.

        Args:
            G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
                Modified in place: 'x', 'y' are replaced and 'z' is added.
        """
        nodes = list(G.nodes)
        if not nodes:
            return  # An empty graph has nothing to map

        # Extract x, y coordinates as a NumPy array and normalize them to [0, 1]
        xy_coords = np.array(
            [[G.nodes[node]["x"], G.nodes[node]["y"]] for node in nodes], dtype=float
        )
        normalized_xy = self._min_max_scale(xy_coords)
        # Convert normalized coordinates to spherical angles
        theta = normalized_xy[:, 0] * np.pi * 2
        phi = normalized_xy[:, 1] * np.pi
        # Compute 3D Cartesian coordinates on the unit sphere
        x = np.sin(phi) * np.cos(theta)
        y = np.sin(phi) * np.sin(theta)
        z = np.cos(phi)
        # Assign coordinates back to graph nodes in bulk
        xyz_coords = {node: {"x": x[i], "y": y[i], "z": z[i]} for i, node in enumerate(nodes)}
        nx.set_node_attributes(G, xyz_coords)

    def _normalize_graph_coordinates(self, G: nx.Graph) -> None:
        """Normalize the x and y coordinates of the nodes in the graph to the [0, 1] range.

        Args:
            G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
                Modified in place.
        """
        nodes = list(G.nodes())
        if not nodes:
            return  # An empty graph has nothing to normalize

        # Extract x, y coordinates from the graph nodes and scale them to [0, 1]
        xy_coords = np.array(
            [[G.nodes[node]["x"], G.nodes[node]["y"]] for node in nodes], dtype=float
        )
        normalized_xy = self._min_max_scale(xy_coords)
        # Update the node coordinates with the normalized values
        for node, (norm_x, norm_y) in zip(nodes, normalized_xy):
            G.nodes[node]["x"], G.nodes[node]["y"] = norm_x, norm_y

    def _create_depth(self, G: nx.Graph, surface_depth: float = 0.0) -> nx.Graph:
        """Adjust the 'z' attribute of each node based on the subcluster strengths and normalized surface depth.

        A node's strength is the size of its connected component; the largest component
        receives the full `surface_depth` and smaller ones a proportional share.

        Args:
            G (nx.Graph): The input graph; every node must have 'x', 'y' and 'z'. Modified in place.
            surface_depth (float): The maximum surface depth to apply for the strongest subcluster.

        Returns:
            nx.Graph: The graph with adjusted 'z' attribute for each node (the same object as `G`).
        """
        # NOTE(review): this only nudges the value by 1e-6, so inputs above 1.0 stay
        # above 1.0 - confirm whether a hard cap below 1.0 was intended
        if surface_depth >= 1.0:
            surface_depth -= 1e-6  # Cap the surface depth to prevent a value of 1.0

        # Compute subclusters as connected components
        subcluster_strengths = {}
        max_strength = 0
        for component in nx.connected_components(G):
            size = len(component)
            max_strength = max(max_strength, size)
            for node in component:
                subcluster_strengths[node] = size

        # Move each node's z toward 0 in proportion to its component's relative size
        node_updates = {}
        for node, attrs in list(G.nodes(data=True)):
            normalized_surface_depth = (subcluster_strengths[node] / max_strength) * surface_depth
            x, y, z = attrs["x"], attrs["y"], attrs["z"]
            norm = np.sqrt(x**2 + y**2 + z**2)
            node_updates[node] = {"z": z - (z / norm) * normalized_surface_depth}

        # Batch update node attributes
        nx.set_node_attributes(G, node_updates)

        return G

    def _log_loading(
        self,
        filetype: str,
        filepath: str = "",
    ) -> None:
        """Log which network is being loaded and the settings used to process it.

        Args:
            filetype (str): The type of the source being loaded (e.g., 'GPickle', 'Cytoscape').
            filepath (str, optional): The path to the file being loaded. Defaults to "".
        """
        log_header("Loading network")
        logger.debug(f"Filetype: {filetype}")
        if filepath:
            logger.debug(f"Filepath: {filepath}")
        logger.debug(f"Minimum edges per node: {self.min_edges_per_node}")
        logger.debug(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
        # Surface depth only applies to the spherical projection
        if self.compute_sphere:
            logger.debug(f"Surface depth: {self.surface_depth}")
|