risk-network 0.0.3b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,319 @@
+ """
+ risk/neighborhoods/neighborhoods
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import warnings
+ from typing import Any, Dict, Tuple
+
+ import networkx as nx
+ import numpy as np
+ from sklearn.exceptions import DataConversionWarning
+
+ from risk.neighborhoods.community import (
+     calculate_dijkstra_neighborhoods,
+     calculate_label_propagation_neighborhoods,
+     calculate_louvain_neighborhoods,
+     calculate_markov_clustering_neighborhoods,
+     calculate_spinglass_neighborhoods,
+     calculate_walktrap_neighborhoods,
+ )
+
+ # Suppress DataConversionWarning
+ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
+
+
+ def get_network_neighborhoods(
+     network: nx.Graph,
+     distance_metric: str = "dijkstra",
+     edge_length_threshold: float = 1.0,
+     louvain_resolution: float = 1.0,
+     random_seed: int = 888,
+ ) -> np.ndarray:
+     """Calculate the neighborhoods for each node in the network based on the specified distance metric.
+
+     Args:
+         network (nx.Graph): The network graph.
+         distance_metric (str): The distance metric to use ('dijkstra', 'louvain', 'label_propagation',
+             'markov_clustering', 'walktrap', 'spinglass').
+         edge_length_threshold (float): The edge length percentile (0 to 1) used to filter edges before
+             neighborhoods are computed.
+         louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
+         random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+
+     Returns:
+         np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+
+     Raises:
+         ValueError: If an unsupported distance metric is specified.
+     """
+     network = _create_percentile_limited_subgraph(network, edge_length_threshold)
+
+     if distance_metric == "dijkstra":
+         return calculate_dijkstra_neighborhoods(network)
+     if distance_metric == "louvain":
+         return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
+     if distance_metric == "label_propagation":
+         return calculate_label_propagation_neighborhoods(network)
+     if distance_metric == "markov_clustering":
+         return calculate_markov_clustering_neighborhoods(network)
+     if distance_metric == "walktrap":
+         return calculate_walktrap_neighborhoods(network)
+     if distance_metric == "spinglass":
+         return calculate_spinglass_neighborhoods(network)
+
+     raise ValueError(
+         "Incorrect distance metric specified. Please choose from 'dijkstra', 'louvain', "
+         "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+     )
+
+
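For orientation, a minimal usage sketch of this entry point. The module path follows the docstring header above; the toy graph, coordinates, and edge lengths are illustrative, not taken from the package:

    import networkx as nx
    from risk.neighborhoods.neighborhoods import get_network_neighborhoods

    # Toy graph: node positions and edge lengths are made up for the demo
    G = nx.Graph()
    G.add_node(0, x=0.0, y=0.0)
    G.add_node(1, x=1.0, y=0.0)
    G.add_node(2, x=0.0, y=1.0)
    G.add_edge(0, 1, length=1.0)
    G.add_edge(1, 2, length=1.4)

    # edge_length_threshold=1.0 keeps every edge (the 100th percentile)
    neighborhoods = get_network_neighborhoods(G, distance_metric="dijkstra", edge_length_threshold=1.0)
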
+ def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
+     """Calculate the edge length corresponding to the given percentile of edge lengths in the graph
+     and create a subgraph with all nodes and only the edges at or below this length.
+
+     Args:
+         G (nx.Graph): The input graph.
+         edge_length_percentile (float): The percentile to calculate (between 0 and 1).
+
+     Returns:
+         nx.Graph: A subgraph with all nodes and only the edges whose length is at or below the
+             edge length corresponding to the given percentile.
+     """
+     # Extract edge lengths from the graph
+     edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
+     # Calculate the specific edge length for the given percentile
+     percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
+     # Create a new graph with all nodes from the original graph
+     subgraph = nx.Graph()
+     subgraph.add_nodes_from(G.nodes(data=True))
+     # Add edges to the subgraph if they are at or below the specified percentile length
+     for u, v, d in G.edges(data=True):
+         if d.get("length", 1) <= percentile_length:
+             subgraph.add_edge(u, v, **d)
+
+     return subgraph
+
+
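A quick check of the percentile arithmetic: np.percentile expects a 0 to 100 scale, hence the multiplication by 100 above. The values below are illustrative:

    import numpy as np

    lengths = [0.5, 1.0, 1.5, 2.0]             # illustrative edge lengths
    print(np.percentile(lengths, 0.5 * 100))   # 1.25; edges with length <= 1.25 would be kept
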
+ def process_neighborhoods(
+     network: nx.Graph,
+     neighborhoods: Dict[str, Any],
+     impute_depth: int = 1,
+     prune_threshold: float = 0.0,
+ ) -> Dict[str, Any]:
+     """Process neighborhoods based on the imputation and pruning settings.
+
+     Args:
+         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
+         neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix',
+             and 'significant_enrichment_matrix'.
+         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
+         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
+
+     Returns:
+         dict: Processed neighborhoods data, including the updated matrices and enrichment counts.
+     """
+     enrichment_matrix = neighborhoods["enrichment_matrix"]
+     binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
+     significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+     print(f"Imputation depth: {impute_depth}")
+     if impute_depth:
+         (
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             significant_enrichment_matrix,
+         ) = _impute_neighbors(
+             network,
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             max_depth=impute_depth,
+         )
+
+     print(f"Pruning threshold: {prune_threshold}")
+     if prune_threshold:
+         (
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             significant_enrichment_matrix,
+         ) = _prune_neighbors(
+             network,
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             distance_threshold=prune_threshold,
+         )
+
+     neighborhood_enrichment_counts = np.sum(binary_enrichment_matrix, axis=0)
+     node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+     return {
+         "enrichment_matrix": enrichment_matrix,
+         "binary_enrichment_matrix": binary_enrichment_matrix,
+         "significant_enrichment_matrix": significant_enrichment_matrix,
+         "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
+         "node_enrichment_sums": node_enrichment_sums,
+     }
+
+
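A minimal sketch of calling process_neighborhoods, assuming (as the helper docstrings below state) that node IDs index the matrix rows; the graph, matrices, and shapes here are illustrative:

    import networkx as nx
    import numpy as np
    from risk.neighborhoods.neighborhoods import process_neighborhoods

    # Toy path graph 0-1-2-3 with planar coordinates; matrix rows correspond to nodes
    G = nx.path_graph(4)
    for i, node in enumerate(G.nodes()):
        G.nodes[node]["x"], G.nodes[node]["y"] = float(i), 0.0

    enrichment = np.array([[2.0, 0.0], [0.0, 1.5], [0.0, 0.0], [3.0, 0.0]])
    binary = (enrichment > 0).astype(float)
    neighborhoods = {
        "enrichment_matrix": enrichment,
        "binary_enrichment_matrix": binary,
        "significant_enrichment_matrix": np.where(binary == 1, enrichment, 0),
    }
    result = process_neighborhoods(G, neighborhoods, impute_depth=1, prune_threshold=0.0)
    print(result["node_enrichment_sums"])  # row sums after node 2 is imputed from its nearest enriched neighbor
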
+ def _impute_neighbors(
+     network: nx.Graph,
+     enrichment_matrix: np.ndarray,
+     binary_enrichment_matrix: np.ndarray,
+     max_depth: int = 3,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors
+     in the network graph.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+         binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+         max_depth (int): Maximum depth of nodes to traverse for imputing values.
+
+     Returns:
+         tuple: A tuple containing:
+             - np.ndarray: The imputed enrichment matrix.
+             - np.ndarray: The imputed alpha threshold matrix.
+             - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+     """
+     # Calculate the shortest distance to an enriched neighbor for each node
+     # NOTE: these distances are collected but not used further below
+     shortest_distances = []
+     for node in network.nodes():
+         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+         if neighbors:
+             shortest_distance = min([_get_euclidean_distance(node, n, network) for n in neighbors])
+             shortest_distances.append(shortest_distance)
+
+     depth = 1
+     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+     while len(rows_to_impute) and depth <= max_depth:
+         next_rows_to_impute = []
+         for row_index in rows_to_impute:
+             neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
+             valid_neighbors = [
+                 n
+                 for n in neighbors
+                 if n != row_index
+                 and binary_enrichment_matrix[n].sum() != 0
+                 and enrichment_matrix[n].sum() != 0
+             ]
+             if valid_neighbors:
+                 closest_neighbor = min(
+                     valid_neighbors, key=lambda n: _get_euclidean_distance(row_index, n, network)
+                 )
+                 # Impute the row with the closest valid neighbor's data, damped by depth
+                 enrichment_matrix[row_index] = enrichment_matrix[closest_neighbor]
+                 binary_enrichment_matrix[row_index] = binary_enrichment_matrix[
+                     closest_neighbor
+                 ] / np.sqrt(depth + 1)
+             else:
+                 next_rows_to_impute.append(row_index)
+
+         rows_to_impute = next_rows_to_impute
+         depth += 1
+
+     # Create a matrix where non-significant entries are set to zero
+     significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+
+     return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
+
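The imputation loop leans on nx.single_source_shortest_path_length, which returns every node within cutoff hops of the source (including the source itself), e.g.:

    import networkx as nx

    G = nx.path_graph(5)  # 0-1-2-3-4
    print(nx.single_source_shortest_path_length(G, 0, cutoff=2))
    # {0: 0, 1: 1, 2: 2} -- the source plus all nodes within two hops
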
+ def _prune_neighbors(
+     network: nx.Graph,
+     enrichment_matrix: np.ndarray,
+     binary_enrichment_matrix: np.ndarray,
+     distance_threshold: float = 0.9,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Remove outlier nodes based on the rank of their average distance to enriched neighbors.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         enrichment_matrix (np.ndarray): The enrichment matrix.
+         binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+         distance_threshold (float): Rank threshold (0 to 1) used to determine outliers.
+
+     Returns:
+         tuple: A tuple containing:
+             - np.ndarray: The updated enrichment matrix with outliers set to zero.
+             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
+             - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+     """
+     # Identify indices with non-zero rows in the binary enrichment matrix
+     non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
+     average_distances = []
+     for node in non_zero_indices:
+         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+         if neighbors:
+             average_distance = np.mean(
+                 [_get_euclidean_distance(node, n, network) for n in neighbors]
+             )
+             average_distances.append(average_distance)
+
+     # Calculate the distance threshold value based on rank
+     distance_threshold_value = _calculate_threshold(average_distances, 1 - distance_threshold)
+     # Prune nodes that are outliers based on the distance threshold
+     for row_index in non_zero_indices:
+         neighbors = [
+             n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
+         ]
+         if neighbors:
+             average_distance = np.mean(
+                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
+             )
+             if average_distance >= distance_threshold_value:
+                 enrichment_matrix[row_index] = 0
+                 binary_enrichment_matrix[row_index] = 0
+
+     # Create a matrix where non-significant entries are set to zero
+     significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+
+     return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
+
+ def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
+     """Calculate the Euclidean distance between two nodes in the network.
+
+     Args:
+         node1 (Any): The first node.
+         node2 (Any): The second node.
+         network (nx.Graph): The network graph containing the nodes.
+
+     Returns:
+         float: The Euclidean distance between the two nodes.
+     """
+     pos1 = _get_node_position(network, node1)
+     pos2 = _get_node_position(network, node2)
+     return np.linalg.norm(pos1 - pos2)
+
+
+ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
+     """Retrieve the position of a node in the network as a numpy array.
+
+     Args:
+         network (nx.Graph): The network graph containing node positions.
+         node (Any): The node for which the position is being retrieved.
+
+     Returns:
+         np.ndarray: The position of the node as [x, y] or [x, y, z], depending on which
+             coordinates are present.
+     """
+     return np.array(
+         [
+             network.nodes[node].get(coord, 0)
+             for coord in ["x", "y", "z"]
+             if coord in network.nodes[node]
+         ]
+     )
+
+
+ def _calculate_threshold(average_distances: list, distance_threshold: float) -> float:
+     """Calculate the distance threshold based on the given average distances and a percentile threshold.
+
+     Args:
+         average_distances (list): A list of average distances.
+         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
+
+     Returns:
+         float: The calculated distance threshold value.
+     """
+     # Sort the average distances
+     sorted_distances = np.sort(average_distances)
+     # Compute the rank percentiles for the sorted distances
+     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
+     # Interpolate the sorted distances onto 1000 evenly spaced percentiles
+     interpolated_percentiles = np.linspace(0, 1, 1000)
+     smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
+     # Determine the index corresponding to the distance threshold
+     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
+     # Return the smoothed distance at the calculated index
+     return smoothed_distances[threshold_index]
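
To make the smoothing step concrete, here is the same interpolation applied to illustrative distances; with a threshold of 0.5 it recovers roughly the median:

    import numpy as np

    distances = np.sort([1.0, 2.0, 4.0, 8.0])
    ranks = np.linspace(0, 1, len(distances))
    smoothed = np.interp(np.linspace(0, 1, 1000), ranks, distances)
    index = int(np.ceil(0.5 * len(smoothed))) - 1
    print(smoothed[index])  # ~3.0, interpolated between the two middle values
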
@@ -0,0 +1,8 @@
+ """
+ risk/network
+ ~~~~~~~~~~~~
+ """
+
+ from .graph import NetworkGraph
+ from .io import NetworkIO
+ from .plot import NetworkPlotter
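
With these re-exports, downstream code can import the public classes directly from the subpackage:

    from risk.network import NetworkGraph, NetworkIO, NetworkPlotter
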
@@ -0,0 +1,165 @@
+ """
+ risk/network/geometry
+ ~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import networkx as nx
+ import numpy as np
+
+
+ def apply_edge_lengths(
+     G: nx.Graph,
+     compute_sphere: bool = True,
+     surface_depth: float = 0.0,
+     include_edge_weight: bool = False,
+ ) -> nx.Graph:
+     """Apply edge lengths in the graph, optionally mapping nodes to a sphere and including edge weights.
+
+     Args:
+         G (nx.Graph): The input graph.
+         compute_sphere (bool): Whether to map nodes to a sphere. Defaults to True.
+         surface_depth (float): The surface depth for mapping to a sphere. Defaults to 0.0.
+         include_edge_weight (bool): Whether to include edge weights in the calculation. Defaults to False.
+
+     Returns:
+         nx.Graph: The graph with applied edge lengths.
+     """
+
+     def compute_distance(
+         u_coords: np.ndarray, v_coords: np.ndarray, is_sphere: bool = False
+     ) -> float:
+         """Compute the distance between two coordinate vectors.
+
+         Args:
+             u_coords (np.ndarray): Coordinates of the first point.
+             v_coords (np.ndarray): Coordinates of the second point.
+             is_sphere (bool, optional): If True, compute the spherical (great-circle) distance.
+                 Defaults to False.
+
+         Returns:
+             float: The computed distance between the two points.
+         """
+         if is_sphere:
+             # Normalize vectors and compute the spherical distance via the dot product
+             u_coords /= np.linalg.norm(u_coords)
+             v_coords /= np.linalg.norm(v_coords)
+             return np.arccos(np.clip(np.dot(u_coords, v_coords), -1.0, 1.0))
+         else:
+             # Compute Euclidean distance
+             return np.linalg.norm(u_coords - v_coords)
+
+     # Normalize graph coordinates
+     _normalize_graph_coordinates(G)
+     # Normalize weights
+     _normalize_weights(G)
+     # Use G_depth for edge length calculation
+     if compute_sphere:
+         # Map to sphere and adjust depth
+         _map_to_sphere(G)
+         G_depth = _create_depth(G.copy(), surface_depth=surface_depth)
+     else:
+         # Calculate edge lengths directly on the plane
+         G_depth = G.copy()
+
+     for u, v, _ in G_depth.edges(data=True):
+         u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
+         v_coords = np.array([G_depth.nodes[v]["x"], G_depth.nodes[v]["y"]])
+         if compute_sphere:
+             u_coords = np.append(u_coords, G_depth.nodes[u].get("z", 0))
+             v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
+
+         distance = compute_distance(u_coords, v_coords, is_sphere=compute_sphere)
+         if include_edge_weight:
+             # The square root of the normalized weight is used to dampen the effect of large weights
+             G.edges[u, v]["length"] = distance / np.sqrt(G.edges[u, v]["normalized_weight"] + 1e-6)
+         else:
+             # Use the calculated distance directly
+             G.edges[u, v]["length"] = distance
+
+     return G
+
+
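A minimal sketch of driving apply_edge_lengths, assuming nodes already carry a 2D layout in 'x'/'y' and every edge has a 'weight' (as _normalize_weights expects); the triangle below is illustrative:

    import networkx as nx
    from risk.network.geometry import apply_edge_lengths

    G = nx.Graph()
    G.add_edge("a", "b", weight=1.0)
    G.add_edge("b", "c", weight=2.0)
    G.add_edge("a", "c", weight=1.0)
    for node, (x, y) in zip(G.nodes(), [(0.0, 0.0), (1.0, 0.0), (0.5, 1.0)]):
        G.nodes[node]["x"], G.nodes[node]["y"] = x, y

    G = apply_edge_lengths(G, compute_sphere=True, surface_depth=0.1, include_edge_weight=True)
    print(nx.get_edge_attributes(G, "length"))
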
+ def _map_to_sphere(G: nx.Graph) -> None:
+     """Map the x and y coordinates of graph nodes onto a 3D sphere.
+
+     Args:
+         G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
+     """
+     # Extract x, y coordinates from the graph nodes
+     xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in G.nodes()])
+     # Normalize the coordinates to [0, 1]
+     min_vals = np.min(xy_coords, axis=0)
+     max_vals = np.max(xy_coords, axis=0)
+     normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
+     # Map normalized coordinates to theta and phi on a sphere
+     theta = normalized_xy[:, 0] * np.pi * 2
+     phi = normalized_xy[:, 1] * np.pi
+     # Convert spherical coordinates to Cartesian coordinates on the unit sphere
+     for i, node in enumerate(G.nodes()):
+         x = np.sin(phi[i]) * np.cos(theta[i])
+         y = np.sin(phi[i]) * np.sin(theta[i])
+         z = np.cos(phi[i])
+         G.nodes[node]["x"] = x
+         G.nodes[node]["y"] = y
+         G.nodes[node]["z"] = z
+
+
+ def _normalize_graph_coordinates(G: nx.Graph) -> None:
+     """Normalize the x and y coordinates of the nodes in the graph to the [0, 1] range.
+
+     Args:
+         G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
+     """
+     # Extract x, y coordinates from the graph nodes
+     xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in G.nodes()])
+     # Calculate min and max values for x and y
+     min_vals = np.min(xy_coords, axis=0)
+     max_vals = np.max(xy_coords, axis=0)
+     # Normalize the coordinates to [0, 1]
+     normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
+     # Update the node coordinates with the normalized values
+     for i, node in enumerate(G.nodes()):
+         G.nodes[node]["x"], G.nodes[node]["y"] = normalized_xy[i]
+
+
+ def _normalize_weights(G: nx.Graph) -> None:
+     """Normalize the weights of the edges in the graph.
+
+     Args:
+         G (nx.Graph): The input graph with weighted edges.
+     """
+     # "weight" is present on all edges; it defaults to 1.0 when the user does not specify one
+     weights = [data["weight"] for _, _, data in G.edges(data=True)]
+     if weights:  # Ensure there are weighted edges
+         min_weight = min(weights)
+         max_weight = max(weights)
+         range_weight = max_weight - min_weight if max_weight > min_weight else 1
+         for _, _, data in G.edges(data=True):
+             data["normalized_weight"] = (data["weight"] - min_weight) / range_weight
+
+
+ def _create_depth(G: nx.Graph, surface_depth: float = 0.0) -> nx.Graph:
+     """Adjust the 'z' attribute of each node based on subcluster strengths and the normalized surface depth.
+
+     Args:
+         G (nx.Graph): The input graph.
+         surface_depth (float): The maximum surface depth to apply for the strongest subcluster.
+
+     Returns:
+         nx.Graph: The graph with an adjusted 'z' attribute for each node.
+     """
+     if surface_depth >= 1.0:
+         surface_depth = surface_depth - 1e-6  # Cap the surface depth just below 1.0
+
+     # Compute subclusters as connected components (any other subcluster method could be substituted)
+     subclusters = {node: set(nx.node_connected_component(G, node)) for node in G.nodes}
+     # Create a strength metric for subclusters (here, their size)
+     subcluster_strengths = {node: len(neighbors) for node, neighbors in subclusters.items()}
+     # Normalize the subcluster strengths and apply depths
+     max_strength = max(subcluster_strengths.values())
+     for node, strength in subcluster_strengths.items():
+         normalized_surface_depth = (strength / max_strength) * surface_depth
+         x, y, z = G.nodes[node]["x"], G.nodes[node]["y"], G.nodes[node]["z"]
+         norm = np.sqrt(x**2 + y**2 + z**2)
+         G.nodes[node]["z"] -= (z / norm) * normalized_surface_depth  # Pull the node inward along z
+
+     return G
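
A small check of the depth adjustment, assuming nodes already sit on the unit sphere (as _map_to_sphere arranges), so the radial norm is 1 and the z-shift reduces to z * normalized_surface_depth. _create_depth is an internal helper; this is purely illustrative:

    import networkx as nx
    from risk.network.geometry import _create_depth

    G = nx.Graph()
    G.add_edge(0, 1)
    G.nodes[0].update(x=0.0, y=0.0, z=1.0)  # "north pole"
    G.nodes[1].update(x=1.0, y=0.0, z=0.0)  # on the equator
    G = _create_depth(G, surface_depth=0.2)
    print(G.nodes[0]["z"])  # 1.0 - 1.0 * 0.2 = 0.8
    print(G.nodes[1]["z"])  # 0.0: the shift is proportional to z itself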