risk-network 0.0.7b11__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
risk/neighborhoods/neighborhoods.py CHANGED
@@ -5,7 +5,7 @@ risk/neighborhoods/neighborhoods
 
 import random
 import warnings
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
 
 import networkx as nx
 import numpy as np
@@ -28,50 +28,82 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 
 def get_network_neighborhoods(
     network: nx.Graph,
-    distance_metric: str = "louvain",
-    edge_length_threshold: float = 1.0,
+    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+    edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
     louvain_resolution: float = 1.0,
     random_seed: int = 888,
 ) -> np.ndarray:
-    """Calculate the neighborhoods for each node in the network based on the specified distance metric.
+    """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
 
     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str): The distance metric to use ('greedy_modularity', 'louvain', 'label_propagation',
-            'markov_clustering', 'walktrap', 'spinglass').
-        edge_length_threshold (float): The edge length threshold for the neighborhoods.
+        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
+            metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
+            'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
+        edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
+            Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
+            Defaults to 1.0.
         louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
         random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
 
     Returns:
-        np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+        np.ndarray: Summed neighborhood matrix from all selected algorithms.
     """
-    # Set random seed for reproducibility in all methods besides Louvain, which requires a separate seed
+    # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)
 
-    # Create a subgraph based on the edge length percentile threshold
-    network = _create_percentile_limited_subgraph(
-        network, edge_length_percentile=edge_length_threshold
-    )
+    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    if isinstance(distance_metric, (str, np.ndarray)):
+        distance_metric = [distance_metric]
+    # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
+    if isinstance(edge_length_threshold, (float, int)):
+        edge_length_threshold = [edge_length_threshold] * len(distance_metric)
+    # Check that the number of distance metrics matches the number of edge length thresholds
+    if len(distance_metric) != len(edge_length_threshold):
+        raise ValueError(
+            "The number of distance metrics must match the number of edge length thresholds."
+        )
 
-    if distance_metric == "louvain":
-        return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
-    if distance_metric == "greedy_modularity":
-        return calculate_greedy_modularity_neighborhoods(network)
-    if distance_metric == "label_propagation":
-        return calculate_label_propagation_neighborhoods(network)
-    if distance_metric == "markov_clustering":
-        return calculate_markov_clustering_neighborhoods(network)
-    if distance_metric == "walktrap":
-        return calculate_walktrap_neighborhoods(network)
-    if distance_metric == "spinglass":
-        return calculate_spinglass_neighborhoods(network)
-
-    raise ValueError(
-        "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
-        "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
-    )
+    # Initialize combined neighborhood matrix
+    num_nodes = network.number_of_nodes()
+    combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+
+    # Loop through each distance metric and corresponding edge length threshold
+    for metric, threshold in zip(distance_metric, edge_length_threshold):
+        # Create a subgraph based on the specific edge length threshold for this algorithm
+        subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
+        # Call the appropriate neighborhood function based on the metric
+        if metric == "louvain":
+            neighborhoods = calculate_louvain_neighborhoods(
+                subgraph, louvain_resolution, random_seed=random_seed
+            )
+        elif metric == "greedy_modularity":
+            neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
+        elif metric == "label_propagation":
+            neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
+        elif metric == "markov_clustering":
+            neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
+        elif metric == "walktrap":
+            neighborhoods = calculate_walktrap_neighborhoods(subgraph)
+        elif metric == "spinglass":
+            neighborhoods = calculate_spinglass_neighborhoods(subgraph)
+        else:
+            raise ValueError(
+                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
+                "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+            )
+
+        # Sum the neighborhood matrices
+        combined_neighborhoods += neighborhoods
+
+    # Ensure that the maximum value in each row is set to 1
+    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
+    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
+    # focusing on the most significant connection per row.
+    combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
+
+    return combined_neighborhoods
 
 
 def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
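This hunk generalizes `get_network_neighborhoods` from a single community-detection metric to a list of metrics, each paired with its own edge-length threshold; the per-metric neighborhood matrices are summed and then collapsed by the new `_set_max_to_one` helper (shown in the next hunk). A minimal usage sketch of the new signature; the toy graph merely stands in for a network that has already been through RISK's loading and geometry steps, which annotate edges with the lengths the percentile subgraph filter relies on:

```python
import networkx as nx

from risk.neighborhoods.neighborhoods import get_network_neighborhoods

# Toy stand-in; a real input graph carries edge length attributes.
G = nx.karate_club_graph()

# 0.0.7b11-style call (still valid in 0.0.8): one metric, one threshold.
single = get_network_neighborhoods(G, distance_metric="louvain", edge_length_threshold=0.5)

# New in 0.0.8: paired sequences of metrics and thresholds, summed into
# one combined neighborhood matrix.
combined = get_network_neighborhoods(
    G,
    distance_metric=["louvain", "label_propagation"],
    edge_length_threshold=[0.5, 0.75],
)
```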
@@ -110,6 +142,25 @@ def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: flo
     return subgraph
 
 
+def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
+    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
+
+    Args:
+        matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
+
+    Returns:
+        np.ndarray: The modified matrix where only the maximum value(s) in each row is set to 1, and others are set to 0.
+    """
+    # Find the maximum value in each row (column-wise max operation)
+    max_values = np.max(matrix, axis=1, keepdims=True)
+    # Create a boolean mask where elements are True if they are the max value in their row
+    max_mask = matrix == max_values
+    # Set all elements to 0, and then set the maximum value positions to 1
+    matrix[:] = 0  # Set everything to 0
+    matrix[max_mask] = 1  # Set only the max values to 1
+    return matrix
+
+
 def process_neighborhoods(
     network: nx.Graph,
     neighborhoods: Dict[str, Any],
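The new `_set_max_to_one` helper reduces the summed matrix to a binary one that keeps only each row's maxima. A standalone rerun of its core logic on a small array (the helper itself is private, so the steps are inlined here); note that ties are all kept, and an all-zero row becomes all ones because every entry equals its row maximum of 0:

```python
import numpy as np

matrix = np.array([[2, 5, 5],
                   [0, 0, 0],
                   [7, 1, 3]])

max_values = np.max(matrix, axis=1, keepdims=True)  # [[5], [0], [7]]
max_mask = matrix == max_values                     # True where entry == row max
matrix[:] = 0
matrix[max_mask] = 1
print(matrix)
# [[0 1 1]    both tied maxima kept
#  [1 1 1]    all-zero row: every entry equals the row max (0)
#  [1 0 0]]
```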
@@ -120,47 +171,47 @@ def process_neighborhoods(
 
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix', and 'significant_enrichment_matrix'.
+        neighborhoods (Dict[str, Any]): Dictionary containing 'enrichment_matrix', 'significant_binary_enrichment_matrix', and 'significant_enrichment_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
-        dict: Processed neighborhoods data, including the updated matrices and enrichment counts.
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and enrichment counts.
     """
     enrichment_matrix = neighborhoods["enrichment_matrix"]
-    binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
+    significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
     significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
-    logger.info(f"Imputation depth: {impute_depth}")
+    logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
             enrichment_matrix,
-            binary_enrichment_matrix,
+            significant_binary_enrichment_matrix,
             significant_enrichment_matrix,
         ) = _impute_neighbors(
             network,
             enrichment_matrix,
-            binary_enrichment_matrix,
+            significant_binary_enrichment_matrix,
             max_depth=impute_depth,
         )
 
-    logger.info(f"Pruning threshold: {prune_threshold}")
+    logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
             enrichment_matrix,
-            binary_enrichment_matrix,
+            significant_binary_enrichment_matrix,
             significant_enrichment_matrix,
         ) = _prune_neighbors(
             network,
             enrichment_matrix,
-            binary_enrichment_matrix,
+            significant_binary_enrichment_matrix,
             distance_threshold=prune_threshold,
         )
 
-    neighborhood_enrichment_counts = np.sum(binary_enrichment_matrix, axis=0)
+    neighborhood_enrichment_counts = np.sum(significant_binary_enrichment_matrix, axis=0)
     node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
     return {
         "enrichment_matrix": enrichment_matrix,
-        "binary_enrichment_matrix": binary_enrichment_matrix,
+        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
         "significant_enrichment_matrix": significant_enrichment_matrix,
         "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
         "node_enrichment_sums": node_enrichment_sums,
@@ -170,7 +221,7 @@ def process_neighborhoods(
 def _impute_neighbors(
     network: nx.Graph,
     enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significant_binary_enrichment_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors in the network graph.
@@ -178,7 +229,7 @@ def _impute_neighbors(
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
         enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
@@ -188,19 +239,21 @@ def _impute_neighbors(
         - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-    enrichment_matrix, binary_enrichment_matrix = _impute_neighbors_with_similarity(
-        network, enrichment_matrix, binary_enrichment_matrix, max_depth=max_depth
+    enrichment_matrix, significant_binary_enrichment_matrix = _impute_neighbors_with_similarity(
+        network, enrichment_matrix, significant_binary_enrichment_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    significant_enrichment_matrix = np.where(
+        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    )
 
-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
 
 
 def _impute_neighbors_with_similarity(
     network: nx.Graph,
     enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significant_binary_enrichment_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
     """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
@@ -208,7 +261,7 @@ def _impute_neighbors_with_similarity(
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
         enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
@@ -217,27 +270,31 @@ def _impute_neighbors_with_similarity(
         - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+    rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
         # Iterate over all enriched nodes
-        for row_index in range(binary_enrichment_matrix.shape[0]):
-            if binary_enrichment_matrix[row_index].sum() != 0:
-                enrichment_matrix, binary_enrichment_matrix = _process_node_imputation(
-                    row_index, network, enrichment_matrix, binary_enrichment_matrix, depth
+        for row_index in range(significant_binary_enrichment_matrix.shape[0]):
+            if significant_binary_enrichment_matrix[row_index].sum() != 0:
+                enrichment_matrix, significant_binary_enrichment_matrix = _process_node_imputation(
+                    row_index,
+                    network,
+                    enrichment_matrix,
+                    significant_binary_enrichment_matrix,
+                    depth,
                 )
 
         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+        rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
         depth += 1
 
-    return enrichment_matrix, binary_enrichment_matrix
+    return enrichment_matrix, significant_binary_enrichment_matrix
 
 
 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
     enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significant_binary_enrichment_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
     """Process the imputation for a single node based on its enriched neighbors.
@@ -246,7 +303,7 @@ def _process_node_imputation(
         row_index (int): The index of the enriched node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
         enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.
 
     Returns:
@@ -259,7 +316,7 @@ def _process_node_imputation(
         n
         for n in neighbors
         if n != row_index
-        and binary_enrichment_matrix[n].sum() != 0
+        and significant_binary_enrichment_matrix[n].sum() != 0
         and enrichment_matrix[n].sum() != 0
     ]
     # Filter non-enriched neighbors
@@ -267,7 +324,7 @@ def _process_node_imputation(
         n
         for n in neighbors
         if n != row_index
-        and binary_enrichment_matrix[n].sum() == 0
+        and significant_binary_enrichment_matrix[n].sum() == 0
         and enrichment_matrix[n].sum() == 0
     ]
     # If there are valid non-enriched neighbors
@@ -312,15 +369,17 @@ def _process_node_imputation(
         enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
             depth + 1
         )
-        binary_enrichment_matrix[most_similar_neighbor] = binary_enrichment_matrix[row_index]
+        significant_binary_enrichment_matrix[most_similar_neighbor] = (
+            significant_binary_enrichment_matrix[row_index]
+        )
 
-    return enrichment_matrix, binary_enrichment_matrix
+    return enrichment_matrix, significant_binary_enrichment_matrix
 
 
 def _prune_neighbors(
     network: nx.Graph,
     enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significant_binary_enrichment_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.
@@ -328,7 +387,7 @@ def _prune_neighbors(
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
         enrichment_matrix (np.ndarray): The enrichment matrix.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
 
     Returns:
@@ -338,10 +397,12 @@ def _prune_neighbors(
         - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
     """
     # Identify indices with non-zero rows in the binary enrichment matrix
-    non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
+    non_zero_indices = np.where(significant_binary_enrichment_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
-        neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+        neighbors = [
+            n for n in network.neighbors(node) if significant_binary_enrichment_matrix[n].sum() != 0
+        ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(node, n, network) for n in neighbors]
@@ -353,7 +414,9 @@ def _prune_neighbors(
     # Prune nodes that are outliers based on the distance threshold
     for row_index in non_zero_indices:
         neighbors = [
-            n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
+            n
+            for n in network.neighbors(row_index)
+            if significant_binary_enrichment_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
@@ -361,12 +424,14 @@ def _prune_neighbors(
             )
             if median_distance >= distance_threshold_value:
                 enrichment_matrix[row_index] = 0
-                binary_enrichment_matrix[row_index] = 0
+                significant_binary_enrichment_matrix[row_index] = 0
 
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    significant_enrichment_matrix = np.where(
+        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    )
 
-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
 
 
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
@@ -408,7 +473,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     """Calculate the distance threshold based on the given median distances and a percentile threshold.
 
     Args:
-        median_distances (list): An array of median distances.
+        median_distances (List): An array of median distances.
         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
 
     Returns:
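This hunk only tightens the docstring type (`list` → `List`). The body of `_calculate_threshold` is not part of the diff, but the percentile cutoff its docstring describes can be illustrated like this (an assumption about the implementation, not the package's actual code):

```python
import numpy as np

median_distances = [0.6, 0.8, 0.9, 1.1, 2.4]
distance_threshold = 0.9  # rank threshold in (0, 1]

# Hypothetical cutoff: nodes whose median neighbor distance ranks at or
# above the 90th percentile would be treated as outliers and pruned.
distance_threshold_value = np.quantile(median_distances, distance_threshold)
```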
risk/network/geometry.py CHANGED
@@ -3,6 +3,8 @@ risk/network/geometry
 ~~~~~~~~~~~~~~~~~~~~~
 """
 
+import copy
+
 import networkx as nx
 import numpy as np
 
@@ -55,10 +57,10 @@ def assign_edge_lengths(
     if compute_sphere:
         # Map to sphere and adjust depth
         _map_to_sphere(G)
-        G_depth = _create_depth(G.copy(), surface_depth=surface_depth)
+        G_depth = _create_depth(copy.deepcopy(G), surface_depth=surface_depth)
     else:
         # Calculate edge lengths directly on the plane
-        G_depth = G.copy()
+        G_depth = copy.deepcopy(G)
 
     for u, v, _ in G_depth.edges(data=True):
         u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
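The switch from `G.copy()` to `copy.deepcopy(G)` matters because `nx.Graph.copy()` copies the node and edge attribute dictionaries but still shares any mutable attribute values between the original and the copy, so in-place adjustments made while building `G_depth` could leak back into `G`; `deepcopy` severs that link. A minimal demonstration with a toy mutable attribute (not the package's actual node data):

```python
import copy

import networkx as nx

G = nx.Graph()
G.add_node(0, coords=[1.0, 2.0])  # mutable attribute value

shallow = G.copy()
shallow.nodes[0]["coords"][0] = 99.0  # mutates the list shared with G
print(G.nodes[0]["coords"])           # [99.0, 2.0] -- leaked into G

deep = copy.deepcopy(G)
deep.nodes[0]["coords"][0] = -1.0     # independent copy; G is untouched
print(G.nodes[0]["coords"])           # still [99.0, 2.0]
```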
@@ -68,6 +70,7 @@ def assign_edge_lengths(
         v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
 
         distance = compute_distance(u_coords, v_coords, is_sphere=compute_sphere)
+        # Assign edge lengths to the original graph
         if include_edge_weight:
             # Square root of the normalized weight is used to minimize the effect of large weights
             G.edges[u, v]["length"] = distance / np.sqrt(G.edges[u, v]["normalized_weight"] + 1e-6)