risk-network 0.0.8b27__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registries. It is provided for informational purposes only.
Files changed (45)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +195 -118
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/console.py +3 -1
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +442 -0
  10. risk/neighborhoods/community.py +324 -101
  11. risk/neighborhoods/domains.py +125 -52
  12. risk/neighborhoods/neighborhoods.py +177 -165
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +71 -89
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +200 -0
  17. risk/network/{graph.py → graph/graph.py} +90 -40
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +103 -114
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +9 -8
  23. risk/network/{plot → plotter}/contour.py +27 -24
  24. risk/network/{plot → plotter}/labels.py +73 -78
  25. risk/network/{plot → plotter}/network.py +45 -39
  26. risk/network/{plot → plotter}/plotter.py +23 -17
  27. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +114 -122
  28. risk/network/{plot → plotter}/utils/layout.py +10 -7
  29. risk/risk.py +11 -500
  30. risk/stats/__init__.py +10 -4
  31. risk/stats/permutation/__init__.py +1 -1
  32. risk/stats/permutation/permutation.py +44 -38
  33. risk/stats/permutation/test_functions.py +26 -18
  34. risk/stats/{stats.py → significance.py} +17 -15
  35. risk/stats/stat_tests.py +267 -0
  36. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/METADATA +31 -46
  37. risk_network-0.0.9.dist-info/RECORD +40 -0
  38. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/WHEEL +1 -1
  39. risk/constants.py +0 -31
  40. risk/network/plot/__init__.py +0 -6
  41. risk/stats/hypergeom.py +0 -54
  42. risk/stats/poisson.py +0 -44
  43. risk_network-0.0.8b27.dist-info/RECORD +0 -37
  44. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/LICENSE +0 -0
  45. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/top_level.txt +0 -0
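Most of this release is reorganization rather than new behavior: `plot` becomes `plotter`, `params.py` becomes `parameters.py`, `stats.py` becomes `significance.py`, the monolithic `risk/risk.py` loses roughly 500 lines to the new `api.py` modules, and `constants.py`, `hypergeom.py`, and `poisson.py` are deleted (the latter two likely absorbed by the new `stat_tests.py`). A hedged sketch of how downstream imports shift; the old re-exports appear in the `risk/network/__init__.py` hunk at the end of this page, while the new paths are assumptions read off the file moves above:

    # 0.0.8b27 re-exports (removed in 0.0.9, per the __init__.py hunk below)
    # from risk.network import NetworkGraph, NetworkIO, NetworkPlotter

    # 0.0.9: NetworkIO is the only re-export left in risk.network
    from risk.network import NetworkIO
    # from risk.network.graph.graph import NetworkGraph        # assumed new path
    # from risk.network.plotter.plotter import NetworkPlotter  # assumed new path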
risk/neighborhoods/neighborhoods.py CHANGED
@@ -9,12 +9,14 @@ from typing import Any, Dict, List, Tuple, Union
 
 import networkx as nx
 import numpy as np
+from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
+    calculate_leiden_neighborhoods,
     calculate_louvain_neighborhoods,
     calculate_markov_clustering_neighborhoods,
     calculate_spinglass_neighborhoods,
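The import hunk above adds Leiden support; the hunk that follows wires it into `get_network_neighborhoods`. A hedged usage sketch against the new signature shown below (the call is left commented because it requires the installed package; argument names and the sparse return type are taken from the diff):

    import networkx as nx

    # from risk.neighborhoods.neighborhoods import get_network_neighborhoods

    network = nx.karate_club_graph()
    # neighborhoods = get_network_neighborhoods(
    #     network,
    #     distance_metric=["leiden", "louvain"],  # one entry per algorithm
    #     fraction_shortest_edges=[0.75, 0.75],   # one threshold per metric
    #     leiden_resolution=1.0,
    #     louvain_resolution=0.1,
    #     random_seed=888,
    # )  # returns a scipy.sparse.csr_matrix summed across algorithms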
@@ -29,121 +31,118 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 def get_network_neighborhoods(
     network: nx.Graph,
     distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-    edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
-    louvain_resolution: float = 1.0,
+    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
+    louvain_resolution: float = 0.1,
+    leiden_resolution: float = 1.0,
     random_seed: int = 888,
-) -> np.ndarray:
-    """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
+) -> csr_matrix:
+    """Calculate the combined neighborhoods for each node using sparse matrices.
 
     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
-            metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
-            'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
-        edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
-            Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
-            Defaults to 1.0.
-        louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
-        random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
+        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
+        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
+        random_seed (int, optional): Random seed for methods requiring random initialization.
 
     Returns:
-        np.ndarray: Summed neighborhood matrix from all selected algorithms.
+        csr_matrix: The combined neighborhood matrix.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)
 
-    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    # Ensure distance_metric is a list for multi-algorithm handling
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
-    if isinstance(edge_length_threshold, (float, int)):
-        edge_length_threshold = [edge_length_threshold] * len(distance_metric)
-    # Check that the number of distance metrics matches the number of edge length thresholds
-    if len(distance_metric) != len(edge_length_threshold):
+    # Ensure fraction_shortest_edges is a list for multi-threshold handling
+    if isinstance(fraction_shortest_edges, (float, int)):
+        fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
+    # Validate matching lengths of distance metrics and thresholds
+    if len(distance_metric) != len(fraction_shortest_edges):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
 
-    # Initialize combined neighborhood matrix
+    # Initialize a sparse LIL matrix for incremental updates
     num_nodes = network.number_of_nodes()
-    combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
-    # Loop through each distance metric and corresponding edge length threshold
-    for metric, threshold in zip(distance_metric, edge_length_threshold):
-        # Create a subgraph based on the specific edge length threshold for this algorithm
-        subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
-        # Call the appropriate neighborhood function based on the metric
-        if metric == "louvain":
-            neighborhoods = calculate_louvain_neighborhoods(
-                subgraph, louvain_resolution, random_seed=random_seed
+    # Initialize a sparse matrix with the same shape as the network
+    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
+    # Loop through each distance metric and corresponding edge rank fraction
+    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
+        # Compute neighborhoods for the specified metric
+        if metric == "greedy_modularity":
+            neighborhoods = calculate_greedy_modularity_neighborhoods(
+                network, fraction_shortest_edges=percentile
             )
-        elif metric == "greedy_modularity":
-            neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
         elif metric == "label_propagation":
-            neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
+            neighborhoods = calculate_label_propagation_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "leiden":
+            neighborhoods = calculate_leiden_neighborhoods(
+                network,
+                resolution=leiden_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
+        elif metric == "louvain":
+            neighborhoods = calculate_louvain_neighborhoods(
+                network,
+                resolution=louvain_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
         elif metric == "markov_clustering":
-            neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
-        elif metric == "walktrap":
-            neighborhoods = calculate_walktrap_neighborhoods(subgraph)
+            neighborhoods = calculate_markov_clustering_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
         elif metric == "spinglass":
-            neighborhoods = calculate_spinglass_neighborhoods(subgraph)
+            neighborhoods = calculate_spinglass_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "walktrap":
+            neighborhoods = calculate_walktrap_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
         else:
             raise ValueError(
-                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
-                "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
+                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
 
-        # Sum the neighborhood matrices
+        # Add the sparse neighborhood matrix
         combined_neighborhoods += neighborhoods
 
-    # Ensure that the maximum value in each row is set to 1
-    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
-    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row.
-    combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
+    # Ensure maximum value in each row is set to 1
+    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
 
     return combined_neighborhoods
 
 
-def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
-    """Create a subgraph containing all nodes and edges where the edge length is below the
-    specified percentile of all edge lengths in the input graph.
+def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
+    """Set the maximum value in each row of a sparse matrix to 1.
 
     Args:
-        G (nx.Graph): The input graph with 'length' attributes on edges.
-        edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
+        matrix (csr_matrix): The input sparse matrix.
 
     Returns:
-        nx.Graph: A subgraph with all nodes and edges where the edge length is below the
-        calculated threshold length.
+        csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
     """
-    # Extract edge lengths and handle missing lengths
-    edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
-    if not edge_lengths:
-        raise ValueError(
-            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
-        )
-
-    # Calculate the specific edge length for the given percentile
-    percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
-    # Create the subgraph by directly filtering edges during iteration
-    subgraph = nx.Graph()
-    subgraph.add_nodes_from(G.nodes(data=True))  # Retain all nodes from the original graph
-    # Add edges below the specified percentile length in a single pass
-    for u, v, d in G.edges(data=True):
-        if d.get("length", 1) <= percentile_length:
-            subgraph.add_edge(u, v, **d)
-
-    # Return the subgraph; optionally check if it's too sparse
-    if subgraph.number_of_edges() == 0:
-        raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
+    # Iterate over each row and set the maximum value to 1
+    for i in range(matrix.shape[0]):
+        row_data = matrix[i].data
+        if len(row_data) > 0:
+            row_data[:] = (row_data == max(row_data)).astype(int)
 
-    return subgraph
+    return matrix
 
 
-def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
-    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
+def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
+    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
+    useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
+    maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
 
     Args:
         matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
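One caveat worth flagging in `_set_max_row_value_to_one_sparse` above: SciPy's integer row indexing (`matrix[i]`) returns a copy, so writes into that slice's `.data` are not guaranteed to reach the original matrix. A standalone sketch (not the package's code) of the same binarization written through the CSR buffers directly:

    import numpy as np
    from scipy.sparse import csr_matrix

    def set_max_row_value_to_one(matrix: csr_matrix) -> csr_matrix:
        # Write through the underlying CSR buffers; indptr[i]:indptr[i+1]
        # spans the stored (non-zero) entries of row i.
        for i in range(matrix.shape[0]):
            start, end = matrix.indptr[i], matrix.indptr[i + 1]
            row = matrix.data[start:end]
            if row.size:
                matrix.data[start:end] = (row == row.max()).astype(matrix.dtype)
        matrix.eliminate_zeros()  # drop entries zeroed by the binarization
        return matrix

    m = csr_matrix(np.array([[0, 2, 3], [5, 0, 1]], dtype=np.uint8))
    print(set_max_row_value_to_one(m).toarray())
    # [[0 0 1]
    #  [1 0 0]]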
@@ -171,163 +170,170 @@ def process_neighborhoods(
 
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (Dict[str, Any]): Dictionary containing 'enrichment_matrix', 'significant_binary_enrichment_matrix', and 'significant_enrichment_matrix'.
+        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
-        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and enrichment counts.
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
     """
-    enrichment_matrix = neighborhoods["enrichment_matrix"]
-    significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
-    significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+    significance_matrix = neighborhoods["significance_matrix"]
+    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _impute_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             max_depth=impute_depth,
         )
 
     logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _prune_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             distance_threshold=prune_threshold,
         )
 
-    neighborhood_enrichment_counts = np.sum(significant_binary_enrichment_matrix, axis=0)
-    node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+    node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
-        "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
-        "node_enrichment_sums": node_enrichment_sums,
+        "significance_matrix": significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "neighborhood_significance_counts": neighborhood_significance_counts,
+        "node_significance_sums": node_significance_sums,
     }
 
 
 def _impute_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors in the network graph.
+    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-        tuple: A tuple containing:
-        - np.ndarray: The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        - np.ndarray: The imputed significance matrix.
         - np.ndarray: The imputed alpha threshold matrix.
-        - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+        - np.ndarray: The significant significance matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-    enrichment_matrix, significant_binary_enrichment_matrix = _impute_neighbors_with_similarity(
-        network, enrichment_matrix, significant_binary_enrichment_matrix, max_depth=max_depth
+    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _impute_neighbors_with_similarity(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: A tuple containing:
-        - The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray]:
+        - The imputed significance matrix.
         - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all enriched nodes
-        for row_index in range(significant_binary_enrichment_matrix.shape[0]):
-            if significant_binary_enrichment_matrix[row_index].sum() != 0:
-                enrichment_matrix, significant_binary_enrichment_matrix = _process_node_imputation(
+        # Iterate over all significant nodes
+        for row_index in range(significant_binary_significance_matrix.shape[0]):
+            if significant_binary_significance_matrix[row_index].sum() != 0:
+                (
+                    significance_matrix,
+                    significant_binary_significance_matrix,
+                ) = _process_node_imputation(
                     row_index,
                     network,
-                    enrichment_matrix,
-                    significant_binary_enrichment_matrix,
+                    significance_matrix,
+                    significant_binary_significance_matrix,
                     depth,
                 )
 
         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
         depth += 1
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its enriched neighbors.
+    """Process the imputation for a single node based on its significant neighbors.
 
     Args:
-        row_index (int): The index of the enriched node being processed.
+        row_index (int): The index of the significant node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
     """
     # Check neighbors at the current depth
     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already enriched)
+    # Filter annotated neighbors (already significant)
     annotated_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() != 0
-        and enrichment_matrix[n].sum() != 0
+        and significant_binary_significance_matrix[n].sum() != 0
+        and significance_matrix[n].sum() != 0
     ]
-    # Filter non-enriched neighbors
+    # Filter non-significant neighbors
     valid_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() == 0
-        and enrichment_matrix[n].sum() == 0
+        and significant_binary_significance_matrix[n].sum() == 0
+        and significance_matrix[n].sum() == 0
     ]
-    # If there are valid non-enriched neighbors
+    # If there are valid non-significant neighbors
     if valid_neighbors and annotated_neighbors:
         # Calculate distances to annotated neighbors
         distances_to_annotated = [
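Both `_impute_neighbors` above and `_prune_neighbors` further down finish by masking the significance matrix against its binary counterpart. A minimal sketch of that `np.where` step with toy values:

    import numpy as np

    significance_matrix = np.array([[2.0, 0.5], [1.0, 3.0]])
    significant_binary_significance_matrix = np.array([[1, 0], [0, 1]])
    # Keep only the entries that passed the binary significance threshold
    significant_significance_matrix = np.where(
        significant_binary_significance_matrix == 1, significance_matrix, 0
    )
    print(significant_significance_matrix)
    # [[2. 0.]
    #  [0. 3.]]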
@@ -338,7 +344,7 @@ def _process_node_imputation(
         iqr = q3 - q1
         lower_bound = q1 - 1.5 * iqr
         upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        # Filter valid non-significant neighbors that fall within the IQR bounds
         valid_neighbors_within_iqr = [
             n
             for n in valid_neighbors
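The `q1`/`q3`/`iqr` bounds above are the standard 1.5 * IQR outlier rule applied to distances from the processed node to its annotated neighbors. A self-contained sketch of the same filter:

    import numpy as np

    distances_to_annotated = np.array([1.0, 1.2, 1.4, 1.5, 6.0])
    q1, q3 = np.percentile(distances_to_annotated, [25, 75])
    iqr = q3 - q1
    lower_bound, upper_bound = q1 - 1.5 * iqr, q3 + 1.5 * iqr  # 0.75, 1.95
    # The 6.0 outlier falls outside the bounds and is excluded
    within = (distances_to_annotated >= lower_bound) & (distances_to_annotated <= upper_bound)
    print(within)  # [ True  True  True  True False]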
@@ -352,8 +358,8 @@
         def sum_pairwise_cosine_similarities(neighbor):
             return sum(
                 cosine_similarity(
-                    enrichment_matrix[neighbor].reshape(1, -1),
-                    enrichment_matrix[other_neighbor].reshape(1, -1),
+                    significance_matrix[neighbor].reshape(1, -1),
+                    significance_matrix[other_neighbor].reshape(1, -1),
                 )[0][0]
                 for other_neighbor in valid_neighbors_within_iqr
                 if other_neighbor != neighbor
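`sum_pairwise_cosine_similarities` above scores each candidate row by its total cosine similarity to the other candidates; the top scorer is the one imputed. A standalone sketch of that selection rule on toy data:

    import numpy as np
    from sklearn.metrics.pairwise import cosine_similarity

    significance_matrix = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
    candidates = [0, 1, 2]

    def summed_similarity(neighbor):
        # Total similarity of one candidate row to every other candidate row
        return sum(
            cosine_similarity(
                significance_matrix[neighbor].reshape(1, -1),
                significance_matrix[other].reshape(1, -1),
            )[0][0]
            for other in candidates
            if other != neighbor
        )

    most_similar = max(candidates, key=summed_similarity)
    print(most_similar)  # 1 -- the row most similar, in total, to the rest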
@@ -365,43 +371,45 @@
         else:
             most_similar_neighbor = valid_neighbors_within_iqr[0]
 
-        # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
-        enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+        # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+        significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
             depth + 1
         )
-        significant_binary_enrichment_matrix[most_similar_neighbor] = (
-            significant_binary_enrichment_matrix[row_index]
+        significant_binary_significance_matrix[most_similar_neighbor] = (
+            significant_binary_significance_matrix[row_index]
         )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _prune_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+        significance_matrix (np.ndarray): The significance matrix.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
 
     Returns:
-        tuple: A tuple containing:
-        - np.ndarray: The updated enrichment matrix with outliers set to zero.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        - np.ndarray: The updated significance matrix with outliers set to zero.
         - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-        - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+        - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
     """
-    # Identify indices with non-zero rows in the binary enrichment matrix
-    non_zero_indices = np.where(significant_binary_enrichment_matrix.sum(axis=1) != 0)[0]
+    # Identify indices with non-zero rows in the binary significance matrix
+    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
         neighbors = [
-            n for n in network.neighbors(node) if significant_binary_enrichment_matrix[n].sum() != 0
+            n
+            for n in network.neighbors(node)
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
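`_prune_neighbors` scores each significant node by the median Euclidean distance to its significant neighbors. `_get_euclidean_distance` itself is outside this hunk, so the sketch below assumes 2D coordinates stored in a hypothetical "pos" node attribute:

    import networkx as nx
    import numpy as np

    def euclidean(G: nx.Graph, a, b) -> float:
        # Assumed coordinate storage; the real helper's lookup is not shown here
        (x1, y1), (x2, y2) = G.nodes[a]["pos"], G.nodes[b]["pos"]
        return float(np.hypot(x2 - x1, y2 - y1))

    G = nx.path_graph(3)
    nx.set_node_attributes(G, {0: (0.0, 0.0), 1: (1.0, 0.0), 2: (1.0, 1.0)}, "pos")
    median_distance = np.median([euclidean(G, 1, n) for n in G.neighbors(1)])
    print(median_distance)  # 1.0 -- compared against the rank-based threshold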
@@ -416,22 +424,26 @@ def _prune_neighbors
             neighbors = [
                 n
                 for n in network.neighbors(row_index)
-                if significant_binary_enrichment_matrix[n].sum() != 0
+                if significant_binary_significance_matrix[n].sum() != 0
             ]
             if neighbors:
                 median_distance = np.median(
                     [_get_euclidean_distance(row_index, n, network) for n in neighbors]
                 )
                 if median_distance >= distance_threshold_value:
-                    enrichment_matrix[row_index] = 0
-                    significant_binary_enrichment_matrix[row_index] = 0
+                    significance_matrix[row_index] = 0
+                    significant_binary_significance_matrix[row_index] = 0
 
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
    )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
@@ -481,7 +493,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
-    # Compute the rank percentiles for the sorted distances
+    # Compute the rank fractions for the sorted distances
     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
     # Interpolating the ranks to 1000 evenly spaced percentiles
     interpolated_percentiles = np.linspace(0, 1, 1000)
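The hunk above shows only the setup of `_calculate_threshold`: distances are sorted and paired with evenly spaced rank fractions. Presumably the function then interpolates onto the 1000 rank points and indexes at the requested fraction; the `np.interp` step below is an assumption, since the diff cuts off here:

    import numpy as np

    median_distances = [0.2, 0.5, 0.6, 0.9, 1.4]
    distance_threshold = 0.9  # rank fraction in [0, 1]

    sorted_distances = np.sort(median_distances)
    rank_percentiles = np.linspace(0, 1, len(sorted_distances))
    interpolated_percentiles = np.linspace(0, 1, 1000)
    # Map each of the 1000 evenly spaced ranks back to a distance value
    interpolated_distances = np.interp(
        interpolated_percentiles, rank_percentiles, sorted_distances
    )
    threshold_value = interpolated_distances[int(distance_threshold * 999)]
    print(round(float(threshold_value), 3))  # ~1.2: the 0.9 rank-fraction distance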
risk/network/__init__.py CHANGED
@@ -3,6 +3,4 @@ risk/network
 ~~~~~~~~~~~~
 """
 
-from .graph import NetworkGraph
-from .io import NetworkIO
-from .plot import NetworkPlotter
+from risk.network.io import NetworkIO