risk-network 0.0.8b18-py3-none-any.whl → 0.0.9b26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +133 -72
  4. risk/annotations/io.py +50 -34
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +21 -46
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +281 -96
  11. risk/neighborhoods/domains.py +92 -38
  12. risk/neighborhoods/neighborhoods.py +210 -149
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +69 -58
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/graph/network.py +269 -0
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +58 -48
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +80 -26
  23. risk/network/{plot → plotter}/contour.py +43 -34
  24. risk/network/{plot → plotter}/labels.py +123 -113
  25. risk/network/plotter/network.py +424 -0
  26. risk/network/plotter/utils/colors.py +416 -0
  27. risk/network/plotter/utils/layout.py +94 -0
  28. risk/risk.py +11 -469
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +28 -18
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +45 -39
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +17 -11
  37. risk/stats/stats.py +20 -16
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/graph.py +0 -159
  43. risk/network/plot/__init__.py +0 -6
  44. risk/network/plot/network.py +0 -282
  45. risk/network/plot/plotter.py +0 -137
  46. risk/network/plot/utils/color.py +0 -353
  47. risk/network/plot/utils/layout.py +0 -53
  48. risk_network-0.0.8b18.dist-info/RECORD +0 -37
  49. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  50. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/neighborhoods/neighborhoods.py CHANGED
@@ -5,7 +5,7 @@ risk/neighborhoods/neighborhoods

 import random
 import warnings
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Union

 import networkx as nx
 import numpy as np
@@ -15,6 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
+    calculate_leiden_neighborhoods,
     calculate_louvain_neighborhoods,
     calculate_markov_clustering_neighborhoods,
     calculate_spinglass_neighborhoods,
@@ -28,86 +29,119 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)

 def get_network_neighborhoods(
     network: nx.Graph,
-    distance_metric: str = "louvain",
-    edge_length_threshold: float = 1.0,
-    louvain_resolution: float = 1.0,
+    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
+    louvain_resolution: float = 0.1,
+    leiden_resolution: float = 1.0,
     random_seed: int = 888,
 ) -> np.ndarray:
-    """Calculate the neighborhoods for each node in the network based on the specified distance metric.
+    """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).

     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str): The distance metric to use ('greedy_modularity', 'louvain', 'label_propagation',
-            'markov_clustering', 'walktrap', 'spinglass').
-        edge_length_threshold (float): The edge length threshold for the neighborhoods.
-        louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
-        random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
+        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
+        random_seed (int, optional): Random seed for methods requiring random initialization.

     Returns:
-        np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+        np.ndarray: Summed neighborhood matrix from all selected algorithms.
     """
-    # Set random seed for reproducibility in all methods besides Louvain, which requires a separate seed
+    # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)

-    # Create a subgraph based on the edge length percentile threshold
-    network = _create_percentile_limited_subgraph(
-        network, edge_length_percentile=edge_length_threshold
-    )
+    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    if isinstance(distance_metric, (str, np.ndarray)):
+        distance_metric = [distance_metric]
+    # Ensure fraction_shortest_edges is a list/tuple for multi-threshold handling
+    if isinstance(fraction_shortest_edges, (float, int)):
+        fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
+    # Check that the number of distance metrics matches the number of edge length thresholds
+    if len(distance_metric) != len(fraction_shortest_edges):
+        raise ValueError(
+            "The number of distance metrics must match the number of edge length thresholds."
+        )

-    if distance_metric == "louvain":
-        return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
-    if distance_metric == "greedy_modularity":
-        return calculate_greedy_modularity_neighborhoods(network)
-    if distance_metric == "label_propagation":
-        return calculate_label_propagation_neighborhoods(network)
-    if distance_metric == "markov_clustering":
-        return calculate_markov_clustering_neighborhoods(network)
-    if distance_metric == "walktrap":
-        return calculate_walktrap_neighborhoods(network)
-    if distance_metric == "spinglass":
-        return calculate_spinglass_neighborhoods(network)
-
-    raise ValueError(
-        "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
-        "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
-    )
+    # Initialize combined neighborhood matrix
+    num_nodes = network.number_of_nodes()
+    combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)

+    # Loop through each distance metric and corresponding edge rank fraction
+    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
+        # Call the appropriate neighborhood function based on the metric
+        if metric == "greedy_modularity":
+            neighborhoods = calculate_greedy_modularity_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "label_propagation":
+            neighborhoods = calculate_label_propagation_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "leiden":
+            neighborhoods = calculate_leiden_neighborhoods(
+                network,
+                resolution=leiden_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
+        elif metric == "louvain":
+            neighborhoods = calculate_louvain_neighborhoods(
+                network,
+                resolution=louvain_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
+        elif metric == "markov_clustering":
+            neighborhoods = calculate_markov_clustering_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "spinglass":
+            neighborhoods = calculate_spinglass_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "walktrap":
+            neighborhoods = calculate_walktrap_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        else:
+            raise ValueError(
+                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
+                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
+            )

-def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
-    """Create a subgraph containing all nodes and edges where the edge length is below the
-    specified percentile of all edge lengths in the input graph.
+        # Sum the neighborhood matrices
+        combined_neighborhoods += neighborhoods

-    Args:
-        G (nx.Graph): The input graph with 'length' attributes on edges.
-        edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
+    # Ensure that the maximum value in each row is set to 1
+    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
+    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
+    # focusing on the most significant connection per row (or nodes).
+    combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)

-    Returns:
-        nx.Graph: A subgraph with all nodes and edges where the edge length is below the
-        calculated threshold length.
-    """
-    # Extract edge lengths and handle missing lengths
-    edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
-    if not edge_lengths:
-        raise ValueError(
-            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
-        )
+    return combined_neighborhoods

-    # Calculate the specific edge length for the given percentile
-    percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
-    # Create the subgraph by directly filtering edges during iteration
-    subgraph = nx.Graph()
-    subgraph.add_nodes_from(G.nodes(data=True))  # Retain all nodes from the original graph
-    # Add edges below the specified percentile length in a single pass
-    for u, v, d in G.edges(data=True):
-        if d.get("length", 1) <= percentile_length:
-            subgraph.add_edge(u, v, **d)

-    # Return the subgraph; optionally check if it's too sparse
-    if subgraph.number_of_edges() == 0:
-        raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
+def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
+    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
+    useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
+    maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
+
+    Args:
+        matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.

-    return subgraph
+    Returns:
+        np.ndarray: The modified matrix where only the maximum value(s) in each row is set to 1, and others are set to 0.
+    """
+    # Find the maximum value in each row (column-wise max operation)
+    max_values = np.max(matrix, axis=1, keepdims=True)
+    # Create a boolean mask where elements are True if they are the max value in their row
+    max_mask = matrix == max_values
+    # Set all elements to 0, and then set the maximum value positions to 1
+    matrix[:] = 0  # Set everything to 0
+    matrix[max_mask] = 1  # Set only the max values to 1
+    return matrix


 def process_neighborhoods(
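Note on the new combination step: in 0.0.9b26, get_network_neighborhoods sums one 0/1 matrix per selected algorithm into combined_neighborhoods and then collapses each row to its maximum via _set_max_row_value_to_one. The numpy sketch below reproduces only that combine-and-binarize logic with toy matrices (illustrative values, not package output):

    import numpy as np

    # Toy 0/1 neighborhood matrices, standing in for two community-detection runs
    louvain_like = np.array([[1, 1, 0],
                             [0, 1, 0],
                             [0, 0, 1]])
    leiden_like = np.array([[1, 0, 0],
                            [0, 1, 1],
                            [0, 1, 1]])

    combined = louvain_like + leiden_like  # agreement between algorithms raises the count

    # Keep only each row's strongest relationship, mirroring _set_max_row_value_to_one
    max_values = np.max(combined, axis=1, keepdims=True)
    binarized = (combined == max_values).astype(int)
    print(binarized)

Entries that tie for the row maximum are all kept at 1, which matches the "maximum value(s)" wording in the new docstring.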
@@ -120,157 +154,170 @@ def process_neighborhoods(

     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix', and 'significant_enrichment_matrix'.
+        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.

     Returns:
-        dict: Processed neighborhoods data, including the updated matrices and enrichment counts.
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
     """
-    enrichment_matrix = neighborhoods["enrichment_matrix"]
-    binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
-    significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+    significance_matrix = neighborhoods["significance_matrix"]
+    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
-            enrichment_matrix,
-            binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _impute_neighbors(
             network,
-            enrichment_matrix,
-            binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             max_depth=impute_depth,
         )

     logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
-            enrichment_matrix,
-            binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _prune_neighbors(
             network,
-            enrichment_matrix,
-            binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             distance_threshold=prune_threshold,
         )

-    neighborhood_enrichment_counts = np.sum(binary_enrichment_matrix, axis=0)
-    node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+    node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "binary_enrichment_matrix": binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
-        "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
-        "node_enrichment_sums": node_enrichment_sums,
+        "significance_matrix": significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "neighborhood_significance_counts": neighborhood_significance_counts,
+        "node_significance_sums": node_significance_sums,
     }


 def _impute_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors in the network graph.
+    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.

     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.

     Returns:
-        tuple: A tuple containing:
-            - np.ndarray: The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+            - np.ndarray: The imputed significance matrix.
             - np.ndarray: The imputed alpha threshold matrix.
-            - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+            - np.ndarray: The significant significance matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-    enrichment_matrix, binary_enrichment_matrix = _impute_neighbors_with_similarity(
-        network, enrichment_matrix, binary_enrichment_matrix, max_depth=max_depth
+    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
+    )

-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )


 def _impute_neighbors_with_similarity(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.

     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.

     Returns:
-        Tuple[np.ndarray, np.ndarray]: A tuple containing:
-            - The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray]:
+            - The imputed significance matrix.
             - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all enriched nodes
-        for row_index in range(binary_enrichment_matrix.shape[0]):
-            if binary_enrichment_matrix[row_index].sum() != 0:
-                enrichment_matrix, binary_enrichment_matrix = _process_node_imputation(
-                    row_index, network, enrichment_matrix, binary_enrichment_matrix, depth
+        # Iterate over all significant nodes
+        for row_index in range(significant_binary_significance_matrix.shape[0]):
+            if significant_binary_significance_matrix[row_index].sum() != 0:
+                (
+                    significance_matrix,
+                    significant_binary_significance_matrix,
+                ) = _process_node_imputation(
+                    row_index,
+                    network,
+                    significance_matrix,
+                    significant_binary_significance_matrix,
+                    depth,
                 )

         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
         depth += 1

-    return enrichment_matrix, binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix


 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its enriched neighbors.
+    """Process the imputation for a single node based on its significant neighbors.

     Args:
-        row_index (int): The index of the enriched node being processed.
+        row_index (int): The index of the significant node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.

     Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
     """
     # Check neighbors at the current depth
     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already enriched)
+    # Filter annotated neighbors (already significant)
     annotated_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and binary_enrichment_matrix[n].sum() != 0
-        and enrichment_matrix[n].sum() != 0
+        and significant_binary_significance_matrix[n].sum() != 0
+        and significance_matrix[n].sum() != 0
     ]
-    # Filter non-enriched neighbors
+    # Filter non-significant neighbors
     valid_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and binary_enrichment_matrix[n].sum() == 0
-        and enrichment_matrix[n].sum() == 0
+        and significant_binary_significance_matrix[n].sum() == 0
+        and significance_matrix[n].sum() == 0
     ]
-    # If there are valid non-enriched neighbors
+    # If there are valid non-significant neighbors
     if valid_neighbors and annotated_neighbors:
         # Calculate distances to annotated neighbors
         distances_to_annotated = [
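Note on the renamed keys: process_neighborhoods now reads and returns significance-named entries ('significance_matrix', 'significant_binary_significance_matrix', 'significant_significance_matrix', 'neighborhood_significance_counts', 'node_significance_sums') in place of the 0.0.8 'enrichment' names. The masking that derives the significant matrix is plain numpy; a self-contained sketch with toy values:

    import numpy as np

    # Toy stand-ins for the renamed matrices (values are illustrative only)
    significance_matrix = np.array([[2.0, 0.5], [0.0, 1.5]])
    significant_binary_significance_matrix = np.array([[1, 0], [0, 1]])

    # Zero out non-significant entries, as done in _impute_neighbors and _prune_neighbors
    significant_significance_matrix = np.where(
        significant_binary_significance_matrix == 1, significance_matrix, 0
    )
    print(significant_significance_matrix)  # non-significant entries become 0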
@@ -281,7 +328,7 @@ def _process_node_imputation(
         iqr = q3 - q1
         lower_bound = q1 - 1.5 * iqr
         upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        # Filter valid non-significant neighbors that fall within the IQR bounds
         valid_neighbors_within_iqr = [
             n
             for n in valid_neighbors
@@ -295,8 +342,8 @@ def _process_node_imputation(
         def sum_pairwise_cosine_similarities(neighbor):
             return sum(
                 cosine_similarity(
-                    enrichment_matrix[neighbor].reshape(1, -1),
-                    enrichment_matrix[other_neighbor].reshape(1, -1),
+                    significance_matrix[neighbor].reshape(1, -1),
+                    significance_matrix[other_neighbor].reshape(1, -1),
                 )[0][0]
                 for other_neighbor in valid_neighbors_within_iqr
                 if other_neighbor != neighbor
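Note on the similarity criterion: the imputation step picks, among the IQR-filtered candidates, the neighbor whose significance profile has the largest summed pairwise cosine similarity to the other candidates. A standalone sketch of that selection with toy profiles (the real code restricts candidates to valid_neighbors_within_iqr):

    import numpy as np
    from sklearn.metrics.pairwise import cosine_similarity

    # Toy significance profiles for three candidate neighbors (one row per candidate)
    significance_matrix = np.array([
        [1.0, 0.0, 2.0],
        [0.9, 0.1, 1.8],
        [0.0, 3.0, 0.0],
    ])
    candidates = [0, 1, 2]

    def sum_pairwise_cosine_similarities(neighbor):
        # Same idea as the inner helper in _process_node_imputation
        return sum(
            cosine_similarity(
                significance_matrix[neighbor].reshape(1, -1),
                significance_matrix[other].reshape(1, -1),
            )[0][0]
            for other in candidates
            if other != neighbor
        )

    most_similar_neighbor = max(candidates, key=sum_pairwise_cosine_similarities)
    print(most_similar_neighbor)  # 1 for these toy profiles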
@@ -308,40 +355,46 @@ def _process_node_imputation(
         else:
             most_similar_neighbor = valid_neighbors_within_iqr[0]

-        # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
-        enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+        # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+        significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
             depth + 1
         )
-        binary_enrichment_matrix[most_similar_neighbor] = binary_enrichment_matrix[row_index]
+        significant_binary_significance_matrix[most_similar_neighbor] = (
+            significant_binary_significance_matrix[row_index]
+        )

-    return enrichment_matrix, binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix


 def _prune_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.

     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix.
-        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+        significance_matrix (np.ndarray): The significance matrix.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.

     Returns:
-        tuple: A tuple containing:
-            - np.ndarray: The updated enrichment matrix with outliers set to zero.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+            - np.ndarray: The updated significance matrix with outliers set to zero.
             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-            - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+            - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
     """
-    # Identify indices with non-zero rows in the binary enrichment matrix
-    non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
+    # Identify indices with non-zero rows in the binary significance matrix
+    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
-        neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+        neighbors = [
+            n
+            for n in network.neighbors(node)
+            if significant_binary_significance_matrix[n].sum() != 0
+        ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(node, n, network) for n in neighbors]
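Note on the pruning criterion: each significant node is ranked by the median Euclidean distance to its significant neighbors, and nodes whose median exceeds the computed threshold are zeroed out. The sketch below reproduces only the median-distance step on a toy graph; storing coordinates in "x"/"y" node attributes is an assumption for illustration, since the body of _get_euclidean_distance is not part of this hunk:

    import networkx as nx
    import numpy as np

    # Toy graph with assumed "x"/"y" position attributes (illustrative only)
    G = nx.path_graph(4)
    for i in G.nodes:
        G.nodes[i]["x"], G.nodes[i]["y"] = float(i), float(i % 2)

    def euclidean_distance(u, v, graph):
        # Hypothetical stand-in for _get_euclidean_distance
        pu = np.array([graph.nodes[u]["x"], graph.nodes[u]["y"]])
        pv = np.array([graph.nodes[v]["x"], graph.nodes[v]["y"]])
        return float(np.linalg.norm(pu - pv))

    node = 1
    median_distance = np.median(
        [euclidean_distance(node, n, G) for n in G.neighbors(node)]
    )
    print(median_distance)  # median distance used for outlier ranking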
@@ -353,20 +406,28 @@ def _prune_neighbors(
     # Prune nodes that are outliers based on the distance threshold
     for row_index in non_zero_indices:
         neighbors = [
-            n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
+            n
+            for n in network.neighbors(row_index)
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
             if median_distance >= distance_threshold_value:
-                enrichment_matrix[row_index] = 0
-                binary_enrichment_matrix[row_index] = 0
+                significance_matrix[row_index] = 0
+                significant_binary_significance_matrix[row_index] = 0

     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
+    )

-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )


 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
@@ -408,7 +469,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     """Calculate the distance threshold based on the given median distances and a percentile threshold.

     Args:
-        median_distances (list): An array of median distances.
+        median_distances (List): An array of median distances.
         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.

     Returns:
@@ -416,7 +477,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
-    # Compute the rank percentiles for the sorted distances
+    # Compute the rank fractions for the sorted distances
     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
     # Interpolating the ranks to 1000 evenly spaced percentiles
     interpolated_percentiles = np.linspace(0, 1, 1000)
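Note on the threshold calculation: _calculate_threshold converts the rank-fraction prune_threshold into a concrete distance cutoff by interpolating over the sorted median distances. The tail of the function is not shown in this diff, so the sketch below is only a reconstruction of the idea using np.interp, not the package's exact code:

    import numpy as np

    median_distances = [0.2, 0.4, 0.5, 0.9, 3.0]  # toy values
    distance_threshold = 0.75                     # rank fraction (0 to 1), as in the docstring

    sorted_distances = np.sort(median_distances)
    rank_percentiles = np.linspace(0, 1, len(sorted_distances))

    # Map the requested rank fraction onto a distance value between the sorted medians
    distance_threshold_value = np.interp(distance_threshold, rank_percentiles, sorted_distances)
    print(distance_threshold_value)  # 0.9 for these toy values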
risk/network/__init__.py CHANGED
@@ -3,6 +3,4 @@ risk/network
 ~~~~~~~~~~~~
 """

-from .graph import NetworkGraph
-from .io import NetworkIO
-from .plot import NetworkPlotter
+from risk.network.io import NetworkIO