risk-network 0.0.8b26-py3-none-any.whl → 0.0.9b26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +74 -47
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +255 -77
  11. risk/neighborhoods/domains.py +62 -31
  12. risk/neighborhoods/neighborhoods.py +156 -160
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +65 -57
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/{graph.py → graph/network.py} +87 -37
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +56 -47
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +7 -4
  23. risk/network/{plot → plotter}/contour.py +22 -19
  24. risk/network/{plot → plotter}/labels.py +69 -74
  25. risk/network/{plot → plotter}/network.py +170 -34
  26. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
  27. risk/network/{plot → plotter}/utils/layout.py +8 -5
  28. risk/risk.py +11 -500
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +27 -17
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +44 -38
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +15 -9
  37. risk/stats/stats.py +15 -13
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/plot/__init__.py +0 -6
  43. risk/network/plot/plotter.py +0 -137
  44. risk_network-0.0.8b26.dist-info/RECORD +0 -37
  45. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  46. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/neighborhoods/neighborhoods.py CHANGED
@@ -15,6 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
+    calculate_leiden_neighborhoods,
     calculate_louvain_neighborhoods,
     calculate_markov_clustering_neighborhoods,
     calculate_spinglass_neighborhoods,
@@ -29,22 +30,20 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 def get_network_neighborhoods(
     network: nx.Graph,
     distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-    edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
-    louvain_resolution: float = 1.0,
+    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
+    louvain_resolution: float = 0.1,
+    leiden_resolution: float = 1.0,
     random_seed: int = 888,
 ) -> np.ndarray:
     """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
 
     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
-            metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
-            'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
-        edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
-            Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
-            Defaults to 1.0.
-        louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
-        random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
+        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
+        random_seed (int, optional): Random seed for methods requiring random initialization.
 
     Returns:
         np.ndarray: Summed neighborhood matrix from all selected algorithms.
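A minimal usage sketch of the updated signature (hypothetical graph and parameter values; the new 'leiden' metric may require optional community-detection dependencies not shown in this diff):

    import networkx as nx
    from risk.neighborhoods.neighborhoods import get_network_neighborhoods

    # Hypothetical graph; node labels are assumed to be 0..n-1 so they can
    # index the returned (n_nodes, n_nodes) neighborhood matrix
    G = nx.karate_club_graph()

    neighborhoods = get_network_neighborhoods(
        G,
        distance_metric=["leiden", "louvain"],  # one entry per algorithm
        fraction_shortest_edges=[0.75, 0.90],   # one fraction per metric
        leiden_resolution=1.0,
        louvain_resolution=0.1,
        random_seed=888,
    )
    print(neighborhoods.shape)  # (34, 34) for the karate club graph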
@@ -56,11 +55,11 @@ def get_network_neighborhoods(
     # Ensure distance_metric is a list/tuple for multi-algorithm handling
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
-    if isinstance(edge_length_threshold, (float, int)):
-        edge_length_threshold = [edge_length_threshold] * len(distance_metric)
+    # Ensure fraction_shortest_edges is a list/tuple for multi-threshold handling
+    if isinstance(fraction_shortest_edges, (float, int)):
+        fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
     # Check that the number of distance metrics matches the number of edge length thresholds
-    if len(distance_metric) != len(edge_length_threshold):
+    if len(distance_metric) != len(fraction_shortest_edges):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
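As the hunk above shows, a scalar fraction_shortest_edges is broadcast to every metric, while a length mismatch raises ValueError. An illustration, reusing the hypothetical G from the sketch above:

    # Scalar is broadcast: equivalent to fraction_shortest_edges=[0.5, 0.5]
    get_network_neighborhoods(G, ["louvain", "walktrap"], fraction_shortest_edges=0.5)

    # Mismatched lengths raise ValueError:
    # get_network_neighborhoods(G, ["louvain", "walktrap"], fraction_shortest_edges=[0.5])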
@@ -69,29 +68,47 @@ def get_network_neighborhoods(
     num_nodes = network.number_of_nodes()
     combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
 
-    # Loop through each distance metric and corresponding edge length threshold
-    for metric, threshold in zip(distance_metric, edge_length_threshold):
-        # Create a subgraph based on the specific edge length threshold for this algorithm
-        subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
+    # Loop through each distance metric and corresponding edge rank fraction
+    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
         # Call the appropriate neighborhood function based on the metric
-        if metric == "louvain":
-            neighborhoods = calculate_louvain_neighborhoods(
-                subgraph, louvain_resolution, random_seed=random_seed
+        if metric == "greedy_modularity":
+            neighborhoods = calculate_greedy_modularity_neighborhoods(
+                network, fraction_shortest_edges=percentile
             )
-        elif metric == "greedy_modularity":
-            neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
         elif metric == "label_propagation":
-            neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
+            neighborhoods = calculate_label_propagation_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "leiden":
+            neighborhoods = calculate_leiden_neighborhoods(
+                network,
+                resolution=leiden_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
+        elif metric == "louvain":
+            neighborhoods = calculate_louvain_neighborhoods(
+                network,
+                resolution=louvain_resolution,
+                fraction_shortest_edges=percentile,
+                random_seed=random_seed,
+            )
         elif metric == "markov_clustering":
-            neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
-        elif metric == "walktrap":
-            neighborhoods = calculate_walktrap_neighborhoods(subgraph)
+            neighborhoods = calculate_markov_clustering_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
         elif metric == "spinglass":
-            neighborhoods = calculate_spinglass_neighborhoods(subgraph)
+            neighborhoods = calculate_spinglass_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
+        elif metric == "walktrap":
+            neighborhoods = calculate_walktrap_neighborhoods(
+                network, fraction_shortest_edges=percentile
+            )
         else:
             raise ValueError(
-                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
-                "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
+                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
 
         # Sum the neighborhood matrices
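Subgraph thresholding is no longer done here: _create_percentile_limited_subgraph (removed in the next hunk) is replaced by the fraction_shortest_edges keyword that each calculate_*_neighborhoods function in risk/neighborhoods/community.py now accepts. That file is not part of this hunk, so the following "keep the shortest fraction of edges" sketch only illustrates the intended semantics, not the package's implementation:

    import networkx as nx

    def keep_shortest_fraction(G: nx.Graph, fraction: float) -> nx.Graph:
        """Illustrative: retain the shortest `fraction` of edges, ranked by 'length'."""
        # Assumes every edge carries a 'length' attribute
        lengths = sorted(d["length"] for _, _, d in G.edges(data=True))
        cutoff = lengths[max(int(fraction * len(lengths)) - 1, 0)]
        H = nx.Graph()
        H.add_nodes_from(G.nodes(data=True))  # keep every node; filter edges only
        H.add_edges_from(
            (u, v, d) for u, v, d in G.edges(data=True) if d["length"] <= cutoff
        )
        return H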
@@ -100,50 +117,16 @@ def get_network_neighborhoods(
     # Ensure that the maximum value in each row is set to 1
     # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
     # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row.
-    combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
+    # focusing on the most significant connection per row (or nodes).
+    combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)
 
     return combined_neighborhoods
 
 
-def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
-    """Create a subgraph containing all nodes and edges where the edge length is below the
-    specified percentile of all edge lengths in the input graph.
-
-    Args:
-        G (nx.Graph): The input graph with 'length' attributes on edges.
-        edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
-
-    Returns:
-        nx.Graph: A subgraph with all nodes and edges where the edge length is below the
-        calculated threshold length.
-    """
-    # Extract edge lengths and handle missing lengths
-    edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
-    if not edge_lengths:
-        raise ValueError(
-            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
-        )
-
-    # Calculate the specific edge length for the given percentile
-    percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
-    # Create the subgraph by directly filtering edges during iteration
-    subgraph = nx.Graph()
-    subgraph.add_nodes_from(G.nodes(data=True))  # Retain all nodes from the original graph
-    # Add edges below the specified percentile length in a single pass
-    for u, v, d in G.edges(data=True):
-        if d.get("length", 1) <= percentile_length:
-            subgraph.add_edge(u, v, **d)
-
-    # Return the subgraph; optionally check if it's too sparse
-    if subgraph.number_of_edges() == 0:
-        raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
-
-    return subgraph
-
-
-def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
-    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
+def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
+    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
+    useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
+    maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
 
     Args:
         matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
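The helper's body is unchanged by the rename, but its documented behavior (binarize each row to its maximum value(s)) is easy to reproduce in NumPy. The all-zero-row guard below is an assumption; the docstring does not specify how empty rows are treated:

    import numpy as np

    def set_max_row_value_to_one_sketch(matrix: np.ndarray) -> np.ndarray:
        result = np.zeros_like(matrix)
        row_max = matrix.max(axis=1, keepdims=True)
        # Set every entry equal to its row maximum to 1 (skipping all-zero rows)
        result[(matrix == row_max) & (row_max > 0)] = 1
        return result

    m = np.array([[0, 2, 2], [3, 1, 0]])
    print(set_max_row_value_to_one_sketch(m))
    # [[0 1 1]
    #  [1 0 0]]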
@@ -171,163 +154,170 @@ def process_neighborhoods(
 
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (Dict[str, Any]): Dictionary containing 'enrichment_matrix', 'significant_binary_enrichment_matrix', and 'significant_enrichment_matrix'.
+        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
-        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and enrichment counts.
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
     """
-    enrichment_matrix = neighborhoods["enrichment_matrix"]
-    significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
-    significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+    significance_matrix = neighborhoods["significance_matrix"]
+    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _impute_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             max_depth=impute_depth,
         )
 
     logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _prune_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             distance_threshold=prune_threshold,
         )
 
-    neighborhood_enrichment_counts = np.sum(significant_binary_enrichment_matrix, axis=0)
-    node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+    node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
-        "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
-        "node_enrichment_sums": node_enrichment_sums,
+        "significance_matrix": significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "neighborhood_significance_counts": neighborhood_significance_counts,
+        "node_significance_sums": node_significance_sums,
     }
 
 
 def _impute_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors in the network graph.
+    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-        tuple: A tuple containing:
-            - np.ndarray: The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+            - np.ndarray: The imputed significance matrix.
             - np.ndarray: The imputed alpha threshold matrix.
-            - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+            - np.ndarray: The significant significance matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-    enrichment_matrix, significant_binary_enrichment_matrix = _impute_neighbors_with_similarity(
-        network, enrichment_matrix, significant_binary_enrichment_matrix, max_depth=max_depth
+    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
    )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _impute_neighbors_with_similarity(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: A tuple containing:
-            - The imputed enrichment matrix.
+        Tuple[np.ndarray, np.ndarray]:
+            - The imputed significance matrix.
             - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all enriched nodes
-        for row_index in range(significant_binary_enrichment_matrix.shape[0]):
-            if significant_binary_enrichment_matrix[row_index].sum() != 0:
-                enrichment_matrix, significant_binary_enrichment_matrix = _process_node_imputation(
+        # Iterate over all significant nodes
+        for row_index in range(significant_binary_significance_matrix.shape[0]):
+            if significant_binary_significance_matrix[row_index].sum() != 0:
+                (
+                    significance_matrix,
+                    significant_binary_significance_matrix,
+                ) = _process_node_imputation(
                     row_index,
                     network,
-                    enrichment_matrix,
-                    significant_binary_enrichment_matrix,
+                    significance_matrix,
+                    significant_binary_significance_matrix,
                     depth,
                 )
 
         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
         depth += 1
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its enriched neighbors.
+    """Process the imputation for a single node based on its significant neighbors.
 
     Args:
-        row_index (int): The index of the enriched node being processed.
+        row_index (int): The index of the significant node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
     """
     # Check neighbors at the current depth
     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already enriched)
+    # Filter annotated neighbors (already significant)
     annotated_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() != 0
-        and enrichment_matrix[n].sum() != 0
+        and significant_binary_significance_matrix[n].sum() != 0
+        and significance_matrix[n].sum() != 0
     ]
-    # Filter non-enriched neighbors
+    # Filter non-significant neighbors
     valid_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() == 0
-        and enrichment_matrix[n].sum() == 0
+        and significant_binary_significance_matrix[n].sum() == 0
+        and significance_matrix[n].sum() == 0
     ]
-    # If there are valid non-enriched neighbors
+    # If there are valid non-significant neighbors
     if valid_neighbors and annotated_neighbors:
         # Calculate distances to annotated neighbors
         distances_to_annotated = [
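Callers must track the wholesale enrichment-to-significance key rename in the dictionary returned by process_neighborhoods. A sketch of consuming the new keys (network and neighborhoods assumed to be defined as elsewhere in this module):

    processed = process_neighborhoods(
        network, neighborhoods, impute_depth=1, prune_threshold=0.1
    )
    sig_matrix = processed["significance_matrix"]            # was "enrichment_matrix"
    binary = processed["significant_binary_significance_matrix"]
    counts = processed["neighborhood_significance_counts"]   # was "neighborhood_enrichment_counts"
    sums = processed["node_significance_sums"]               # was "node_enrichment_sums"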
@@ -338,7 +328,7 @@ def _process_node_imputation(
         iqr = q3 - q1
         lower_bound = q1 - 1.5 * iqr
         upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        # Filter valid non-significant neighbors that fall within the IQR bounds
         valid_neighbors_within_iqr = [
             n
             for n in valid_neighbors
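The bounds above are the standard 1.5 x IQR outlier fence. A small worked example with hypothetical distances:

    import numpy as np

    distances = np.array([1, 1, 2, 2, 3, 10])    # hypothetical distances to annotated neighbors
    q1, q3 = np.percentile(distances, [25, 75])  # 1.25, 2.75
    iqr = q3 - q1                                # 1.5
    lower_bound = q1 - 1.5 * iqr                 # -1.0
    upper_bound = q3 + 1.5 * iqr                 # 5.0
    # The neighbor at distance 10 falls outside the fence and is filtered out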
@@ -352,8 +342,8 @@ def _process_node_imputation(
             def sum_pairwise_cosine_similarities(neighbor):
                 return sum(
                     cosine_similarity(
-                        enrichment_matrix[neighbor].reshape(1, -1),
-                        enrichment_matrix[other_neighbor].reshape(1, -1),
+                        significance_matrix[neighbor].reshape(1, -1),
+                        significance_matrix[other_neighbor].reshape(1, -1),
                     )[0][0]
                     for other_neighbor in valid_neighbors_within_iqr
                     if other_neighbor != neighbor
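Among the candidates inside the fence, the neighbor whose profile is most similar to the others is selected, presumably via max(..., key=sum_pairwise_cosine_similarities); the selection line itself is outside this hunk. A toy sketch:

    import numpy as np
    from sklearn.metrics.pairwise import cosine_similarity

    significance_matrix = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
    valid_neighbors_within_iqr = [0, 1, 2]

    def sum_pairwise_cosine_similarities(neighbor):
        return sum(
            cosine_similarity(
                significance_matrix[neighbor].reshape(1, -1),
                significance_matrix[other].reshape(1, -1),
            )[0][0]
            for other in valid_neighbors_within_iqr
            if other != neighbor
        )

    most_similar = max(valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities)
    print(most_similar)  # 1: its profile is closest to both others combined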
@@ -365,43 +355,45 @@ def _process_node_imputation(
         else:
             most_similar_neighbor = valid_neighbors_within_iqr[0]
 
-        # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
-        enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+        # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+        significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
             depth + 1
         )
-        significant_binary_enrichment_matrix[most_similar_neighbor] = (
-            significant_binary_enrichment_matrix[row_index]
+        significant_binary_significance_matrix[most_similar_neighbor] = (
+            significant_binary_significance_matrix[row_index]
         )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _prune_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+        significance_matrix (np.ndarray): The significance matrix.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
 
     Returns:
-        tuple: A tuple containing:
-            - np.ndarray: The updated enrichment matrix with outliers set to zero.
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+            - np.ndarray: The updated significance matrix with outliers set to zero.
             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-            - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+            - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
     """
-    # Identify indices with non-zero rows in the binary enrichment matrix
-    non_zero_indices = np.where(significant_binary_enrichment_matrix.sum(axis=1) != 0)[0]
+    # Identify indices with non-zero rows in the binary significance matrix
+    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
         neighbors = [
-            n for n in network.neighbors(node) if significant_binary_enrichment_matrix[n].sum() != 0
+            n
+            for n in network.neighbors(node)
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
@@ -416,22 +408,26 @@ def _prune_neighbors(
         neighbors = [
             n
             for n in network.neighbors(row_index)
-            if significant_binary_enrichment_matrix[n].sum() != 0
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
             if median_distance >= distance_threshold_value:
-                enrichment_matrix[row_index] = 0
-                significant_binary_enrichment_matrix[row_index] = 0
+                significance_matrix[row_index] = 0
+                significant_binary_significance_matrix[row_index] = 0
 
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
@@ -481,7 +477,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
-    # Compute the rank percentiles for the sorted distances
+    # Compute the rank fractions for the sorted distances
     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
     # Interpolating the ranks to 1000 evenly spaced percentiles
     interpolated_percentiles = np.linspace(0, 1, 1000)
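The renamed comment reflects that rank_percentiles holds rank fractions in [0, 1]. The hunk ends before the lookup step, so the np.interp call and the final indexing below are assumptions about how the threshold is read off the interpolated curve:

    import numpy as np

    median_distances = [0.2, 0.5, 0.6, 1.4, 2.0]  # hypothetical per-node medians
    sorted_distances = np.sort(median_distances)
    rank_percentiles = np.linspace(0, 1, len(sorted_distances))
    interpolated_percentiles = np.linspace(0, 1, 1000)
    smoothed = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
    distance_threshold = 0.9
    threshold_value = smoothed[int(distance_threshold * (len(smoothed) - 1))]
    print(round(float(threshold_value), 3))  # ~1.76 for these inputs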
risk/network/__init__.py CHANGED
@@ -3,6 +3,4 @@ risk/network
 ~~~~~~~~~~~~
 """
 
-from .graph import NetworkGraph
-from .io import NetworkIO
-from .plot import NetworkPlotter
+from risk.network.io import NetworkIO
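For downstream code, only NetworkIO remains importable from risk.network; per the file list above, the graph and plotter components now live in the new risk/network/graph/ and risk/network/plotter/ subpackages. A minimal before/after sketch:

    # 0.0.8b26
    # from risk.network import NetworkGraph, NetworkIO, NetworkPlotter

    # 0.0.9b26 -- only the I/O entry point is re-exported here
    from risk.network import NetworkIO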