risk_network-0.0.8b26-py3-none-any.whl → risk_network-0.0.9b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.8-beta.26"
+__version__ = "0.0.9-beta.1"
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_enrichment_sums: List[int],
-    significant_enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood enrichment sums and binary enrichment matrix.
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-        significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
-        significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant enrichment matrix (positive floating point values)
-    significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
-    # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
-    annotations_enrichment_matrix = pd.DataFrame(
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
-            "significant_enrichment_score": significant_enrichment_scores,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-    annotations_enrichment_matrix["significant_annotations"] = False
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-    annotations_enrichment_matrix.loc[
+    annotations_significance_matrix.loc[
         (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_enrichment_matrix["num_connected_components"] = 0
-    annotations_enrichment_matrix["size_connected_components"] = None
-    annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-    annotations_enrichment_matrix["num_large_connected_components"] = 0
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in annotations_enrichment_matrix.index.values[
-        annotations_enrichment_matrix["significant_annotations"]
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify enriched neighborhoods based on the binary enrichment matrix
-        enriched_neighborhoods = list(
-            compress(list(network), significant_binary_enrichment_matrix[:, attribute])
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        enriched_network = nx.subgraph(network, enriched_neighborhoods)
-        # Analyze connected components within the enriched subnetwork
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(enriched_network), key=len, reverse=True
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])
 
@@ -159,23 +159,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-        annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-        annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
            num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
            filtered_size_connected_components.tolist()
         )
 
-    return annotations_enrichment_matrix
+    return annotations_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
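
The renamed keyword arguments in define_top_annotations above are a breaking change for any code that calls the function directly. A minimal migration sketch, assuming placeholder variables (network, labels, sums, sig_matrix, binary_matrix) standing in for a caller's existing data:

# Hypothetical caller updating from the 0.0.8b26 keyword names
top_annotations_df = define_top_annotations(
    network=network,
    ordered_annotation_labels=labels,
    neighborhood_significance_sums=sums,  # was: neighborhood_enrichment_sums
    significant_significance_matrix=sig_matrix,  # was: significant_enrichment_matrix
    significant_binary_significance_matrix=binary_matrix,  # was: significant_binary_enrichment_matrix
    min_cluster_size=5,
    max_cluster_size=1000,
)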
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the enrichment scores to be between 0 and 1
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
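
The normalization branch above is a standard min-max rescale with a guard for the degenerate all-equal case. A standalone sketch with toy scores:

import pandas as pd

scores = pd.Series([2.0, 4.0, 8.0])
if scores.max() == scores.min():
    # All scores identical: fall back to uniform weights, as above
    normalized = pd.Series([1] * len(scores))
else:
    # Min-max rescale to [0, 1]
    normalized = (scores - scores.min()) / (scores.max() - scores.min())
print(normalized.tolist())  # [0.0, 0.3333333333333333, 1.0]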
risk/log/__init__.py CHANGED
@@ -3,7 +3,7 @@ risk/log
 ~~~~~~~~
 """
 
-from .config import logger, log_header, set_global_verbosity
+from .console import logger, log_header, set_global_verbosity
 from .params import Params
 
 params = Params()
risk/log/config.py → risk/log/console.py RENAMED
@@ -1,6 +1,6 @@
 """
-risk/log/config
-~~~~~~~~~~~~~~~
+risk/log/console
+~~~~~~~~~~~~~~~~
 """
 
 import logging
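
The module rename from risk.log.config to risk.log.console breaks any import of the old path. A migration sketch, assuming a consumer that imported the module directly rather than through the package root:

# Before (0.0.8b26):
# from risk.log.config import logger, log_header, set_global_verbosity
# After (0.0.9b1):
from risk.log.console import logger, log_header, set_global_verbosity

# Imports through the package root are unaffected by the rename:
from risk.log import logger, log_header, set_global_verbosity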
risk/log/enrichment.py ADDED
@@ -0,0 +1,18 @@
+"""
+risk/log/enrichment
+~~~~~~~~~~~~~~~~~~~
+"""
+
+import csv
+import json
+import warnings
+from datetime import datetime
+from functools import wraps
+from typing import Any, Dict
+
+import numpy as np
+
+from .console import logger, log_header
+
+# Suppress all warnings - this is to resolve warnings from multiprocessing
+warnings.filterwarnings("ignore")
risk/log/params.py CHANGED
@@ -12,7 +12,7 @@ from typing import Any, Dict
 
 import numpy as np
 
-from .config import logger, log_header
+from .console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
@@ -20,17 +20,17 @@ from risk.log import logger
 
 def define_domains(
     top_annotations: pd.DataFrame,
-    significant_neighborhoods_enrichment: np.ndarray,
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their enrichment scores and clustering,
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
 
     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-        significant_neighborhoods_enrichment (np.ndarray): The binary enrichment matrix below alpha.
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
         linkage_method (str): The linkage method for clustering.
         linkage_metric (str): The linkage metric for clustering.
@@ -40,7 +40,7 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_enrichment[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
@@ -65,13 +65,13 @@
         top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
     # Create DataFrames to store domain information
-    node_to_enrichment = pd.DataFrame(
-        data=significant_neighborhoods_enrichment,
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
         columns=[top_annotations.index.values, top_annotations["domain"]],
     )
-    node_to_domain = node_to_enrichment.groupby(level="domain", axis=1).sum()
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
 
-    # Find the maximum enrichment score for each node
+    # Find the maximum significance score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
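
The groupby/max/idxmax sequence above assigns each node to the domain with the largest summed significance. A toy sketch of the same pattern; it transposes instead of using groupby(axis=1), which recent pandas deprecates, and the column level names and domain-0 slicing are simplified assumptions here:

import numpy as np
import pandas as pd

# 4 nodes x 3 annotations; annotation columns belong to domains 1, 1, 2
significance = np.array([
    [0.0, 0.5, 0.0],
    [0.9, 0.0, 0.1],
    [0.0, 0.0, 0.0],
    [0.2, 0.3, 0.4],
])
cols = pd.MultiIndex.from_arrays([[0, 1, 2], [1, 1, 2]], names=["id", "domain"])
node_to_significance = pd.DataFrame(significance, columns=cols)
# Sum per-domain significance for each node
node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
t_max = node_to_domain.max(axis=1)        # strongest domain score per node
t_idxmax = node_to_domain.idxmax(axis=1)  # winning domain label per node
t_idxmax[t_max == 0] = 0                  # nodes with no signal fall to domain 0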
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
-    # Normalize "num enriched neighborhoods" by percentile for each domain and scale to 0-10
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "significant_neighborhood_enrichment_sums"
+        "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and significant_enrichment_score
+    # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotations["combined_terms"] = top_annotations.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )
 
-    # Perform the groupby operation while retaining the other columns and adding the weighting with enrichment scores
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
         top_annotations.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
-            enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
+            significance_scores=("significant_significance_score", lambda x: list(x)),
         )
         .reset_index()
     )
     domain_labels["combined_terms"] = domain_labels.apply(
         lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["enrichment_scores"])
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
         ),
         axis=1,
     )
@@ -150,7 +150,7 @@
             "domain": "id",
             "combined_terms": "normalized_description",
             "full_terms": "full_descriptions",
-            "enrichment_scores": "enrichment_scores",
+            "significance_scores": "significance_scores",
         }
     ).set_index("id")
 
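The normalized_value step above turns each annotation's neighborhood sum into an integer weight from 1 to 10 via its percentile rank within the domain, and combined_terms repeats each term that many times so stronger annotations dominate the weighted description. A toy sketch of the two steps, with made-up terms and sums:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "domain": [1, 1, 1],
    "full_terms": ["golgi transport", "vesicle fusion", "er export"],
    "significant_neighborhood_significance_sums": [3, 10, 25],
})
# Percentile-rank within each domain, scaled to an integer weight in 1..10
df["normalized_value"] = df.groupby("domain")[
    "significant_neighborhood_significance_sums"
].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
# Repeat each term proportionally to its weight, as the diff above does
df["combined_terms"] = df.apply(
    lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
)
print(df["normalized_value"].tolist())  # [4, 7, 10]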
@@ -171,163 +171,169 @@ def process_neighborhoods(
 
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (Dict[str, Any]): Dictionary containing 'enrichment_matrix', 'significant_binary_enrichment_matrix', and 'significant_enrichment_matrix'.
+        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
-        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and enrichment counts.
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
     """
-    enrichment_matrix = neighborhoods["enrichment_matrix"]
-    significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
-    significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+    significance_matrix = neighborhoods["significance_matrix"]
+    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _impute_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             max_depth=impute_depth,
         )
 
     logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
-            significant_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
        ) = _prune_neighbors(
             network,
-            enrichment_matrix,
-            significant_binary_enrichment_matrix,
+            significance_matrix,
+            significant_binary_significance_matrix,
             distance_threshold=prune_threshold,
         )
 
-    neighborhood_enrichment_counts = np.sum(significant_binary_enrichment_matrix, axis=0)
-    node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+    node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
-        "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
-        "node_enrichment_sums": node_enrichment_sums,
+        "significance_matrix": significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "neighborhood_significance_counts": neighborhood_significance_counts,
+        "node_significance_sums": node_significance_sums,
     }
 
 
 def _impute_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors in the network graph.
+    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
         tuple: A tuple containing:
-            - np.ndarray: The imputed enrichment matrix.
+            - np.ndarray: The imputed significance matrix.
             - np.ndarray: The imputed alpha threshold matrix.
-            - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+            - np.ndarray: The significant significance matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-    enrichment_matrix, significant_binary_enrichment_matrix = _impute_neighbors_with_similarity(
-        network, enrichment_matrix, significant_binary_enrichment_matrix, max_depth=max_depth
+    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _impute_neighbors_with_similarity(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
         Tuple[np.ndarray, np.ndarray]: A tuple containing:
-            - The imputed enrichment matrix.
+            - The imputed significance matrix.
             - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all enriched nodes
-        for row_index in range(significant_binary_enrichment_matrix.shape[0]):
-            if significant_binary_enrichment_matrix[row_index].sum() != 0:
-                enrichment_matrix, significant_binary_enrichment_matrix = _process_node_imputation(
-                    row_index,
-                    network,
-                    enrichment_matrix,
-                    significant_binary_enrichment_matrix,
-                    depth,
+        # Iterate over all significant nodes
+        for row_index in range(significant_binary_significance_matrix.shape[0]):
+            if significant_binary_significance_matrix[row_index].sum() != 0:
+                significance_matrix, significant_binary_significance_matrix = (
+                    _process_node_imputation(
+                        row_index,
+                        network,
+                        significance_matrix,
+                        significant_binary_significance_matrix,
+                        depth,
+                    )
                 )
 
         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(significant_binary_enrichment_matrix.sum(axis=1) == 0)[0]
+        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
         depth += 1
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its enriched neighbors.
+    """Process the imputation for a single node based on its significant neighbors.
 
     Args:
-        row_index (int): The index of the enriched node being processed.
+        row_index (int): The index of the significant node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
     """
     # Check neighbors at the current depth
     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already enriched)
+    # Filter annotated neighbors (already significant)
     annotated_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() != 0
-        and enrichment_matrix[n].sum() != 0
+        and significant_binary_significance_matrix[n].sum() != 0
+        and significance_matrix[n].sum() != 0
     ]
-    # Filter non-enriched neighbors
+    # Filter non-significant neighbors
     valid_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and significant_binary_enrichment_matrix[n].sum() == 0
-        and enrichment_matrix[n].sum() == 0
+        and significant_binary_significance_matrix[n].sum() == 0
+        and significance_matrix[n].sum() == 0
     ]
-    # If there are valid non-enriched neighbors
+    # If there are valid non-significant neighbors
     if valid_neighbors and annotated_neighbors:
         # Calculate distances to annotated neighbors
         distances_to_annotated = [
@@ -338,7 +344,7 @@ def _process_node_imputation(
         iqr = q3 - q1
         lower_bound = q1 - 1.5 * iqr
         upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        # Filter valid non-significant neighbors that fall within the IQR bounds
         valid_neighbors_within_iqr = [
             n
             for n in valid_neighbors
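
The bounds above are standard Tukey fences: distances outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are treated as outliers. A standalone sketch of the fence arithmetic on toy distances (the package applies the same bounds when filtering valid_neighbors):

import numpy as np

distances_to_annotated = [0.2, 0.4, 0.5, 0.6, 3.0]  # toy values; 3.0 is an outlier
q1, q3 = np.percentile(distances_to_annotated, [25, 75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
within = [d for d in distances_to_annotated if lower_bound <= d <= upper_bound]
print(within)  # [0.2, 0.4, 0.5, 0.6] - 3.0 falls outside the upper fence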
@@ -352,8 +358,8 @@
         def sum_pairwise_cosine_similarities(neighbor):
             return sum(
                 cosine_similarity(
-                    enrichment_matrix[neighbor].reshape(1, -1),
-                    enrichment_matrix[other_neighbor].reshape(1, -1),
+                    significance_matrix[neighbor].reshape(1, -1),
+                    significance_matrix[other_neighbor].reshape(1, -1),
                 )[0][0]
                 for other_neighbor in valid_neighbors_within_iqr
                 if other_neighbor != neighbor
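
sum_pairwise_cosine_similarities above scores each candidate by how similar its significance profile is to the other in-bounds candidates, so the most representative one receives the imputed values. The cosine call in isolation, on toy row profiles:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

significance_matrix = np.array([
    [1.0, 0.0, 2.0],
    [0.5, 0.1, 1.0],
])
# cosine_similarity expects 2D inputs, hence the reshape(1, -1) above
sim = cosine_similarity(
    significance_matrix[0].reshape(1, -1),
    significance_matrix[1].reshape(1, -1),
)[0][0]
print(round(sim, 3))  # close to 1.0: the two profiles point the same way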
@@ -365,43 +371,45 @@
         else:
             most_similar_neighbor = valid_neighbors_within_iqr[0]
 
-        # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
-        enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+        # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+        significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
             depth + 1
         )
-        significant_binary_enrichment_matrix[most_similar_neighbor] = (
-            significant_binary_enrichment_matrix[row_index]
+        significant_binary_significance_matrix[most_similar_neighbor] = (
+            significant_binary_significance_matrix[row_index]
         )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _prune_neighbors(
     network: nx.Graph,
-    enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        enrichment_matrix (np.ndarray): The enrichment matrix.
-        significant_binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+        significance_matrix (np.ndarray): The significance matrix.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
 
     Returns:
         tuple: A tuple containing:
-            - np.ndarray: The updated enrichment matrix with outliers set to zero.
+            - np.ndarray: The updated significance matrix with outliers set to zero.
             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-            - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+            - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
     """
-    # Identify indices with non-zero rows in the binary enrichment matrix
-    non_zero_indices = np.where(significant_binary_enrichment_matrix.sum(axis=1) != 0)[0]
+    # Identify indices with non-zero rows in the binary significance matrix
+    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
         neighbors = [
-            n for n in network.neighbors(node) if significant_binary_enrichment_matrix[n].sum() != 0
+            n
+            for n in network.neighbors(node)
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
@@ -416,22 +424,26 @@ def _prune_neighbors(
         neighbors = [
             n
             for n in network.neighbors(row_index)
-            if significant_binary_enrichment_matrix[n].sum() != 0
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
             if median_distance >= distance_threshold_value:
-                enrichment_matrix[row_index] = 0
-                significant_binary_enrichment_matrix[row_index] = 0
+                significance_matrix[row_index] = 0
+                significant_binary_significance_matrix[row_index] = 0
 
     # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return enrichment_matrix, significant_binary_enrichment_matrix, significant_enrichment_matrix
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
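
For code that calls process_neighborhoods directly and indexes into its returned dictionary, every key is renamed in 0.0.9b1. A hypothetical migration sketch (the left-hand variable names are placeholders):

processed = process_neighborhoods(network, neighborhoods)
significance_matrix = processed["significance_matrix"]  # was "enrichment_matrix"
binary_matrix = processed["significant_binary_significance_matrix"]  # was "significant_binary_enrichment_matrix"
significant_matrix = processed["significant_significance_matrix"]  # was "significant_enrichment_matrix"
counts = processed["neighborhood_significance_counts"]  # was "neighborhood_enrichment_counts"
node_sums = processed["node_significance_sums"]  # was "node_enrichment_sums"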