risk-network 0.0.12b2 → 0.0.12b4 (py3-none-any.whl)

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
risk/neighborhoods/api.py CHANGED
@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
     The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
     """

-    def __init__() -> None:
+    def __init__(self) -> None:
         pass

-    def load_neighborhoods_by_binom(
+    def load_neighborhoods_binom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -46,7 +46,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:
@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the binomial test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_binom_test,
         )

-    def load_neighborhoods_by_chi2(
+    def load_neighborhoods_chi2(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -91,7 +91,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:
@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the chi-squared test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_chi2_test,
         )

-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_hypergeom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -136,7 +136,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:
@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the hypergeometric test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_hypergeom_test,
         )

-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_permutation(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -184,7 +184,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the permutation test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
             max_workers=max_workers,
         )

-    def load_neighborhoods_by_poisson(
+    def load_neighborhoods_poisson(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -239,7 +239,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:
@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the Poisson test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_poisson_test,
         )

-    def load_neighborhoods_by_zscore(
+    def load_neighborhoods_zscore(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -284,7 +284,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.

         Returns:
@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the z-score test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
     def _load_neighborhoods_by_statistical_test(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -332,7 +332,7 @@ class NeighborhoodsAPI:

         Args:
             network (nx.Graph): The input network graph.
-            annotations (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
+            annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
             distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
                 Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
                 Defaults to "louvain".
@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
             fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
                 Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
-            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotations').
+            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                 Defaults to "network".
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
                 Used for logging and debugging. Defaults to "hypergeom".
             statistical_test_function (Any, optional): The function implementing the statistical test.
-                It should accept neighborhoods, annotations, null distribution, and additional kwargs.
+                It should accept neighborhoods, annotation, null distribution, and additional kwargs.
                 Defaults to `compute_hypergeom_test`.
             **kwargs: Additional parameters to be passed to the statistical test function.

@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
         # Apply statistical test function to compute neighborhood significance
         neighborhood_significance = statistical_test_function(
             neighborhoods=neighborhoods,
-            annotations=annotations["matrix"],
+            annotation=annotation["matrix"],
             null_distribution=null_distribution,
             **kwargs,
         )
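Taken together, the hunks above are a single rename sweep: the public loaders drop the "_by" infix (load_neighborhoods_by_binom becomes load_neighborhoods_binom, and likewise for chi2, hypergeom, permutation, poisson, and zscore), the annotations parameter becomes the singular annotation, and the null-distribution label 'annotations' becomes 'annotation'. A minimal before/after sketch of a call site; the import path is inferred from the file header, and the graph and annotation objects are placeholders rather than a working analysis:

import networkx as nx

from risk.neighborhoods.api import NeighborhoodsAPI  # import path assumed from the file header

api = NeighborhoodsAPI()
network = nx.karate_club_graph()  # stand-in graph
annotation = {"matrix": None}     # placeholder; real input carries a "matrix" key per the docstring

# 0.0.12b2:
# api.load_neighborhoods_by_hypergeom(
#     network=network, annotations=annotation, null_distribution="annotations"
# )

# 0.0.12b4, singular names throughout:
# api.load_neighborhoods_hypergeom(
#     network=network, annotation=annotation, null_distribution="annotation"
# )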
@@ -13,7 +13,7 @@ from scipy.cluster.hierarchy import fcluster, linkage
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm

-from risk.annotations import get_weighted_description
+from risk.annotation import get_weighted_description
 from risk.log import logger

 # Define constants for clustering
@@ -28,7 +28,7 @@ LINKAGE_METRICS = {


 def define_domains(
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
@@ -39,7 +39,7 @@ def define_domains(
     handling errors by assigning unique domains when clustering fails.

     Args:
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
         linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
@@ -57,7 +57,7 @@ def define_domains(
             raise ValueError("Clustering is turned off.")

         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
         # Optimize silhouette score across different linkage methods and distance metrics
@@ -71,27 +71,23 @@ def define_domains(
         )
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        # Assign domains to the annotations matrix
+        # Assign domains to the annotation matrix
         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-        top_annotations["domain"] = 0
-        top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
+        top_annotation["domain"] = 0
+        top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
     except (ValueError, LinAlgError):
         # If a ValueError is encountered, handle it by assigning unique domains
-        n_rows = len(top_annotations)
+        n_rows = len(top_annotation)
         if linkage_criterion == "off":
-            logger.warning(
-                f"Clustering is turned off. Skipping clustering and assigning {n_rows} unique domains."
-            )
+            logger.warning("Clustering is turned off. Skipping clustering.")
         else:
-            logger.error(
-                f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
-            )
-        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
+            logger.error("Error encountered. Skipping clustering.")
+        top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains

     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
         data=significant_neighborhoods_significance,
-        columns=[top_annotations.index.values, top_annotations["domain"]],
+        columns=[top_annotation.index.values, top_annotation["domain"]],
     )
     node_to_domain = node_to_significance.T.groupby(level="domain").sum().T

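The clustering core these hunks rename around is unchanged: rows of the significance matrix are linked hierarchically, and the tree is cut at a fraction of the largest merge distance. A self-contained sketch of that fcluster pattern on toy data (the method, metric, and threshold are hard-coded here, whereas define_domains derives them from the silhouette sweep below):

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

# Toy stand-in for the transposed significance matrix `m` in define_domains
rng = np.random.default_rng(888)
m = rng.integers(0, 2, size=(12, 40)).astype(float)

Z = linkage(m, method="average", metric="euclidean")
best_threshold = 0.5  # hard-coded here; optimized in the package
max_d_optimal = np.max(Z[:, 2]) * best_threshold

# One integer domain label per row of m
domains = fcluster(Z, max_d_optimal, criterion="distance")
print(domains)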
@@ -112,15 +108,15 @@

 def trim_domains(
     domains: pd.DataFrame,
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
-) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """Trim domains that do not meet size criteria and find outliers.

     Args:
         domains (pd.DataFrame): DataFrame of domain data for the network nodes.
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         min_cluster_size (int, optional): Minimum size of a cluster to be retained. Defaults to 5.
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.

@@ -139,21 +135,21 @@ def trim_domains(
     invalid_domain_id = 888888
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
-    top_annotations["domain"] = top_annotations["domain"].replace(to_remove, invalid_domain_id)
+    top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id

     # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
-    top_annotations["normalized_value"] = top_annotations.groupby("domain")[
+    top_annotation["normalized_value"] = top_annotation.groupby("domain")[
         "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
     # Modify the lambda function to pass both full_terms and significant_significance_score
-    top_annotations["combined_terms"] = top_annotations.apply(
+    top_annotation["combined_terms"] = top_annotation.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )

     # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
-        top_annotations.groupby("domain")
+        top_annotation.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
             significance_scores=("significant_significance_score", lambda x: list(x)),
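One change in this section is breaking rather than cosmetic: trim_domains now returns two DataFrames instead of three, so any caller that unpacked three values will raise at runtime. The diff does not show which return was dropped; the sketch below only illustrates the arity change with a stand-in function:

import pandas as pd

def trim_domains_stub():
    # Stand-in mimicking the 0.0.12b4 signature: two DataFrames, not three
    return pd.DataFrame(), pd.DataFrame()

domains, domain_labels = trim_domains_stub()          # fine
# domains, domain_labels, extra = trim_domains_stub() # ValueError: not enough values to unpack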
@@ -233,7 +229,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
     # Initialize best overall values
     best_overall_method = linkage_method
     best_overall_metric = linkage_metric
-    best_overall_threshold = linkage_threshold
+    best_overall_threshold = 0.0
     best_overall_score = -np.inf

     # Set linkage methods and metrics to all combinations if "auto" is selected
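The one functional fix in this hunk: best_overall_threshold is now seeded with the float 0.0 instead of echoing the linkage_threshold argument, which, like linkage_method and linkage_metric here, can plausibly arrive as the sentinel string "auto" (an assumption based on the "auto" handling visible in the surrounding context). Seeding with a float keeps the best-so-far bookkeeping consistently typed; a toy sketch of the sweep pattern:

import numpy as np

def sweep(methods, metrics, thresholds):
    # Toy best-so-far bookkeeping mirroring the initialization fixed above
    best_method, best_metric = methods[0], metrics[0]
    best_threshold = 0.0  # float seed, never the "auto" sentinel string
    best_score = -np.inf
    for method in methods:
        for metric in metrics:
            for t in thresholds:
                score = -abs(t - 0.4)  # placeholder objective, not a real silhouette
                if score > best_score:
                    best_method, best_metric = method, metric
                    best_threshold, best_score = t, score
    return best_method, best_metric, best_threshold

print(sweep(["average"], ["euclidean"], np.linspace(0.1, 0.9, 9)))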
@@ -449,7 +449,7 @@ def _prune_neighbors(
     )


-def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
+def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> np.floating[Any]:
     """Calculate the Euclidean distance between two nodes in the network.

     Args:
@@ -458,7 +458,7 @@ def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
         network (nx.Graph): The network graph containing the nodes.

     Returns:
-        float: The Euclidean distance between the two nodes.
+        np.floating[Any]: The Euclidean distance between the two nodes.
     """
     pos1 = _get_node_position(network, node1)
     pos2 = _get_node_position(network, node2)
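The annotation change above matches what NumPy actually returns: np.linalg.norm yields a NumPy scalar (np.float64, an np.floating subclass) rather than a builtin float. A quick check:

import numpy as np

pos1 = np.array([0.0, 0.0])
pos2 = np.array([3.0, 4.0])
dist = np.linalg.norm(pos1 - pos2)

print(dist)                           # 5.0
print(type(dist))                     # <class 'numpy.float64'>
print(isinstance(dist, np.floating))  # True

Since np.float64 also subclasses the builtin float, the old annotation was not wrong at runtime; the new one is simply the more precise type that static checkers see.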
@@ -495,7 +495,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
         float: The calculated distance threshold value.

     Raises:
-        ValueError: If no significant annotations are found in the median distances.
+        ValueError: If no significant annotation is found in the median distances.
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
@@ -506,7 +506,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
     try:
         smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
     except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
+        raise ValueError("No significant annotation found.") from e

     # Determine the index corresponding to the distance threshold
     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
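For context on the hunks above, _calculate_threshold smooths the sorted median distances onto an even percentile grid with np.interp and then indexes that grid at the requested fraction. A standalone sketch of the same arithmetic; the construction of the two percentile grids is an assumption, since the diff shows only their names:

import numpy as np

median_distances = [0.8, 0.2, 0.5, 0.9, 0.4]
distance_threshold = 0.5  # keep the shortest 50%

sorted_distances = np.sort(median_distances)
rank_percentiles = np.linspace(0, 1, len(sorted_distances))  # assumed grid
interpolated_percentiles = np.linspace(0, 1, 100)            # assumed grid
smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)

threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
print(smoothed_distances[threshold_index])  # distance value at the 50th percentile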
@@ -17,7 +17,7 @@ from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS

 def compute_permutation_test(
     neighborhoods: csr_matrix,
-    annotations: csr_matrix,
+    annotation: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -28,9 +28,9 @@ def compute_permutation_test(

     Args:
         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        annotation (csr_matrix): Sparse binary matrix representing annotation.
         score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
-        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
         max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
@@ -41,14 +41,14 @@ def compute_permutation_test(
     # Ensure that the matrices are in the correct format and free of NaN values
     # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
-    annotations = annotations.astype(np.float32)
+    annotation = annotation.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
     neighborhood_score_func = DISPATCH_TEST_FUNCTIONS[score_metric]

     # Run the permutation test to calculate depletion and enrichment counts
     counts_depletion, counts_enrichment = _run_permutation_test(
         neighborhoods=neighborhoods,
-        annotations=annotations,
+        annotation=annotation,
         neighborhood_score_func=neighborhood_score_func,
         null_distribution=null_distribution,
         num_permutations=num_permutations,
@@ -68,7 +68,7 @@ def compute_permutation_test(

 def _run_permutation_test(
     neighborhoods: csr_matrix,
-    annotations: csr_matrix,
+    annotation: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -79,9 +79,9 @@ def _run_permutation_test(

     Args:
         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        annotation (csr_matrix): Sparse binary matrix representing annotation.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
         max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
@@ -96,17 +96,17 @@ def _run_permutation_test(
     rng = np.random.default_rng(seed=random_seed)
     # Determine the indices to use based on the null distribution type
     if null_distribution == "network":
-        idxs = range(annotations.shape[0])
-    elif null_distribution == "annotations":
-        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
+        idxs = range(annotation.shape[0])
+    elif null_distribution == "annotation":
+        idxs = np.nonzero(annotation.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
-            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+            "Invalid null_distribution value. Choose either 'network' or 'annotation'."
         )

-    # Replace NaNs with zeros in the sparse annotations matrix
-    annotations.data[np.isnan(annotations.data)] = 0
-    annotation_matrix_obsv = annotations[idxs]
+    # Replace NaNs with zeros in the sparse annotation matrix
+    annotation.data[np.isnan(annotation.data)] = 0
+    annotation_matrix_obsv = annotation[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
     with np.errstate(invalid="ignore", divide="ignore"):
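Behind the rename, the two null models differ only in which rows feed the permutation pool: 'network' permutes over every node, while 'annotation' restricts the pool to nodes that carry at least one annotation term. A self-contained sketch of that row selection on a toy sparse matrix:

import numpy as np
from scipy.sparse import csr_matrix

# Toy node-by-term annotation matrix; row 1 is entirely unannotated
annotation = csr_matrix(
    np.array([[1, 0, 0], [0, 0, 0], [0, 1, 1]], dtype=np.float32)
)

idxs_network = range(annotation.shape[0])                       # 'network' null
idxs_annotation = np.nonzero(annotation.getnnz(axis=1) > 0)[0]  # 'annotation' null

print(list(idxs_network))  # [0, 1, 2]
print(idxs_annotation)     # [0 2]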
@@ -142,7 +142,7 @@ def _run_permutation_test(
     params_list = [
         (
             permutation_batches[i],  # Pass the batch of precomputed permutations
-            annotations,
+            annotation,
             neighborhoods_matrix_obsv,
             observed_neighborhood_scores,
             neighborhood_score_func,
@@ -185,7 +185,7 @@ def _permutation_process_batch(

     Args:
         permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
-        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotation.
         neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
@@ -24,7 +24,7 @@ def compute_neighborhood_score_by_sum(
     Returns:
         np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
-    # Calculate the neighborhood score as the dot product of neighborhoods and annotations
+    # Calculate the neighborhood score as the dot product of neighborhoods and annotation
     neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
     # Convert the result to a dense array for downstream calculations
     neighborhood_score_dense = neighborhood_score.toarray()
@@ -43,7 +43,7 @@ def compute_neighborhood_score_by_stdev(
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
-    # Calculate the neighborhood score as the dot product of neighborhoods and annotations
+    # Calculate the neighborhood score as the dot product of neighborhoods and annotation
     neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
     # Calculate the number of elements in each neighborhood (sum of rows)
     N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
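Both scoring kernels reduce to a single sparse product: the binary neighborhoods matrix times the annotation matrix gives per-neighborhood totals, and the stdev variant then combines those totals with the neighborhood sizes N shown above. A compact sketch of the 'sum' metric on toy matrices:

import numpy as np
from scipy.sparse import csr_matrix

# 3 neighborhoods over 3 nodes, 2 annotation terms
neighborhoods_matrix = csr_matrix(
    np.array([[1, 1, 0], [0, 1, 1], [1, 1, 1]], dtype=np.float32)
)
annotation_matrix = csr_matrix(
    np.array([[1, 0], [1, 1], [0, 1]], dtype=np.float32)
)

# Neighborhood score as the dot product of neighborhoods and annotation
neighborhood_score = neighborhoods_matrix @ annotation_matrix
print(neighborhood_score.toarray())  # [[2. 1.], [1. 2.], [2. 2.]]

# Neighborhood sizes used by the stdev variant
N = np.asarray(neighborhoods_matrix.sum(axis=1)).flatten()
print(N)  # [2. 2. 3.]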