risk-network 0.0.6b9__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ risk/neighborhoods/domains
 """
 
 from contextlib import suppress
+from itertools import product
 from tqdm import tqdm
 from typing import Tuple
 
@@ -14,6 +15,7 @@ from sklearn.metrics import silhouette_score
 
 from risk.annotations import get_description
 from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS
+from risk.log import logger
 
 
 def define_domains(
@@ -23,7 +25,8 @@ def define_domains(
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their enrichment scores and clustering.
+    """Define domains and assign nodes to these domains based on their enrichment scores and clustering,
+    handling errors by assigning unique domains when clustering fails.
 
     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
@@ -35,31 +38,31 @@ def define_domains(
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
     """
-    # Check if there's more than one column in significant_neighborhoods_enrichment
-    if significant_neighborhoods_enrichment.shape[1] == 1:
-        print("Single annotation detected. Skipping clustering.")
-        top_annotations["domain"] = 1  # Assign a default domain or handle appropriately
-    else:
-        # Perform hierarchical clustering on the binary enrichment matrix
+    try:
+        # Transpose the matrix to cluster annotations
         m = significant_neighborhoods_enrichment[:, top_annotations["top attributes"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
-        try:
-            Z = linkage(m, method=best_linkage, metric=best_metric)
-        except ValueError as e:
-            raise ValueError("No significant annotations found.") from e
-
-        print(
+        # Perform hierarchical clustering
+        Z = linkage(m, method=best_linkage, metric=best_metric)
+        logger.warning(
            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
        )
-        print(f"Optimal linkage threshold: {round(best_threshold, 3)}")
-
+        logger.debug(f"Optimal linkage threshold: {round(best_threshold, 3)}")
+        # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
         # Assign domains to the annotations matrix
+        domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
         top_annotations["domain"] = 0
         top_annotations.loc[top_annotations["top attributes"], "domain"] = domains
+    except ValueError:
+        # If a ValueError is encountered, handle it by assigning unique domains
+        n_rows = len(top_annotations)
+        logger.error(
+            f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
+        )
+        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
     # Create DataFrames to store domain information
     node_to_enrichment = pd.DataFrame(
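
The rewritten define_domains cuts the dendrogram at a fraction of its maximum merge height and falls back to one-domain-per-row only when clustering raises a ValueError. A minimal sketch of the happy path, with a toy matrix and an illustrative 0.5 in place of the optimized best_threshold:

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

m = np.array([[1, 0, 1], [1, 0, 0], [0, 1, 1], [0, 1, 0]], dtype=float)  # toy profiles
Z = linkage(m, method="average", metric="euclidean")  # build the dendrogram
max_d_optimal = np.max(Z[:, 2]) * 0.5  # cut at a fraction of the largest merge distance
domains = fcluster(Z, max_d_optimal, criterion="distance")  # one domain label per row
print(domains)
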
@@ -166,21 +169,20 @@ def _optimize_silhouette_across_linkage_and_metrics(
     total_combinations = len(linkage_methods) * len(linkage_metrics)
 
     # Evaluating optimal linkage method and metric
-    for method in tqdm(
-        linkage_methods,
+    for method, metric in tqdm(
+        product(linkage_methods, linkage_metrics),
         desc="Evaluating optimal linkage method and metric",
         total=total_combinations,
         bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
     ):
-        for metric in linkage_metrics:
-            with suppress(Exception):
-                Z = linkage(m, method=method, metric=metric)
-                threshold, score = _find_best_silhouette_score(Z, m, metric, linkage_criterion)
-                if score > best_overall_score:
-                    best_overall_score = score
-                    best_overall_threshold = threshold
-                    best_overall_method = method
-                    best_overall_metric = metric
+        with suppress(Exception):
+            Z = linkage(m, method=method, metric=metric)
+            threshold, score = _find_best_silhouette_score(Z, m, metric, linkage_criterion)
+            if score > best_overall_score:
+                best_overall_score = score
+                best_overall_threshold = threshold
+                best_overall_method = method
+                best_overall_metric = metric
 
     return best_overall_method, best_overall_metric, best_overall_threshold
 
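
The optimizer refactor flattens the nested method/metric loops into a single pass over itertools.product, so the tqdm bar's total finally matches the number of iterations performed. The pattern in isolation, with illustrative method and metric lists:

from itertools import product

from tqdm import tqdm

methods = ["single", "average", "complete"]
metrics = ["euclidean", "cosine"]
for method, metric in tqdm(product(methods, metrics), total=len(methods) * len(metrics)):
    ...  # evaluate one (method, metric) combination per bar tick
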
@@ -3,21 +3,24 @@ risk/neighborhoods/neighborhoods
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+import random
 import warnings
 from typing import Any, Dict, List, Tuple
 
 import networkx as nx
 import numpy as np
 from sklearn.exceptions import DataConversionWarning
+from sklearn.metrics.pairwise import cosine_similarity
 
 from risk.neighborhoods.community import (
-    calculate_dijkstra_neighborhoods,
+    calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
     calculate_louvain_neighborhoods,
     calculate_markov_clustering_neighborhoods,
     calculate_spinglass_neighborhoods,
     calculate_walktrap_neighborhoods,
 )
+from risk.log import logger
 
 # Suppress DataConversionWarning
 warnings.filterwarnings(action="ignore", category=DataConversionWarning)
@@ -25,7 +28,7 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 
 def get_network_neighborhoods(
     network: nx.Graph,
-    distance_metric: str = "dijkstra",
+    distance_metric: str = "louvain",
     edge_length_threshold: float = 1.0,
     louvain_resolution: float = 1.0,
     random_seed: int = 888,
@@ -34,8 +37,8 @@
 
     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str): The distance metric to use ('euclidean', 'dijkstra', 'louvain', 'affinity_propagation',
-            'label_propagation', 'markov_clustering', 'walktrap', 'spinglass').
+        distance_metric (str): The distance metric to use ('greedy_modularity', 'louvain', 'label_propagation',
+            'markov_clustering', 'walktrap', 'spinglass').
         edge_length_threshold (float): The edge length threshold for the neighborhoods.
         louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
         random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
@@ -43,12 +46,19 @@
     Returns:
         np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
     """
-    network = _create_percentile_limited_subgraph(network, edge_length_threshold)
+    # Set random seed for reproducibility in all methods besides Louvain, which requires a separate seed
+    random.seed(random_seed)
+    np.random.seed(random_seed)
+
+    # Create a subgraph based on the edge length percentile threshold
+    network = _create_percentile_limited_subgraph(
+        network, edge_length_percentile=edge_length_threshold
+    )
 
-    if distance_metric == "dijkstra":
-        return calculate_dijkstra_neighborhoods(network)
     if distance_metric == "louvain":
         return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
+    if distance_metric == "greedy_modularity":
+        return calculate_greedy_modularity_neighborhoods(network)
     if distance_metric == "label_propagation":
         return calculate_label_propagation_neighborhoods(network)
     if distance_metric == "markov_clustering":
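
The seeding added at the top of get_network_neighborhoods fixes Python's and NumPy's global generators so the stochastic community-detection metrics become repeatable; Louvain still receives its own random_seed argument. A minimal illustration of the global-seed effect:

import random

import numpy as np

random.seed(888)
np.random.seed(888)
print(random.random(), np.random.rand())  # same pair on every run
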
@@ -59,41 +69,51 @@
         return calculate_spinglass_neighborhoods(network)
 
     raise ValueError(
-        "Incorrect distance metric specified. Please choose from 'dijkstra', 'louvain',"
+        "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
         "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
     )
 
 
 def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
-    """Calculate the edge length corresponding to the given percentile of edge lengths in the graph
-    and create a subgraph with all nodes and edges below this length.
+    """Create a subgraph containing all nodes and edges where the edge length is below the
+    specified percentile of all edge lengths in the input graph.
 
     Args:
-        G (nx.Graph): The input graph.
-        edge_length_percentile (float): The percentile to calculate (between 0 and 1).
+        G (nx.Graph): The input graph with 'length' attributes on edges.
+        edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
 
     Returns:
-        nx.Graph: A subgraph with all nodes and edges below the edge length corresponding to the given percentile.
+        nx.Graph: A subgraph with all nodes and edges where the edge length is below the
+            calculated threshold length.
     """
-    # Extract edge lengths from the graph
+    # Extract edge lengths and handle missing lengths
     edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
+    if not edge_lengths:
+        raise ValueError(
+            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
+        )
+
     # Calculate the specific edge length for the given percentile
     percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
-    # Create a new graph with all nodes from the original graph
+    # Create the subgraph by directly filtering edges during iteration
     subgraph = nx.Graph()
-    subgraph.add_nodes_from(G.nodes(data=True))
-    # Add edges to the subgraph if they are below the specified percentile length
+    subgraph.add_nodes_from(G.nodes(data=True))  # Retain all nodes from the original graph
+    # Add edges below the specified percentile length in a single pass
     for u, v, d in G.edges(data=True):
         if d.get("length", 1) <= percentile_length:
             subgraph.add_edge(u, v, **d)
 
+    # Return the subgraph; optionally check if it's too sparse
+    if subgraph.number_of_edges() == 0:
+        raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
+
     return subgraph
 
 
 def process_neighborhoods(
     network: nx.Graph,
     neighborhoods: Dict[str, Any],
-    impute_depth: int = 1,
+    impute_depth: int = 0,
     prune_threshold: float = 0.0,
 ) -> Dict[str, Any]:
     """Process neighborhoods based on the imputation and pruning settings.
@@ -101,7 +121,7 @@
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
         neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix', and 'significant_enrichment_matrix'.
-        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
+        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
        prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
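
Stepping back to _create_percentile_limited_subgraph from the hunk above: it keeps every node but drops edges whose 'length' exceeds the percentile cutoff. A self-contained sketch of that filter, with a toy three-edge graph and an illustrative 0.5 percentile:

import networkx as nx
import numpy as np

G = nx.Graph()
G.add_edge("a", "b", length=1.0)
G.add_edge("b", "c", length=2.0)
G.add_edge("c", "d", length=10.0)

edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
percentile_length = np.percentile(edge_lengths, 0.5 * 100)  # cutoff at the 50th percentile
subgraph = nx.Graph()
subgraph.add_nodes_from(G.nodes(data=True))  # every node survives, even if isolated
for u, v, d in G.edges(data=True):
    if d.get("length", 1) <= percentile_length:
        subgraph.add_edge(u, v, **d)
print(subgraph.number_of_edges())  # 2; the 10.0 edge is dropped
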
@@ -110,7 +130,7 @@
     enrichment_matrix = neighborhoods["enrichment_matrix"]
     binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
     significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
-    print(f"Imputation depth: {impute_depth}")
+    logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
             enrichment_matrix,
@@ -123,7 +143,7 @@
             max_depth=impute_depth,
         )
 
-    print(f"Pruning threshold: {prune_threshold}")
+    logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
             enrichment_matrix,
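
The remaining print calls in process_neighborhoods become logger.debug calls. The diff shows only that risk.log exposes a logger with the usual debug/warning/error methods; a stdlib-logging stand-in that reproduces these calls might look like:

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("risk")  # stand-in for `from risk.log import logger`

logger.debug("Imputation depth: 0")
logger.debug("Pruning threshold: 0.0")
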
@@ -167,55 +187,134 @@ def _impute_neighbors(
             - np.ndarray: The imputed alpha threshold matrix.
             - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
     """
-    # Calculate shortest distances for each node to determine the distance threshold
-    shortest_distances = []
-    for node in network.nodes():
-        try:
-            neighbors = [
-                n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0
-            ]
-        except IndexError as e:
-            raise IndexError(
-                f"Failed to find neighbors for node '{node}': Ensure that the node exists in the network and that the binary enrichment matrix is correctly indexed."
-            ) from e
-
-        # Calculate the shortest distance to a neighbor
-        if neighbors:
-            shortest_distance = min([_get_euclidean_distance(node, n, network) for n in neighbors])
-            shortest_distances.append(shortest_distance)
+    # Impute rows of non-enriched nodes based on similarity to enriched neighbors
+    enrichment_matrix, binary_enrichment_matrix = _impute_neighbors_with_similarity(
+        network, enrichment_matrix, binary_enrichment_matrix, max_depth=max_depth
+    )
+    # Create a matrix where non-significant entries are set to zero
+    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+
+    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
 
+def _impute_neighbors_with_similarity(
+    network: nx.Graph,
+    enrichment_matrix: np.ndarray,
+    binary_enrichment_matrix: np.ndarray,
+    max_depth: int = 3,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+
+    Args:
+        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        max_depth (int): Maximum depth of nodes to traverse for imputing values.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: A tuple containing:
+            - The imputed enrichment matrix.
+            - The imputed alpha threshold matrix.
+    """
     depth = 1
     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        next_rows_to_impute = []
-        for row_index in rows_to_impute:
-            neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-            valid_neighbors = [
-                n
-                for n in neighbors
-                if n != row_index
-                and binary_enrichment_matrix[n].sum() != 0
-                and enrichment_matrix[n].sum() != 0
-            ]
-            if valid_neighbors:
-                closest_neighbor = min(
-                    valid_neighbors, key=lambda n: _get_euclidean_distance(row_index, n, network)
+        # Iterate over all enriched nodes
+        for row_index in range(binary_enrichment_matrix.shape[0]):
+            if binary_enrichment_matrix[row_index].sum() != 0:
+                enrichment_matrix, binary_enrichment_matrix = _process_node_imputation(
+                    row_index, network, enrichment_matrix, binary_enrichment_matrix, depth
                 )
-                # Impute the row with the closest valid neighbor's data
-                enrichment_matrix[row_index] = enrichment_matrix[closest_neighbor]
-                binary_enrichment_matrix[row_index] = binary_enrichment_matrix[
-                    closest_neighbor
-                ] / np.sqrt(depth + 1)
-            else:
-                next_rows_to_impute.append(row_index)
 
-        rows_to_impute = next_rows_to_impute
+        # Update rows to impute for the next iteration
+        rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
         depth += 1
 
-    # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    return enrichment_matrix, binary_enrichment_matrix
 
-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
+def _process_node_imputation(
+    row_index: int,
+    network: nx.Graph,
+    enrichment_matrix: np.ndarray,
+    binary_enrichment_matrix: np.ndarray,
+    depth: int,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Process the imputation for a single node based on its enriched neighbors.
+
+    Args:
+        row_index (int): The index of the enriched node being processed.
+        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        depth (int): Current depth for traversal.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+    """
+    # Check neighbors at the current depth
+    neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
+    # Filter annotated neighbors (already enriched)
+    annotated_neighbors = [
+        n
+        for n in neighbors
+        if n != row_index
+        and binary_enrichment_matrix[n].sum() != 0
+        and enrichment_matrix[n].sum() != 0
+    ]
+    # Filter non-enriched neighbors
+    valid_neighbors = [
+        n
+        for n in neighbors
+        if n != row_index
+        and binary_enrichment_matrix[n].sum() == 0
+        and enrichment_matrix[n].sum() == 0
+    ]
+    # If there are valid non-enriched neighbors
+    if valid_neighbors and annotated_neighbors:
+        # Calculate distances to annotated neighbors
+        distances_to_annotated = [
+            _get_euclidean_distance(row_index, n, network) for n in annotated_neighbors
+        ]
+        # Calculate the IQR to identify outliers
+        q1, q3 = np.percentile(distances_to_annotated, [25, 75])
+        iqr = q3 - q1
+        lower_bound = q1 - 1.5 * iqr
+        upper_bound = q3 + 1.5 * iqr
+        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        valid_neighbors_within_iqr = [
+            n
+            for n in valid_neighbors
+            if lower_bound <= _get_euclidean_distance(row_index, n, network) <= upper_bound
+        ]
+        # If there are any valid neighbors within the IQR
+        if valid_neighbors_within_iqr:
+            # If more than one valid neighbor is within the IQR, compute pairwise cosine similarities
+            if len(valid_neighbors_within_iqr) > 1:
+                # Find the most similar neighbor based on pairwise cosine similarities
+                def sum_pairwise_cosine_similarities(neighbor):
+                    return sum(
+                        cosine_similarity(
+                            enrichment_matrix[neighbor].reshape(1, -1),
+                            enrichment_matrix[other_neighbor].reshape(1, -1),
+                        )[0][0]
+                        for other_neighbor in valid_neighbors_within_iqr
+                        if other_neighbor != neighbor
+                    )

+
+                most_similar_neighbor = max(
+                    valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities
+                )
+            else:
+                most_similar_neighbor = valid_neighbors_within_iqr[0]
+
+            # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
+            enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+                depth + 1
+            )
+            binary_enrichment_matrix[most_similar_neighbor] = binary_enrichment_matrix[row_index]
+
+    return enrichment_matrix, binary_enrichment_matrix
 
 
 def _prune_neighbors(
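
The new _process_node_imputation chains two selection steps: an IQR fence over distances to annotated neighbors, then a summed pairwise cosine similarity to pick the most representative candidate row. Both steps in isolation, on made-up numbers:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: keep only candidates whose distance falls inside the IQR fence
distances = np.array([1.0, 1.1, 1.2, 5.0])  # toy distances to annotated neighbors
q1, q3 = np.percentile(distances, [25, 75])
iqr = q3 - q1
within = (q1 - 1.5 * iqr <= distances) & (distances <= q3 + 1.5 * iqr)
print(within)  # the 5.0 outlier is fenced out

# Step 2: among candidates, pick the row most similar to all the others
profiles = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])  # toy enrichment rows

def summed_similarity(i):
    return sum(
        cosine_similarity(profiles[i].reshape(1, -1), profiles[j].reshape(1, -1))[0][0]
        for j in range(len(profiles))
        if j != i
    )

print(max(range(len(profiles)), key=summed_similarity))  # 1: most mutually similar row
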
@@ -240,27 +339,27 @@
     """
     # Identify indices with non-zero rows in the binary enrichment matrix
     non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
-    average_distances = []
+    median_distances = []
     for node in non_zero_indices:
         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
         if neighbors:
-            average_distance = np.mean(
+            median_distance = np.median(
                 [_get_euclidean_distance(node, n, network) for n in neighbors]
             )
-            average_distances.append(average_distance)
+            median_distances.append(median_distance)
 
     # Calculate the distance threshold value based on rank
-    distance_threshold_value = _calculate_threshold(average_distances, 1 - distance_threshold)
+    distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
     # Prune nodes that are outliers based on the distance threshold
     for row_index in non_zero_indices:
         neighbors = [
             n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
         ]
         if neighbors:
-            average_distance = np.mean(
+            median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
-            if average_distance >= distance_threshold_value:
+            if median_distance >= distance_threshold_value:
                 enrichment_matrix[row_index] = 0
                 binary_enrichment_matrix[row_index] = 0
 
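
The mean-to-median switch in _prune_neighbors makes each node's distance summary robust to a single far-away neighbor, so one long edge no longer forces a prune. The effect on made-up distances:

import numpy as np

neighbor_distances = [1.0, 1.1, 1.2, 50.0]  # one distant neighbor
print(np.mean(neighbor_distances))    # 13.325, dragged up by the outlier
print(np.median(neighbor_distances))  # 1.15, barely moved
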
@@ -305,18 +404,18 @@ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
     )
 
 
-def _calculate_threshold(average_distances: List, distance_threshold: float) -> float:
-    """Calculate the distance threshold based on the given average distances and a percentile threshold.
+def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
+    """Calculate the distance threshold based on the given median distances and a percentile threshold.
 
     Args:
-        average_distances (list): An array of average distances.
+        median_distances (list): An array of median distances.
         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
 
     Returns:
         float: The calculated distance threshold value.
     """
-    # Sort the average distances
-    sorted_distances = np.sort(average_distances)
+    # Sort the median distances
+    sorted_distances = np.sort(median_distances)
     # Compute the rank percentiles for the sorted distances
     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
     # Interpolating the ranks to 1000 evenly spaced percentiles
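
The diff truncates before _calculate_threshold's final interpolation step; given the sorted distances and rank percentiles above, a rank-based cutoff plausibly looks like the following sketch (np.interp here is an assumption, not necessarily the package's exact call):

import numpy as np

median_distances = [0.5, 0.7, 0.9, 1.4, 3.0]  # toy per-node medians
sorted_distances = np.sort(median_distances)
rank_percentiles = np.linspace(0, 1, len(sorted_distances))
# Interpolate the distance value sitting at a requested rank percentile
cutoff = np.interp(0.8, rank_percentiles, sorted_distances)  # 0.8 is illustrative
print(cutoff)
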