risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +10 -0
  3. risk/annotations/annotations.py +354 -0
  4. risk/annotations/io.py +241 -0
  5. risk/annotations/nltk_setup.py +86 -0
  6. risk/log/__init__.py +11 -0
  7. risk/log/console.py +141 -0
  8. risk/log/parameters.py +171 -0
  9. risk/neighborhoods/__init__.py +7 -0
  10. risk/neighborhoods/api.py +442 -0
  11. risk/neighborhoods/community.py +441 -0
  12. risk/neighborhoods/domains.py +360 -0
  13. risk/neighborhoods/neighborhoods.py +514 -0
  14. risk/neighborhoods/stats/__init__.py +13 -0
  15. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  16. risk/neighborhoods/stats/permutation/permutation.py +240 -0
  17. risk/neighborhoods/stats/permutation/test_functions.py +70 -0
  18. risk/neighborhoods/stats/tests.py +275 -0
  19. risk/network/__init__.py +4 -0
  20. risk/network/graph/__init__.py +4 -0
  21. risk/network/graph/api.py +200 -0
  22. risk/network/graph/graph.py +274 -0
  23. risk/network/graph/stats.py +166 -0
  24. risk/network/graph/summary.py +253 -0
  25. risk/network/io.py +693 -0
  26. risk/network/plotter/__init__.py +4 -0
  27. risk/network/plotter/api.py +54 -0
  28. risk/network/plotter/canvas.py +291 -0
  29. risk/network/plotter/contour.py +329 -0
  30. risk/network/plotter/labels.py +935 -0
  31. risk/network/plotter/network.py +294 -0
  32. risk/network/plotter/plotter.py +141 -0
  33. risk/network/plotter/utils/colors.py +419 -0
  34. risk/network/plotter/utils/layout.py +94 -0
  35. risk_network-0.0.12b2.dist-info/METADATA +122 -0
  36. risk_network-0.0.12b2.dist-info/RECORD +40 -0
  37. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b2.dist-info}/WHEEL +1 -1
  38. risk_network-0.0.12b0.dist-info/METADATA +0 -796
  39. risk_network-0.0.12b0.dist-info/RECORD +0 -7
  40. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b2.dist-info}/licenses/LICENSE +0 -0
  41. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b2.dist-info}/top_level.txt +0 -0
risk/neighborhoods/neighborhoods.py
@@ -0,0 +1,514 @@
+ """
+ risk/neighborhoods/neighborhoods
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import random
+ import warnings
+ from typing import Any, Dict, List, Tuple, Union
+
+ import networkx as nx
+ import numpy as np
+ from scipy.sparse import csr_matrix
+ from sklearn.exceptions import DataConversionWarning
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from risk.log import logger
+ from risk.neighborhoods.community import (
+     calculate_greedy_modularity_neighborhoods,
+     calculate_label_propagation_neighborhoods,
+     calculate_leiden_neighborhoods,
+     calculate_louvain_neighborhoods,
+     calculate_markov_clustering_neighborhoods,
+     calculate_spinglass_neighborhoods,
+     calculate_walktrap_neighborhoods,
+ )
+
+ # Suppress DataConversionWarning
+ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
+
+
+ def get_network_neighborhoods(
+     network: nx.Graph,
+     distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
+     fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
+     louvain_resolution: float = 0.1,
+     leiden_resolution: float = 1.0,
+     random_seed: int = 888,
+ ) -> csr_matrix:
+     """Calculate the combined neighborhoods for each node using sparse matrices.
+
+     Args:
+         network (nx.Graph): The network graph.
+         distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
+         fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
+         louvain_resolution (float, optional): Resolution parameter for the Louvain method.
+         leiden_resolution (float, optional): Resolution parameter for the Leiden method.
+         random_seed (int, optional): Random seed for methods requiring random initialization.
+
+     Returns:
+         csr_matrix: The combined neighborhood matrix.
+
+     Raises:
+         ValueError: If the number of distance metrics does not match the number of edge length thresholds.
+     """
+     # Set random seed for reproducibility
+     random.seed(random_seed)
+     np.random.seed(random_seed)
+
+     # Ensure distance_metric is a sequence for multi-algorithm handling
+     if isinstance(distance_metric, str):
+         distance_metric = [distance_metric]
+     # Ensure fraction_shortest_edges is a sequence for multi-threshold handling
+     if isinstance(fraction_shortest_edges, (float, int)):
+         fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
+     # Validate matching lengths of distance metrics and thresholds
+     if len(distance_metric) != len(fraction_shortest_edges):
+         raise ValueError(
+             "The number of distance metrics must match the number of edge length thresholds."
+         )
+
+     num_nodes = network.number_of_nodes()
+     # Initialize a sparse CSR matrix with one row and column per node
+     combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
+     # Loop through each distance metric and corresponding edge rank fraction
+     for metric, fraction in zip(distance_metric, fraction_shortest_edges):
+         # Compute neighborhoods for the specified metric
+         if metric == "greedy_modularity":
+             neighborhoods = calculate_greedy_modularity_neighborhoods(
+                 network, fraction_shortest_edges=fraction
+             )
+         elif metric == "label_propagation":
+             neighborhoods = calculate_label_propagation_neighborhoods(
+                 network, fraction_shortest_edges=fraction
+             )
+         elif metric == "leiden":
+             neighborhoods = calculate_leiden_neighborhoods(
+                 network,
+                 resolution=leiden_resolution,
+                 fraction_shortest_edges=fraction,
+                 random_seed=random_seed,
+             )
+         elif metric == "louvain":
+             neighborhoods = calculate_louvain_neighborhoods(
+                 network,
+                 resolution=louvain_resolution,
+                 fraction_shortest_edges=fraction,
+                 random_seed=random_seed,
+             )
+         elif metric == "markov_clustering":
+             neighborhoods = calculate_markov_clustering_neighborhoods(
+                 network, fraction_shortest_edges=fraction
+             )
+         elif metric == "spinglass":
+             neighborhoods = calculate_spinglass_neighborhoods(
+                 network, fraction_shortest_edges=fraction
+             )
+         elif metric == "walktrap":
+             neighborhoods = calculate_walktrap_neighborhoods(
+                 network, fraction_shortest_edges=fraction
+             )
+         else:
+             raise ValueError(
+                 "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation', "
+                 "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
+             )
+
+         # Accumulate the sparse neighborhood matrix
+         combined_neighborhoods += neighborhoods
+
+     # Keep only the maximum value in each row, binarized to 1
+     combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
+
+     return combined_neighborhoods
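A minimal usage sketch of the entry point above (assuming risk-network is installed; the karate-club graph and spring-layout coordinates are stand-ins for a real network):

```python
import networkx as nx

from risk.neighborhoods.neighborhoods import get_network_neighborhoods

G = nx.karate_club_graph()
# Attach 2D coordinates: the impute/prune helpers further down expect x/y
# (and optionally z) node attributes when computing Euclidean distances.
for node, (x, y) in nx.spring_layout(G, seed=888).items():
    G.nodes[node]["x"], G.nodes[node]["y"] = float(x), float(y)

# Combine two community methods, each restricted to the 75% shortest edges
combined = get_network_neighborhoods(
    G,
    distance_metric=["louvain", "label_propagation"],
    fraction_shortest_edges=[0.75, 0.75],
    random_seed=888,
)
print(combined.shape)  # (34, 34) CSR matrix with binarized rows
```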
+
+
+ def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
+     """Set the maximum value in each row of a sparse matrix to 1 and all other stored values to 0.
+
+     Args:
+         matrix (csr_matrix): The input sparse matrix.
+
+     Returns:
+         csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
+     """
+     # Operate on the underlying data array via indptr so the edits happen in
+     # place; row slicing (matrix[i]) would return a copy and discard changes
+     for i in range(matrix.shape[0]):
+         start, end = matrix.indptr[i], matrix.indptr[i + 1]
+         if end > start:
+             row_data = matrix.data[start:end]
+             matrix.data[start:end] = (row_data == row_data.max()).astype(matrix.dtype)
+
+     return matrix
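To make the row-wise binarization concrete, a small sketch of its effect on stored entries (illustration only; `_set_max_row_value_to_one_sparse` is a private helper):

```python
import numpy as np
from scipy.sparse import csr_matrix

from risk.neighborhoods.neighborhoods import _set_max_row_value_to_one_sparse

m = csr_matrix(np.array([[0, 2, 1], [3, 0, 3], [0, 0, 0]], dtype=np.uint8))
print(_set_max_row_value_to_one_sparse(m).toarray())
# [[0 1 0]   the row maximum (2) becomes 1, the other stored entry 0
#  [1 0 1]   ties: every entry equal to the row maximum becomes 1
#  [0 0 0]]  rows with no stored entries are left untouched
```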
+
+
+ def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
+     """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is
+     particularly useful for neighborhood matrices that have undergone multiple neighborhood detection
+     algorithms, where the maximum value in each row represents the most significant relationship per node
+     in the combined neighborhoods.
+
+     Args:
+         matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
+
+     Returns:
+         np.ndarray: The modified matrix where only the maximum value(s) in each row is set to 1, and others are set to 0.
+     """
+     # Find the maximum value in each row (reduction along axis 1)
+     max_values = np.max(matrix, axis=1, keepdims=True)
+     # Create a boolean mask where elements are True if they are the max value in their row
+     max_mask = matrix == max_values
+     # Set all elements to 0, then set the maximum value positions to 1
+     matrix[:] = 0
+     matrix[max_mask] = 1
+     return matrix
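The dense variant behaves the same way, with one edge case worth knowing about (toy illustration):

```python
import numpy as np

from risk.neighborhoods.neighborhoods import _set_max_row_value_to_one

m = np.array([[0.2, 0.8, 0.5], [0.0, 0.0, 0.0]])
print(_set_max_row_value_to_one(m))
# [[0. 1. 0.]   only the row maximum survives
#  [1. 1. 1.]]  an all-zero row's max is 0, so the mask matches everywhere
```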
+
+
+ def process_neighborhoods(
+     network: nx.Graph,
+     neighborhoods: Dict[str, Any],
+     impute_depth: int = 0,
+     prune_threshold: float = 0.0,
+ ) -> Dict[str, Any]:
+     """Process neighborhoods based on the imputation and pruning settings.
+
+     Args:
+         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
+         neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix',
+             'significant_binary_significance_matrix', and 'significant_significance_matrix'.
+         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
+         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
+
+     Returns:
+         Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
+     """
+     significance_matrix = neighborhoods["significance_matrix"]
+     significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+     significant_significance_matrix = neighborhoods["significant_significance_matrix"]
+     logger.debug(f"Imputation depth: {impute_depth}")
+     if impute_depth:
+         (
+             significance_matrix,
+             significant_binary_significance_matrix,
+             significant_significance_matrix,
+         ) = _impute_neighbors(
+             network,
+             significance_matrix,
+             significant_binary_significance_matrix,
+             max_depth=impute_depth,
+         )
+
+     logger.debug(f"Pruning threshold: {prune_threshold}")
+     if prune_threshold:
+         (
+             significance_matrix,
+             significant_binary_significance_matrix,
+             significant_significance_matrix,
+         ) = _prune_neighbors(
+             network,
+             significance_matrix,
+             significant_binary_significance_matrix,
+             distance_threshold=prune_threshold,
+         )
+
+     neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+     node_significance_sums = np.sum(significance_matrix, axis=1)
+     return {
+         "significance_matrix": significance_matrix,
+         "significant_binary_significance_matrix": significant_binary_significance_matrix,
+         "significant_significance_matrix": significant_significance_matrix,
+         "neighborhood_significance_counts": neighborhood_significance_counts,
+         "node_significance_sums": node_significance_sums,
+     }
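A minimal end-to-end sketch of the processing step; the matrices here are random stand-ins, whereas in the package they come from the neighborhood statistics step:

```python
import networkx as nx
import numpy as np

from risk.neighborhoods.neighborhoods import process_neighborhoods

G = nx.path_graph(5)
for i in G.nodes:  # coordinates are required by the impute/prune helpers
    G.nodes[i]["x"], G.nodes[i]["y"] = float(i), 0.0

rng = np.random.default_rng(888)
sig = rng.random((5, 3))               # hypothetical node-by-annotation scores
binary = (sig > 0.5).astype(np.uint8)  # hypothetical significance mask
out = process_neighborhoods(
    G,
    {
        "significance_matrix": sig,
        "significant_binary_significance_matrix": binary,
        "significant_significance_matrix": np.where(binary == 1, sig, 0),
    },
    impute_depth=1,
    prune_threshold=0.9,
)
print(out["node_significance_sums"])   # per-node totals after impute + prune
```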
+
+
+ def _impute_neighbors(
+     network: nx.Graph,
+     significance_matrix: np.ndarray,
+     significant_binary_significance_matrix: np.ndarray,
+     max_depth: int = 3,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+         significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+         max_depth (int): Maximum depth of nodes to traverse for imputing values.
+
+     Returns:
+         Tuple[np.ndarray, np.ndarray, np.ndarray]:
+             - np.ndarray: The imputed significance matrix.
+             - np.ndarray: The imputed alpha threshold matrix.
+             - np.ndarray: The significant significance matrix with non-significant entries set to zero.
+     """
+     # Impute significance values from the profiles of the closest significant neighbors
+     significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+         network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
+     )
+     # Create a matrix where non-significant entries are set to zero
+     significant_significance_matrix = np.where(
+         significant_binary_significance_matrix == 1, significance_matrix, 0
+     )
+
+     return (
+         significance_matrix,
+         significant_binary_significance_matrix,
+         significant_significance_matrix,
+     )
+
+
+ def _impute_neighbors_with_similarity(
+     network: nx.Graph,
+     significance_matrix: np.ndarray,
+     significant_binary_significance_matrix: np.ndarray,
+     max_depth: int = 3,
+ ) -> Tuple[np.ndarray, np.ndarray]:
+     """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+         significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+         max_depth (int): Maximum depth of nodes to traverse for imputing values.
+
+     Returns:
+         Tuple[np.ndarray, np.ndarray]:
+             - The imputed significance matrix.
+             - The imputed alpha threshold matrix.
+     """
+     depth = 1
+     rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
+     while len(rows_to_impute) and depth <= max_depth:
+         # Iterate over all rows, processing only the significant nodes
+         for row_index in range(significant_binary_significance_matrix.shape[0]):
+             if significant_binary_significance_matrix[row_index].sum() != 0:
+                 (
+                     significance_matrix,
+                     significant_binary_significance_matrix,
+                 ) = _process_node_imputation(
+                     row_index,
+                     network,
+                     significance_matrix,
+                     significant_binary_significance_matrix,
+                     depth,
+                 )
+
+         # Update rows to impute for the next iteration
+         rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
+         depth += 1
+
+     return significance_matrix, significant_binary_significance_matrix
+
+
+ def _process_node_imputation(
+     row_index: int,
+     network: nx.Graph,
+     significance_matrix: np.ndarray,
+     significant_binary_significance_matrix: np.ndarray,
+     depth: int,
+ ) -> Tuple[np.ndarray, np.ndarray]:
+     """Process the imputation for a single node based on its significant neighbors.
+
+     Args:
+         row_index (int): The index of the significant node being processed.
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+         significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+         depth (int): Current depth for traversal.
+
+     Returns:
+         Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
+     """
+     # Check neighbors at the current depth
+     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
+     # Filter annotated neighbors (already significant)
+     annotated_neighbors = [
+         n
+         for n in neighbors
+         if n != row_index
+         and significant_binary_significance_matrix[n].sum() != 0
+         and significance_matrix[n].sum() != 0
+     ]
+     # Filter non-significant neighbors
+     valid_neighbors = [
+         n
+         for n in neighbors
+         if n != row_index
+         and significant_binary_significance_matrix[n].sum() == 0
+         and significance_matrix[n].sum() == 0
+     ]
+     # Proceed only if there are both valid non-significant and annotated neighbors
+     if valid_neighbors and annotated_neighbors:
+         # Calculate distances to annotated neighbors
+         distances_to_annotated = [
+             _get_euclidean_distance(row_index, n, network) for n in annotated_neighbors
+         ]
+         # Calculate the IQR to identify outliers
+         q1, q3 = np.percentile(distances_to_annotated, [25, 75])
+         iqr = q3 - q1
+         lower_bound = q1 - 1.5 * iqr
+         upper_bound = q3 + 1.5 * iqr
+         # Filter valid non-significant neighbors that fall within the IQR bounds
+         valid_neighbors_within_iqr = [
+             n
+             for n in valid_neighbors
+             if lower_bound <= _get_euclidean_distance(row_index, n, network) <= upper_bound
+         ]
+         # If there are any valid neighbors within the IQR
+         if valid_neighbors_within_iqr:
+             # If more than one valid neighbor is within the IQR, compute pairwise cosine similarities
+             if len(valid_neighbors_within_iqr) > 1:
+                 # Find the most similar neighbor based on pairwise cosine similarities
+                 def sum_pairwise_cosine_similarities(neighbor):
+                     return sum(
+                         cosine_similarity(
+                             significance_matrix[neighbor].reshape(1, -1),
+                             significance_matrix[other_neighbor].reshape(1, -1),
+                         )[0][0]
+                         for other_neighbor in valid_neighbors_within_iqr
+                         if other_neighbor != neighbor
+                     )
+
+                 most_similar_neighbor = max(
+                     valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities
+                 )
+             else:
+                 most_similar_neighbor = valid_neighbors_within_iqr[0]
+
+             # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+             significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
+                 depth + 1
+             )
+             significant_binary_significance_matrix[most_similar_neighbor] = (
+                 significant_binary_significance_matrix[row_index]
+             )
+
+     return significance_matrix, significant_binary_significance_matrix
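The two selection steps above (IQR fencing of candidate distances, then a summed-cosine-similarity vote) can be sketched in isolation with toy numbers, not package data:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# IQR fence: distances to annotated neighbors, with one clear outlier
distances = np.array([1.0, 1.1, 1.2, 5.0])
q1, q3 = np.percentile(distances, [25, 75])
iqr = q3 - q1
within = (distances >= q1 - 1.5 * iqr) & (distances <= q3 + 1.5 * iqr)
print(within)  # [ True  True  True False] -> the 5.0 outlier is fenced out

# Summed-similarity vote: the profile most similar to the other candidates wins
profiles = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
sims = cosine_similarity(profiles)  # pairwise cosine similarities
totals = sims.sum(axis=1) - 1.0     # drop each profile's self-similarity of 1.0
print(int(np.argmax(totals)))       # -> 1, the most similar profile
```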
+
+
+ def _prune_neighbors(
+     network: nx.Graph,
+     significance_matrix: np.ndarray,
+     significant_binary_significance_matrix: np.ndarray,
+     distance_threshold: float = 0.9,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Remove outlier nodes based on the rank of their median edge lengths.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         significance_matrix (np.ndarray): The significance matrix.
+         significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
+         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
+
+     Returns:
+         Tuple[np.ndarray, np.ndarray, np.ndarray]:
+             - np.ndarray: The updated significance matrix with outliers set to zero.
+             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
+             - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
+     """
+     # Identify indices with non-zero rows in the binary significance matrix
+     non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
+     median_distances = []
+     for node in non_zero_indices:
+         neighbors = [
+             n
+             for n in network.neighbors(node)
+             if significant_binary_significance_matrix[n].sum() != 0
+         ]
+         if neighbors:
+             median_distance = np.median(
+                 [_get_euclidean_distance(node, n, network) for n in neighbors]
+             )
+             median_distances.append(median_distance)
+
+     # Calculate the distance threshold value based on rank
+     distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
+     # Prune nodes that are outliers based on the distance threshold
+     for row_index in non_zero_indices:
+         neighbors = [
+             n
+             for n in network.neighbors(row_index)
+             if significant_binary_significance_matrix[n].sum() != 0
+         ]
+         if neighbors:
+             median_distance = np.median(
+                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
+             )
+             if median_distance >= distance_threshold_value:
+                 significance_matrix[row_index] = 0
+                 significant_binary_significance_matrix[row_index] = 0
+
+     # Create a matrix where non-significant entries are set to zero
+     significant_significance_matrix = np.where(
+         significant_binary_significance_matrix == 1, significance_matrix, 0
+     )
+
+     return (
+         significance_matrix,
+         significant_binary_significance_matrix,
+         significant_significance_matrix,
+     )
+
+
+ def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
+     """Calculate the Euclidean distance between two nodes in the network.
+
+     Args:
+         node1 (Any): The first node.
+         node2 (Any): The second node.
+         network (nx.Graph): The network graph containing the nodes.
+
+     Returns:
+         float: The Euclidean distance between the two nodes.
+     """
+     pos1 = _get_node_position(network, node1)
+     pos2 = _get_node_position(network, node2)
+     return np.linalg.norm(pos1 - pos2)
+
+
+ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
+     """Retrieve the position of a node in the network as a numpy array.
+
+     Args:
+         network (nx.Graph): The network graph containing node positions.
+         node (Any): The node for which the position is being retrieved.
+
+     Returns:
+         np.ndarray: A numpy array of the node's available coordinates, in [x, y, z] order.
+     """
+     return np.array(
+         [
+             network.nodes[node].get(coord, 0)
+             for coord in ["x", "y", "z"]
+             if coord in network.nodes[node]
+         ]
+     )
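A two-node sketch of how positions are read from node attributes and fed into the distance helper (both are private helpers, imported here only for illustration):

```python
import networkx as nx

from risk.neighborhoods.neighborhoods import (
    _get_euclidean_distance,
    _get_node_position,
)

G = nx.Graph()
G.add_node("a", x=0.0, y=0.0)
G.add_node("b", x=3.0, y=4.0)
# Only coordinates present on the node are collected, in x/y/z order,
# so the same helpers serve both 2D and 3D layouts.
print(_get_node_position(G, "b"))            # [3. 4.]
print(_get_euclidean_distance("a", "b", G))  # 5.0 (a 3-4-5 triangle)
```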
+
+
+ def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
+     """Calculate the distance threshold based on the given median distances and a percentile threshold.
+
+     Args:
+         median_distances (List): An array of median distances.
+         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
+
+     Returns:
+         float: The calculated distance threshold value.
+
+     Raises:
+         ValueError: If no significant annotations are found in the median distances.
+     """
+     # Sort the median distances
+     sorted_distances = np.sort(median_distances)
+     # Compute the rank fractions for the sorted distances
+     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
+     # Interpolate the ranks onto 1000 evenly spaced percentiles
+     interpolated_percentiles = np.linspace(0, 1, 1000)
+     try:
+         smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
+     except ValueError as e:
+         raise ValueError("No significant annotations found.") from e
+
+     # Determine the index corresponding to the distance threshold
+     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
+     # Return the smoothed distance at the calculated index
+     return smoothed_distances[threshold_index]
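The rank-interpolation logic is easiest to see with evenly spaced toy distances:

```python
import numpy as np

median_distances = [0.2, 0.4, 0.6, 0.8, 1.0]
sorted_d = np.sort(median_distances)
ranks = np.linspace(0, 1, len(sorted_d))
# Resample the empirical rank curve onto 1000 evenly spaced percentiles
smoothed = np.interp(np.linspace(0, 1, 1000), ranks, sorted_d)
idx = int(np.ceil(0.5 * len(smoothed))) - 1
print(smoothed[idx])  # ~0.6: the distance at the 0.5 rank fraction
```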
risk/neighborhoods/stats/__init__.py
@@ -0,0 +1,13 @@
+ """
+ risk/neighborhoods/stats
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ from risk.neighborhoods.stats.permutation import compute_permutation_test
+ from risk.neighborhoods.stats.tests import (
+     compute_binom_test,
+     compute_chi2_test,
+     compute_hypergeom_test,
+     compute_poisson_test,
+     compute_zscore_test,
+ )
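With these re-exports in place, callers can import every statistical test from the subpackage root instead of reaching into the submodules:

```python
from risk.neighborhoods.stats import (
    compute_hypergeom_test,    # re-exported from risk.neighborhoods.stats.tests
    compute_permutation_test,  # re-exported from risk.neighborhoods.stats.permutation
)
```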
risk/neighborhoods/stats/permutation/__init__.py
@@ -0,0 +1,6 @@
+ """
+ risk/neighborhoods/stats/permutation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ from risk.neighborhoods.stats.permutation.permutation import compute_permutation_test