risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +10 -0
- risk/annotations/annotations.py +354 -0
- risk/annotations/io.py +241 -0
- risk/annotations/nltk_setup.py +86 -0
- risk/log/__init__.py +11 -0
- risk/log/console.py +141 -0
- risk/log/parameters.py +171 -0
- risk/neighborhoods/__init__.py +7 -0
- risk/neighborhoods/api.py +442 -0
- risk/neighborhoods/community.py +441 -0
- risk/neighborhoods/domains.py +360 -0
- risk/neighborhoods/neighborhoods.py +514 -0
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/neighborhoods/stats/permutation/permutation.py +240 -0
- risk/neighborhoods/stats/permutation/test_functions.py +70 -0
- risk/neighborhoods/stats/tests.py +275 -0
- risk/network/__init__.py +4 -0
- risk/network/graph/__init__.py +4 -0
- risk/network/graph/api.py +200 -0
- risk/network/graph/graph.py +268 -0
- risk/network/graph/stats.py +166 -0
- risk/network/graph/summary.py +253 -0
- risk/network/io.py +693 -0
- risk/network/plotter/__init__.py +4 -0
- risk/network/plotter/api.py +54 -0
- risk/network/plotter/canvas.py +291 -0
- risk/network/plotter/contour.py +329 -0
- risk/network/plotter/labels.py +935 -0
- risk/network/plotter/network.py +294 -0
- risk/network/plotter/plotter.py +141 -0
- risk/network/plotter/utils/colors.py +419 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk_network-0.0.12b1.dist-info/METADATA +122 -0
- risk_network-0.0.12b1.dist-info/RECORD +40 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
- risk_network-0.0.12b0.dist-info/METADATA +0 -796
- risk_network-0.0.12b0.dist-info/RECORD +0 -7
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,514 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/neighborhoods
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
import random
|
7
|
+
import warnings
|
8
|
+
from typing import Any, Dict, List, Tuple, Union
|
9
|
+
|
10
|
+
import networkx as nx
|
11
|
+
import numpy as np
|
12
|
+
from scipy.sparse import csr_matrix
|
13
|
+
from sklearn.exceptions import DataConversionWarning
|
14
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
15
|
+
|
16
|
+
from risk.log import logger
|
17
|
+
from risk.neighborhoods.community import (
|
18
|
+
calculate_greedy_modularity_neighborhoods,
|
19
|
+
calculate_label_propagation_neighborhoods,
|
20
|
+
calculate_leiden_neighborhoods,
|
21
|
+
calculate_louvain_neighborhoods,
|
22
|
+
calculate_markov_clustering_neighborhoods,
|
23
|
+
calculate_spinglass_neighborhoods,
|
24
|
+
calculate_walktrap_neighborhoods,
|
25
|
+
)
|
26
|
+
|
27
|
+
# Suppress DataConversionWarning
|
28
|
+
warnings.filterwarnings(action="ignore", category=DataConversionWarning)
|
29
|
+
|
30
|
+
|
31
|
+
def get_network_neighborhoods(
    network: nx.Graph,
    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
    louvain_resolution: float = 0.1,
    leiden_resolution: float = 1.0,
    random_seed: int = 888,
) -> csr_matrix:
    """Calculate the combined neighborhoods for each node using sparse matrices.

    Every requested metric is dispatched to its `calculate_*_neighborhoods` function, the
    resulting matrices are summed, and the sum is passed through
    `_set_max_row_value_to_one_sparse` before being returned.

    Args:
        network (nx.Graph): The network graph.
        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
            A single name or any sequence/array of names. Defaults to "louvain".
        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank
            fraction thresholds. A scalar is applied to every metric; otherwise one value per
            metric is required. Defaults to 1.0.
        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
        random_seed (int, optional): Random seed for methods requiring random initialization.

    Returns:
        csr_matrix: The combined neighborhood matrix.

    Raises:
        ValueError: If the number of distance metrics does not match the number of edge length
            thresholds, or if a metric name is not one of the supported algorithms.
    """
    # Set random seed for reproducibility
    random.seed(random_seed)
    np.random.seed(random_seed)

    # Normalize distance_metric to a flat list of names. An ndarray must be unpacked rather than
    # wrapped: wrapping would yield a single "metric" that is itself an array, which breaks both
    # the length validation and the string comparisons below.
    if isinstance(distance_metric, str):
        distance_metric = [distance_metric]
    elif isinstance(distance_metric, np.ndarray):
        distance_metric = distance_metric.ravel().tolist()
    # Broadcast a scalar threshold (Python or NumPy scalar) to one value per metric
    if isinstance(fraction_shortest_edges, (float, int, np.floating, np.integer)):
        fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
    # Validate matching lengths of distance metrics and thresholds
    if len(distance_metric) != len(fraction_shortest_edges):
        raise ValueError(
            "The number of distance metrics must match the number of edge length thresholds."
        )

    # Initialize an empty sparse CSR accumulator with one row and one column per node
    num_nodes = network.number_of_nodes()
    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
    # Loop through each distance metric and corresponding edge rank fraction
    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
        # Compute neighborhoods for the specified metric
        if metric == "greedy_modularity":
            neighborhoods = calculate_greedy_modularity_neighborhoods(
                network, fraction_shortest_edges=percentile
            )
        elif metric == "label_propagation":
            neighborhoods = calculate_label_propagation_neighborhoods(
                network, fraction_shortest_edges=percentile
            )
        elif metric == "leiden":
            neighborhoods = calculate_leiden_neighborhoods(
                network,
                resolution=leiden_resolution,
                fraction_shortest_edges=percentile,
                random_seed=random_seed,
            )
        elif metric == "louvain":
            neighborhoods = calculate_louvain_neighborhoods(
                network,
                resolution=louvain_resolution,
                fraction_shortest_edges=percentile,
                random_seed=random_seed,
            )
        elif metric == "markov_clustering":
            neighborhoods = calculate_markov_clustering_neighborhoods(
                network, fraction_shortest_edges=percentile
            )
        elif metric == "spinglass":
            neighborhoods = calculate_spinglass_neighborhoods(
                network, fraction_shortest_edges=percentile
            )
        elif metric == "walktrap":
            neighborhoods = calculate_walktrap_neighborhoods(
                network, fraction_shortest_edges=percentile
            )
        else:
            raise ValueError(
                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
            )

        # Accumulate this metric's neighborhoods.
        # NOTE(review): assumes each calculate_* helper returns a (num_nodes x num_nodes) sparse
        # matrix - confirm against risk.neighborhoods.community.
        combined_neighborhoods += neighborhoods

    # Ensure maximum value in each row is set to 1
    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)

    return combined_neighborhoods
|
125
|
+
|
126
|
+
|
127
|
+
def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
    """Set the maximum stored value(s) in each row of a sparse matrix to 1 and the rest to 0.

    Only explicitly stored entries take part in the per-row maximum; rows without stored
    entries are left untouched. The matrix is modified in place and entries that become zero
    are dropped from the sparse structure.

    Args:
        matrix (csr_matrix): The input sparse matrix.

    Returns:
        csr_matrix: The modified sparse matrix where only the maximum value(s) in each row are
            set to 1.
    """
    # For CSR input this returns the same object, so the update stays in place
    matrix = matrix.tocsr()
    # Merge duplicate coordinates first so each row maximum is taken over final values
    matrix.sum_duplicates()
    data, indptr = matrix.data, matrix.indptr
    # Row indexing (matrix[i]) returns a copy, so writes must go through slices of matrix.data,
    # which are views delimited by consecutive indptr entries
    for start, stop in zip(indptr[:-1], indptr[1:]):
        if stop > start:
            row_values = data[start:stop]
            row_values[:] = row_values == row_values.max()
    # Drop the entries that were just zeroed so the stored pattern matches the values
    matrix.eliminate_zeros()

    return matrix
|
143
|
+
|
144
|
+
|
145
|
+
def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
    """Binarize a dense neighborhood matrix in place, keeping only each row's maximum.

    Every entry equal to the maximum of its row becomes 1 (ties are all kept) and every other
    entry becomes 0. Useful after several neighborhood detection algorithms have been summed,
    where the largest value in a row marks the strongest relationship for that node.

    Args:
        matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.

    Returns:
        np.ndarray: The same array object, overwritten with 1 at row maxima and 0 elsewhere.
    """
    # Row-wise maxima, kept as a column so they broadcast against the full matrix
    row_peaks = np.max(matrix, axis=1, keepdims=True)
    is_row_peak = matrix == row_peaks
    # Overwrite in place: 1 where the entry equals its row maximum, 0 everywhere else
    matrix[...] = np.where(is_row_peak, 1, 0)
    return matrix
|
164
|
+
|
165
|
+
|
166
|
+
def process_neighborhoods(
    network: nx.Graph,
    neighborhoods: Dict[str, Any],
    impute_depth: int = 0,
    prune_threshold: float = 0.0,
) -> Dict[str, Any]:
    """Apply optional imputation and pruning to neighborhood significance results.

    Args:
        network (nx.Graph): The network data structure used for imputing and pruning neighbors.
        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix',
            'significant_binary_significance_matrix', and 'significant_significance_matrix'.
        impute_depth (int, optional): Depth for imputing neighbors; imputation is skipped when
            falsy. Defaults to 0.
        prune_threshold (float, optional): Distance threshold for pruning neighbors; pruning is
            skipped when falsy. Defaults to 0.0.

    Returns:
        Dict[str, Any]: The three (possibly updated) matrices plus
            'neighborhood_significance_counts' (column sums of the binary matrix) and
            'node_significance_sums' (row sums of the significance matrix).
    """
    scores = neighborhoods["significance_matrix"]
    binary_hits = neighborhoods["significant_binary_significance_matrix"]
    significant_scores = neighborhoods["significant_significance_matrix"]

    logger.debug(f"Imputation depth: {impute_depth}")
    if impute_depth:
        scores, binary_hits, significant_scores = _impute_neighbors(
            network, scores, binary_hits, max_depth=impute_depth
        )

    logger.debug(f"Pruning threshold: {prune_threshold}")
    if prune_threshold:
        scores, binary_hits, significant_scores = _prune_neighbors(
            network, scores, binary_hits, distance_threshold=prune_threshold
        )

    # Summaries are computed after both optional steps so they reflect the final matrices
    processed = {
        "significance_matrix": scores,
        "significant_binary_significance_matrix": binary_hits,
        "significant_significance_matrix": significant_scores,
    }
    processed["neighborhood_significance_counts"] = np.sum(binary_hits, axis=0)
    processed["node_significance_sums"] = np.sum(scores, axis=1)
    return processed
|
221
|
+
|
222
|
+
|
223
|
+
def _impute_neighbors(
    network: nx.Graph,
    significance_matrix: np.ndarray,
    significant_binary_significance_matrix: np.ndarray,
    max_depth: int = 3,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Impute rows without significant entries from nearby significant nodes in the network.

    The imputation itself is delegated to `_impute_neighbors_with_similarity`; this wrapper
    additionally derives the matrix that keeps significance values only where the binary
    matrix equals 1.

    Args:
        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be
            imputed similarly.
        max_depth (int): Maximum depth of nodes to traverse for imputing values.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]:
            - np.ndarray: The imputed significance matrix.
            - np.ndarray: The imputed alpha threshold matrix.
            - np.ndarray: The significant significance matrix with non-significant entries set
              to zero.
    """
    imputed_scores, imputed_binary = _impute_neighbors_with_similarity(
        network,
        significance_matrix,
        significant_binary_significance_matrix,
        max_depth=max_depth,
    )
    # Keep scores only where the binary matrix flags the entry as significant
    significant_only = np.where(imputed_binary == 1, imputed_scores, 0)

    return imputed_scores, imputed_binary, significant_only
|
257
|
+
|
258
|
+
|
259
|
+
def _impute_neighbors_with_similarity(
    network: nx.Graph,
    significance_matrix: np.ndarray,
    significant_binary_significance_matrix: np.ndarray,
    max_depth: int = 3,
) -> Tuple[np.ndarray, np.ndarray]:
    """Impute non-significant nodes from significant nodes, widening the search depth each pass.

    Starting at depth 1, every row that currently has a non-zero binary sum is handed to
    `_process_node_imputation`. Passes repeat with depth + 1 until no all-zero binary rows
    remain or `max_depth` is exceeded. Rows filled earlier in a pass count as significant for
    rows visited later in that same pass.

    Args:
        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be
            imputed similarly.
        max_depth (int): Maximum depth of nodes to traverse for imputing values.

    Returns:
        Tuple[np.ndarray, np.ndarray]:
            - The imputed significance matrix.
            - The imputed alpha threshold matrix.
    """
    current_depth = 1
    pending_rows = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
    while len(pending_rows) and current_depth <= max_depth:
        total_rows = significant_binary_significance_matrix.shape[0]
        for source_row in range(total_rows):
            # Only rows that are currently significant may donate their profile
            if significant_binary_significance_matrix[source_row].sum() == 0:
                continue
            significance_matrix, significant_binary_significance_matrix = (
                _process_node_imputation(
                    source_row,
                    network,
                    significance_matrix,
                    significant_binary_significance_matrix,
                    current_depth,
                )
            )

        # Re-evaluate which rows are still empty before searching one step further out
        pending_rows = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
        current_depth += 1

    return significance_matrix, significant_binary_significance_matrix
|
300
|
+
|
301
|
+
|
302
|
+
def _process_node_imputation(
    row_index: int,
    network: nx.Graph,
    significance_matrix: np.ndarray,
    significant_binary_significance_matrix: np.ndarray,
    depth: int,
) -> Tuple[np.ndarray, np.ndarray]:
    """Copy a significant node's profile onto at most one nearby non-significant node.

    Nodes reachable from `row_index` within `depth` hops are split into annotated nodes
    (non-zero row sums in both matrices) and empty nodes (zero row sums in both). The Euclidean
    distances from `row_index` to the annotated nodes define 1.5 * IQR bounds; empty nodes whose
    distance falls inside those bounds are candidates. One candidate is chosen and receives the
    significance row of `row_index` divided by sqrt(depth + 1) and an unscaled copy of its
    binary row. Both matrices are modified in place.

    Args:
        row_index (int): The index of the significant node being processed.
        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be
            imputed similarly.
        depth (int): Current depth for traversal.

    Returns:
        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold
            matrix.
    """
    unchanged = (significance_matrix, significant_binary_significance_matrix)

    # Everything reachable within `depth` hops, excluding the source node itself
    reachable = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
    nearby = [n for n in reachable if n != row_index]
    # Nearby nodes that already carry significant data
    annotated = [
        n
        for n in nearby
        if significant_binary_significance_matrix[n].sum() != 0
        and significance_matrix[n].sum() != 0
    ]
    # Nearby nodes that are empty in both matrices and may therefore be imputed
    empty = [
        n
        for n in nearby
        if significant_binary_significance_matrix[n].sum() == 0
        and significance_matrix[n].sum() == 0
    ]
    if not (empty and annotated):
        return unchanged

    # Outlier bounds come from the spread of distances to the annotated nodes
    annotated_distances = [_get_euclidean_distance(row_index, n, network) for n in annotated]
    q1, q3 = np.percentile(annotated_distances, [25, 75])
    spread = q3 - q1
    lowest_allowed = q1 - 1.5 * spread
    highest_allowed = q3 + 1.5 * spread
    candidates = [
        n
        for n in empty
        if lowest_allowed <= _get_euclidean_distance(row_index, n, network) <= highest_allowed
    ]
    if not candidates:
        return unchanged

    if len(candidates) > 1:

        def similarity_to_other_candidates(candidate):
            # Total cosine similarity between this candidate's row and every other candidate's
            return sum(
                cosine_similarity(
                    significance_matrix[candidate].reshape(1, -1),
                    significance_matrix[other].reshape(1, -1),
                )[0][0]
                for other in candidates
                if other != candidate
            )

        # max() keeps the first candidate when several share the top score
        target = max(candidates, key=similarity_to_other_candidates)
    else:
        target = candidates[0]

    # The copied significance values are attenuated by sqrt(depth + 1)
    significance_matrix[target] = significance_matrix[row_index] / np.sqrt(depth + 1)
    significant_binary_significance_matrix[target] = significant_binary_significance_matrix[
        row_index
    ]

    return significance_matrix, significant_binary_significance_matrix
|
386
|
+
|
387
|
+
|
388
|
+
def _prune_neighbors(
    network: nx.Graph,
    significance_matrix: np.ndarray,
    significant_binary_significance_matrix: np.ndarray,
    distance_threshold: float = 0.9,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Zero out significant nodes whose significant neighbors are unusually far away.

    For each node with a non-zero binary row, the median Euclidean distance to its directly
    connected significant neighbors is computed. A cutoff is derived from those medians via
    `_calculate_threshold(..., 1 - distance_threshold)`, and every node whose median distance
    is at or above the cutoff has its rows cleared in both matrices (in place).

    Args:
        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
        significance_matrix (np.ndarray): The significance matrix.
        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
        distance_threshold (float): Rank threshold (0 to 1) to determine outliers.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]:
            - np.ndarray: The updated significance matrix with outliers set to zero.
            - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
            - np.ndarray: The significant significance matrix, where non-significant entries
              are set to zero.
    """

    def median_distance_to_significant_neighbors(node):
        # Reads the binary matrix at call time, so rows cleared earlier no longer count
        significant_neighbors = [
            n
            for n in network.neighbors(node)
            if significant_binary_significance_matrix[n].sum() != 0
        ]
        if not significant_neighbors:
            return None
        return np.median(
            [_get_euclidean_distance(node, n, network) for n in significant_neighbors]
        )

    # Rows of the binary matrix that contain at least one non-zero entry
    significant_rows = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]

    # First pass: collect the medians that define the distance distribution
    median_distances = []
    for node in significant_rows:
        node_median = median_distance_to_significant_neighbors(node)
        if node_median is not None:
            median_distances.append(node_median)

    # Translate the rank threshold into an absolute distance cutoff
    cutoff = _calculate_threshold(median_distances, 1 - distance_threshold)

    # Second pass: clear nodes at or beyond the cutoff; medians are recomputed against the
    # current (partially pruned) binary matrix
    for row_index in significant_rows:
        node_median = median_distance_to_significant_neighbors(row_index)
        if node_median is not None and node_median >= cutoff:
            significance_matrix[row_index] = 0
            significant_binary_significance_matrix[row_index] = 0

    # Keep scores only where the binary matrix flags the entry as significant
    significant_only = np.where(
        significant_binary_significance_matrix == 1, significance_matrix, 0
    )

    return significance_matrix, significant_binary_significance_matrix, significant_only
|
450
|
+
|
451
|
+
|
452
|
+
def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
    """Return the straight-line distance between the stored positions of two nodes.

    Args:
        node1 (Any): The first node.
        node2 (Any): The second node.
        network (nx.Graph): The network graph containing the nodes.

    Returns:
        float: The Euclidean norm of the difference between the two node positions.
    """
    first_position, second_position = (
        _get_node_position(network, node) for node in (node1, node2)
    )
    offset = first_position - second_position
    return np.linalg.norm(offset)
|
466
|
+
|
467
|
+
|
468
|
+
def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
    """Collect a node's coordinate attributes into a numpy array.

    The attributes "x", "y" and "z" are read in that order. A coordinate that is not present
    on the node is skipped rather than filled in, so the result holds between zero and three
    values.

    Args:
        network (nx.Graph): The network graph containing node positions.
        node (Any): The node for which the position is being retrieved.

    Returns:
        np.ndarray: The available coordinates of the node, ordered x, y, z.
    """
    attributes = network.nodes[node]
    coordinates = []
    for axis in ("x", "y", "z"):
        if axis in attributes:
            coordinates.append(attributes.get(axis, 0))
    return np.array(coordinates)
|
485
|
+
|
486
|
+
|
487
|
+
def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
    """Calculate the distance threshold based on the given median distances and a percentile threshold.

    The sorted distances are linearly interpolated onto 1000 evenly spaced rank percentiles and
    the value at the rank matching `distance_threshold` is returned.

    Args:
        median_distances (List): An array of median distances.
        distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance
            cutoff. 0 selects the smallest distance and 1 the largest; values outside that range
            are clamped to the nearest end.

    Returns:
        float: The calculated distance threshold value.

    Raises:
        ValueError: If no significant annotations are found in the median distances.
    """
    # Sort the median distances
    sorted_distances = np.sort(median_distances)
    # Nothing to interpolate means no node had significant neighbors
    if sorted_distances.size == 0:
        raise ValueError("No significant annotations found.")

    # Compute the rank fractions for the sorted distances
    rank_percentiles = np.linspace(0, 1, len(sorted_distances))
    # Interpolating the ranks to 1000 evenly spaced percentiles
    interpolated_percentiles = np.linspace(0, 1, 1000)
    smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)

    # Determine the index corresponding to the distance threshold
    threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
    # Clamp the index: a threshold of 0 would otherwise give -1 and wrap around to the largest
    # distance, and a threshold above 1 would run past the end of the array
    threshold_index = min(max(threshold_index, 0), len(smoothed_distances) - 1)
    # Return the smoothed distance at the calculated index
    return smoothed_distances[threshold_index]
|
@@ -0,0 +1,13 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/stats
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from risk.neighborhoods.stats.permutation import compute_permutation_test
|
7
|
+
from risk.neighborhoods.stats.tests import (
|
8
|
+
compute_binom_test,
|
9
|
+
compute_chi2_test,
|
10
|
+
compute_hypergeom_test,
|
11
|
+
compute_poisson_test,
|
12
|
+
compute_zscore_test,
|
13
|
+
)
|