risk-network 0.0.11__py3-none-any.whl → 0.0.12b0__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- risk/__init__.py +1 -1
- risk/risk.py +5 -5
- {risk_network-0.0.11.dist-info → risk_network-0.0.12b0.dist-info}/METADATA +10 -12
- risk_network-0.0.12b0.dist-info/RECORD +7 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12b0.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/annotations/annotations.py +0 -354
- risk/annotations/io.py +0 -240
- risk/annotations/nltk_setup.py +0 -85
- risk/log/__init__.py +0 -11
- risk/log/console.py +0 -141
- risk/log/parameters.py +0 -172
- risk/neighborhoods/__init__.py +0 -8
- risk/neighborhoods/api.py +0 -442
- risk/neighborhoods/community.py +0 -412
- risk/neighborhoods/domains.py +0 -358
- risk/neighborhoods/neighborhoods.py +0 -508
- risk/network/__init__.py +0 -6
- risk/network/geometry.py +0 -150
- risk/network/graph/__init__.py +0 -6
- risk/network/graph/api.py +0 -200
- risk/network/graph/graph.py +0 -269
- risk/network/graph/summary.py +0 -254
- risk/network/io.py +0 -550
- risk/network/plotter/__init__.py +0 -6
- risk/network/plotter/api.py +0 -54
- risk/network/plotter/canvas.py +0 -291
- risk/network/plotter/contour.py +0 -330
- risk/network/plotter/labels.py +0 -924
- risk/network/plotter/network.py +0 -294
- risk/network/plotter/plotter.py +0 -143
- risk/network/plotter/utils/colors.py +0 -416
- risk/network/plotter/utils/layout.py +0 -94
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk/stats/permutation/permutation.py +0 -237
- risk/stats/permutation/test_functions.py +0 -70
- risk/stats/significance.py +0 -166
- risk/stats/stat_tests.py +0 -267
- risk_network-0.0.11.dist-info/RECORD +0 -41
- {risk_network-0.0.11.dist-info → risk_network-0.0.12b0.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12b0.dist-info}/top_level.txt +0 -0
risk/neighborhoods/neighborhoods.py DELETED

```diff
@@ -1,508 +0,0 @@
-"""
-risk/neighborhoods/neighborhoods
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-"""
-
-import random
-import warnings
-from typing import Any, Dict, List, Tuple, Union
-
-import networkx as nx
-import numpy as np
-from scipy.sparse import csr_matrix
-from sklearn.exceptions import DataConversionWarning
-from sklearn.metrics.pairwise import cosine_similarity
-
-from risk.neighborhoods.community import (
-    calculate_greedy_modularity_neighborhoods,
-    calculate_label_propagation_neighborhoods,
-    calculate_leiden_neighborhoods,
-    calculate_louvain_neighborhoods,
-    calculate_markov_clustering_neighborhoods,
-    calculate_spinglass_neighborhoods,
-    calculate_walktrap_neighborhoods,
-)
-from risk.log import logger
-
-# Suppress DataConversionWarning
-warnings.filterwarnings(action="ignore", category=DataConversionWarning)
-
-
-def get_network_neighborhoods(
-    network: nx.Graph,
-    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 1.0,
-    louvain_resolution: float = 0.1,
-    leiden_resolution: float = 1.0,
-    random_seed: int = 888,
-) -> csr_matrix:
-    """Calculate the combined neighborhoods for each node using sparse matrices.
-
-    Args:
-        network (nx.Graph): The network graph.
-        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
-        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
-        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
-        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
-        random_seed (int, optional): Random seed for methods requiring random initialization.
-
-    Returns:
-        csr_matrix: The combined neighborhood matrix.
-    """
-    # Set random seed for reproducibility
-    random.seed(random_seed)
-    np.random.seed(random_seed)
-
-    # Ensure distance_metric is a list for multi-algorithm handling
-    if isinstance(distance_metric, (str, np.ndarray)):
-        distance_metric = [distance_metric]
-    # Ensure fraction_shortest_edges is a list for multi-threshold handling
-    if isinstance(fraction_shortest_edges, (float, int)):
-        fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
-    # Validate matching lengths of distance metrics and thresholds
-    if len(distance_metric) != len(fraction_shortest_edges):
-        raise ValueError(
-            "The number of distance metrics must match the number of edge length thresholds."
-        )
-
-    # Initialize a sparse LIL matrix for incremental updates
-    num_nodes = network.number_of_nodes()
-    # Initialize a sparse matrix with the same shape as the network
-    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
-    # Loop through each distance metric and corresponding edge rank fraction
-    for metric, percentile in zip(distance_metric, fraction_shortest_edges):
-        # Compute neighborhoods for the specified metric
-        if metric == "greedy_modularity":
-            neighborhoods = calculate_greedy_modularity_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "label_propagation":
-            neighborhoods = calculate_label_propagation_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "leiden":
-            neighborhoods = calculate_leiden_neighborhoods(
-                network,
-                resolution=leiden_resolution,
-                fraction_shortest_edges=percentile,
-                random_seed=random_seed,
-            )
-        elif metric == "louvain":
-            neighborhoods = calculate_louvain_neighborhoods(
-                network,
-                resolution=louvain_resolution,
-                fraction_shortest_edges=percentile,
-                random_seed=random_seed,
-            )
-        elif metric == "markov_clustering":
-            neighborhoods = calculate_markov_clustering_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "spinglass":
-            neighborhoods = calculate_spinglass_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        elif metric == "walktrap":
-            neighborhoods = calculate_walktrap_neighborhoods(
-                network, fraction_shortest_edges=percentile
-            )
-        else:
-            raise ValueError(
-                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
-                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
-            )
-
-        # Add the sparse neighborhood matrix
-        combined_neighborhoods += neighborhoods
-
-    # Ensure maximum value in each row is set to 1
-    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
-
-    return combined_neighborhoods
-
-
-def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
-    """Set the maximum value in each row of a sparse matrix to 1.
-
-    Args:
-        matrix (csr_matrix): The input sparse matrix.
-
-    Returns:
-        csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
-    """
-    # Iterate over each row and set the maximum value to 1
-    for i in range(matrix.shape[0]):
-        row_data = matrix[i].data
-        if len(row_data) > 0:
-            row_data[:] = (row_data == max(row_data)).astype(int)
-
-    return matrix
-
-
-def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
-    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
-    useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
-    maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
-
-    Args:
-        matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
-
-    Returns:
-        np.ndarray: The modified matrix where only the maximum value(s) in each row is set to 1, and others are set to 0.
-    """
-    # Find the maximum value in each row (column-wise max operation)
-    max_values = np.max(matrix, axis=1, keepdims=True)
-    # Create a boolean mask where elements are True if they are the max value in their row
-    max_mask = matrix == max_values
-    # Set all elements to 0, and then set the maximum value positions to 1
-    matrix[:] = 0  # Set everything to 0
-    matrix[max_mask] = 1  # Set only the max values to 1
-    return matrix
-
-
-def process_neighborhoods(
-    network: nx.Graph,
-    neighborhoods: Dict[str, Any],
-    impute_depth: int = 0,
-    prune_threshold: float = 0.0,
-) -> Dict[str, Any]:
-    """Process neighborhoods based on the imputation and pruning settings.
-
-    Args:
-        network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
-        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
-        prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
-
-    Returns:
-        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
-    """
-    significance_matrix = neighborhoods["significance_matrix"]
-    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
-    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
-    logger.debug(f"Imputation depth: {impute_depth}")
-    if impute_depth:
-        (
-            significance_matrix,
-            significant_binary_significance_matrix,
-            significant_significance_matrix,
-        ) = _impute_neighbors(
-            network,
-            significance_matrix,
-            significant_binary_significance_matrix,
-            max_depth=impute_depth,
-        )
-
-    logger.debug(f"Pruning threshold: {prune_threshold}")
-    if prune_threshold:
-        (
-            significance_matrix,
-            significant_binary_significance_matrix,
-            significant_significance_matrix,
-        ) = _prune_neighbors(
-            network,
-            significance_matrix,
-            significant_binary_significance_matrix,
-            distance_threshold=prune_threshold,
-        )
-
-    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
-    node_significance_sums = np.sum(significance_matrix, axis=1)
-    return {
-        "significance_matrix": significance_matrix,
-        "significant_binary_significance_matrix": significant_binary_significance_matrix,
-        "significant_significance_matrix": significant_significance_matrix,
-        "neighborhood_significance_counts": neighborhood_significance_counts,
-        "node_significance_sums": node_significance_sums,
-    }
-
-
-def _impute_neighbors(
-    network: nx.Graph,
-    significance_matrix: np.ndarray,
-    significant_binary_significance_matrix: np.ndarray,
-    max_depth: int = 3,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
-
-    Args:
-        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
-        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
-        max_depth (int): Maximum depth of nodes to traverse for imputing values.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray, np.ndarray]:
-            - np.ndarray: The imputed significance matrix.
-            - np.ndarray: The imputed alpha threshold matrix.
-            - np.ndarray: The significant significance matrix with non-significant entries set to zero.
-    """
-    # Calculate the distance threshold value based on the shortest distances
-    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
-        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
-    )
-    # Create a matrix where non-significant entries are set to zero
-    significant_significance_matrix = np.where(
-        significant_binary_significance_matrix == 1, significance_matrix, 0
-    )
-
-    return (
-        significance_matrix,
-        significant_binary_significance_matrix,
-        significant_significance_matrix,
-    )
-
-
-def _impute_neighbors_with_similarity(
-    network: nx.Graph,
-    significance_matrix: np.ndarray,
-    significant_binary_significance_matrix: np.ndarray,
-    max_depth: int = 3,
-) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
-
-    Args:
-        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
-        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
-        max_depth (int): Maximum depth of nodes to traverse for imputing values.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray]:
-            - The imputed significance matrix.
-            - The imputed alpha threshold matrix.
-    """
-    depth = 1
-    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
-    while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all significant nodes
-        for row_index in range(significant_binary_significance_matrix.shape[0]):
-            if significant_binary_significance_matrix[row_index].sum() != 0:
-                (
-                    significance_matrix,
-                    significant_binary_significance_matrix,
-                ) = _process_node_imputation(
-                    row_index,
-                    network,
-                    significance_matrix,
-                    significant_binary_significance_matrix,
-                    depth,
-                )
-
-        # Update rows to impute for the next iteration
-        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
-        depth += 1
-
-    return significance_matrix, significant_binary_significance_matrix
-
-
-def _process_node_imputation(
-    row_index: int,
-    network: nx.Graph,
-    significance_matrix: np.ndarray,
-    significant_binary_significance_matrix: np.ndarray,
-    depth: int,
-) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its significant neighbors.
-
-    Args:
-        row_index (int): The index of the significant node being processed.
-        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
-        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
-        depth (int): Current depth for traversal.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
-    """
-    # Check neighbors at the current depth
-    neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already significant)
-    annotated_neighbors = [
-        n
-        for n in neighbors
-        if n != row_index
-        and significant_binary_significance_matrix[n].sum() != 0
-        and significance_matrix[n].sum() != 0
-    ]
-    # Filter non-significant neighbors
-    valid_neighbors = [
-        n
-        for n in neighbors
-        if n != row_index
-        and significant_binary_significance_matrix[n].sum() == 0
-        and significance_matrix[n].sum() == 0
-    ]
-    # If there are valid non-significant neighbors
-    if valid_neighbors and annotated_neighbors:
-        # Calculate distances to annotated neighbors
-        distances_to_annotated = [
-            _get_euclidean_distance(row_index, n, network) for n in annotated_neighbors
-        ]
-        # Calculate the IQR to identify outliers
-        q1, q3 = np.percentile(distances_to_annotated, [25, 75])
-        iqr = q3 - q1
-        lower_bound = q1 - 1.5 * iqr
-        upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-significant neighbors that fall within the IQR bounds
-        valid_neighbors_within_iqr = [
-            n
-            for n in valid_neighbors
-            if lower_bound <= _get_euclidean_distance(row_index, n, network) <= upper_bound
-        ]
-        # If there are any valid neighbors within the IQR
-        if valid_neighbors_within_iqr:
-            # If more than one valid neighbor is within the IQR, compute pairwise cosine similarities
-            if len(valid_neighbors_within_iqr) > 1:
-                # Find the most similar neighbor based on pairwise cosine similarities
-                def sum_pairwise_cosine_similarities(neighbor):
-                    return sum(
-                        cosine_similarity(
-                            significance_matrix[neighbor].reshape(1, -1),
-                            significance_matrix[other_neighbor].reshape(1, -1),
-                        )[0][0]
-                        for other_neighbor in valid_neighbors_within_iqr
-                        if other_neighbor != neighbor
-                    )
-
-                most_similar_neighbor = max(
-                    valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities
-                )
-            else:
-                most_similar_neighbor = valid_neighbors_within_iqr[0]
-
-            # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
-            significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
-                depth + 1
-            )
-            significant_binary_significance_matrix[most_similar_neighbor] = (
-                significant_binary_significance_matrix[row_index]
-            )
-
-    return significance_matrix, significant_binary_significance_matrix
-
-
-def _prune_neighbors(
-    network: nx.Graph,
-    significance_matrix: np.ndarray,
-    significant_binary_significance_matrix: np.ndarray,
-    distance_threshold: float = 0.9,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Remove outliers based on their rank for edge lengths.
-
-    Args:
-        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-        significance_matrix (np.ndarray): The significance matrix.
-        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
-        distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
-
-    Returns:
-        Tuple[np.ndarray, np.ndarray, np.ndarray]:
-            - np.ndarray: The updated significance matrix with outliers set to zero.
-            - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-            - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
-    """
-    # Identify indices with non-zero rows in the binary significance matrix
-    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
-    median_distances = []
-    for node in non_zero_indices:
-        neighbors = [
-            n
-            for n in network.neighbors(node)
-            if significant_binary_significance_matrix[n].sum() != 0
-        ]
-        if neighbors:
-            median_distance = np.median(
-                [_get_euclidean_distance(node, n, network) for n in neighbors]
-            )
-            median_distances.append(median_distance)
-
-    # Calculate the distance threshold value based on rank
-    distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
-    # Prune nodes that are outliers based on the distance threshold
-    for row_index in non_zero_indices:
-        neighbors = [
-            n
-            for n in network.neighbors(row_index)
-            if significant_binary_significance_matrix[n].sum() != 0
-        ]
-        if neighbors:
-            median_distance = np.median(
-                [_get_euclidean_distance(row_index, n, network) for n in neighbors]
-            )
-            if median_distance >= distance_threshold_value:
-                significance_matrix[row_index] = 0
-                significant_binary_significance_matrix[row_index] = 0
-
-    # Create a matrix where non-significant entries are set to zero
-    significant_significance_matrix = np.where(
-        significant_binary_significance_matrix == 1, significance_matrix, 0
-    )
-
-    return (
-        significance_matrix,
-        significant_binary_significance_matrix,
-        significant_significance_matrix,
-    )
-
-
-def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
-    """Calculate the Euclidean distance between two nodes in the network.
-
-    Args:
-        node1 (Any): The first node.
-        node2 (Any): The second node.
-        network (nx.Graph): The network graph containing the nodes.
-
-    Returns:
-        float: The Euclidean distance between the two nodes.
-    """
-    pos1 = _get_node_position(network, node1)
-    pos2 = _get_node_position(network, node2)
-    return np.linalg.norm(pos1 - pos2)
-
-
-def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
-    """Retrieve the position of a node in the network as a numpy array.
-
-    Args:
-        network (nx.Graph): The network graph containing node positions.
-        node (Any): The node for which the position is being retrieved.
-
-    Returns:
-        np.ndarray: A numpy array representing the position of the node in the format [x, y, z].
-    """
-    return np.array(
-        [
-            network.nodes[node].get(coord, 0)
-            for coord in ["x", "y", "z"]
-            if coord in network.nodes[node]
-        ]
-    )
-
-
-def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
-    """Calculate the distance threshold based on the given median distances and a percentile threshold.
-
-    Args:
-        median_distances (List): An array of median distances.
-        distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
-
-    Returns:
-        float: The calculated distance threshold value.
-    """
-    # Sort the median distances
-    sorted_distances = np.sort(median_distances)
-    # Compute the rank fractions for the sorted distances
-    rank_percentiles = np.linspace(0, 1, len(sorted_distances))
-    # Interpolating the ranks to 1000 evenly spaced percentiles
-    interpolated_percentiles = np.linspace(0, 1, 1000)
-    try:
-        smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
-    except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
-
-    # Determine the index corresponding to the distance threshold
-    threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
-    # Return the smoothed distance at the calculated index
-    return smoothed_distances[threshold_index]
```
risk/network/__init__.py DELETED

risk/network/geometry.py DELETED

```diff
@@ -1,150 +0,0 @@
-"""
-risk/network/geometry
-~~~~~~~~~~~~~~~~~~~~~
-"""
-
-import networkx as nx
-import numpy as np
-
-
-def assign_edge_lengths(
-    G: nx.Graph,
-    compute_sphere: bool = True,
-    surface_depth: float = 0.0,
-) -> nx.Graph:
-    """Assign edge lengths in the graph, optionally mapping nodes to a sphere.
-
-    Args:
-        G (nx.Graph): The input graph.
-        compute_sphere (bool): Whether to map nodes to a sphere. Defaults to True.
-        surface_depth (float): The surface depth for mapping to a sphere. Defaults to 0.0.
-
-    Returns:
-        nx.Graph: The graph with applied edge lengths.
-    """
-
-    def compute_distance_vectorized(coords, is_sphere):
-        """Compute distances between pairs of coordinates."""
-        u_coords, v_coords = coords[:, 0, :], coords[:, 1, :]
-        if is_sphere:
-            u_coords /= np.linalg.norm(u_coords, axis=1, keepdims=True)
-            v_coords /= np.linalg.norm(v_coords, axis=1, keepdims=True)
-            dot_products = np.einsum("ij,ij->i", u_coords, v_coords)
-            return np.arccos(np.clip(dot_products, -1.0, 1.0))
-        return np.linalg.norm(u_coords - v_coords, axis=1)
-
-    # Normalize graph coordinates
-    _normalize_graph_coordinates(G)
-
-    # Map nodes to sphere and adjust depth if required
-    if compute_sphere:
-        _map_to_sphere(G)
-        G_depth = _create_depth(G, surface_depth=surface_depth)
-    else:
-        G_depth = G
-
-    # Precompute edge coordinate arrays and compute distances in bulk
-    edge_data = np.array(
-        [
-            [
-                np.array(
-                    [G_depth.nodes[u]["x"], G_depth.nodes[u]["y"], G_depth.nodes[u].get("z", 0)]
-                ),
-                np.array(
-                    [G_depth.nodes[v]["x"], G_depth.nodes[v]["y"], G_depth.nodes[v].get("z", 0)]
-                ),
-            ]
-            for u, v in G_depth.edges
-        ]
-    )
-    # Compute distances
-    distances = compute_distance_vectorized(edge_data, compute_sphere)
-    # Assign distances back to the graph
-    for (u, v), distance in zip(G_depth.edges, distances):
-        G.edges[u, v]["length"] = distance
-
-    return G
-
-
-def _map_to_sphere(G: nx.Graph) -> None:
-    """Map the x and y coordinates of graph nodes onto a 3D sphere.
-
-    Args:
-        G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
-    """
-    # Extract x, y coordinates as a NumPy array
-    nodes = list(G.nodes)
-    xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in nodes])
-    # Normalize coordinates between [0, 1]
-    min_vals = xy_coords.min(axis=0)
-    max_vals = xy_coords.max(axis=0)
-    normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
-    # Convert normalized coordinates to spherical coordinates
-    theta = normalized_xy[:, 0] * np.pi * 2
-    phi = normalized_xy[:, 1] * np.pi
-    # Compute 3D Cartesian coordinates
-    x = np.sin(phi) * np.cos(theta)
-    y = np.sin(phi) * np.sin(theta)
-    z = np.cos(phi)
-    # Assign coordinates back to graph nodes in bulk
-    xyz_coords = {node: {"x": x[i], "y": y[i], "z": z[i]} for i, node in enumerate(nodes)}
-    nx.set_node_attributes(G, xyz_coords)
-
-
-def _normalize_graph_coordinates(G: nx.Graph) -> None:
-    """Normalize the x and y coordinates of the nodes in the graph to the [0, 1] range.
-
-    Args:
-        G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
-    """
-    # Extract x, y coordinates from the graph nodes
-    xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in G.nodes()])
-    # Calculate min and max values for x and y
-    min_vals = np.min(xy_coords, axis=0)
-    max_vals = np.max(xy_coords, axis=0)
-    # Normalize the coordinates to [0, 1]
-    normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
-    # Update the node coordinates with the normalized values
-    for i, node in enumerate(G.nodes()):
-        G.nodes[node]["x"], G.nodes[node]["y"] = normalized_xy[i]
-
-
-def _create_depth(G: nx.Graph, surface_depth: float = 0.0) -> nx.Graph:
-    """Adjust the 'z' attribute of each node based on the subcluster strengths and normalized surface depth.
-
-    Args:
-        G (nx.Graph): The input graph.
-        surface_depth (float): The maximum surface depth to apply for the strongest subcluster.
-
-    Returns:
-        nx.Graph: The graph with adjusted 'z' attribute for each node.
-    """
-    if surface_depth >= 1.0:
-        surface_depth -= 1e-6  # Cap the surface depth to prevent a value of 1.0
-
-    # Compute subclusters as connected components
-    connected_components = list(nx.connected_components(G))
-    subcluster_strengths = {}
-    max_strength = 0
-    # Precompute strengths and track the maximum strength
-    for component in connected_components:
-        size = len(component)
-        max_strength = max(max_strength, size)
-        for node in component:
-            subcluster_strengths[node] = size
-
-    # Avoid repeated lookups and computations by pre-fetching node data
-    nodes = list(G.nodes(data=True))
-    node_updates = {}
-    for node, attrs in nodes:
-        strength = subcluster_strengths[node]
-        normalized_surface_depth = (strength / max_strength) * surface_depth
-        x, y, z = attrs["x"], attrs["y"], attrs["z"]
-        norm = np.sqrt(x**2 + y**2 + z**2)
-        adjusted_z = z - (z / norm) * normalized_surface_depth
-        node_updates[node] = {"z": adjusted_z}
-
-    # Batch update node attributes
-    nx.set_node_attributes(G, node_updates)
-
-    return G
```