risk-network 0.0.8b27__py3-none-any.whl → 0.0.9b2__py3-none-any.whl
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +39 -38
- risk/annotations/io.py +8 -6
- risk/log/__init__.py +3 -1
- risk/log/{params.py → parameters.py} +9 -34
- risk/neighborhoods/domains.py +18 -18
- risk/neighborhoods/neighborhoods.py +104 -92
- risk/network/graph/__init__.py +6 -0
- risk/network/{graph.py → graph/network.py} +38 -27
- risk/network/graph/summary.py +239 -0
- risk/network/io.py +3 -3
- risk/network/plot/contour.py +1 -1
- risk/network/plot/labels.py +1 -1
- risk/network/plot/network.py +28 -28
- risk/network/plot/utils/color.py +27 -27
- risk/risk.py +25 -30
- risk/stats/stats.py +13 -13
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/METADATA +1 -1
- risk_network-0.0.9b2.dist-info/RECORD +39 -0
- risk_network-0.0.8b27.dist-info/RECORD +0 -37
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/top_level.txt +0 -0
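Two internal modules move in this release: `risk/log/params.py` becomes `risk/log/parameters.py`, and `risk/network/graph.py` becomes the `risk/network/graph/` subpackage, with the class itself in `graph/network.py` and a new `graph/summary.py` alongside it. A minimal sketch of what the move means for imports, assuming the old path was imported directly (whether the new `risk/network/graph/__init__.py` re-exports the same names is not visible in this diff):

```python
# Hypothetical import update for code that reached into the internal module path.
# 0.0.8b27:
#   from risk.network.graph import NetworkGraph
# 0.0.9b2: graph.py is now a subpackage; the class lives in graph/network.py
from risk.network.graph.network import NetworkGraph
```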
```diff
--- a/risk/neighborhoods/neighborhoods.py
+++ b/risk/neighborhoods/neighborhoods.py
@@ -171,163 +171,169 @@ def process_neighborhoods(
 
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
-        neighborhoods (Dict[str, Any]): Dictionary containing '
+        neighborhoods (Dict[str, Any]): Dictionary containing 'significance_matrix', 'significant_binary_significance_matrix', and 'significant_significance_matrix'.
         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
 
     Returns:
-        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and
+        Dict[str, Any]: Processed neighborhoods data, including the updated matrices and significance counts.
     """
-
-
-
+    significance_matrix = neighborhoods["significance_matrix"]
+    significant_binary_significance_matrix = neighborhoods["significant_binary_significance_matrix"]
+    significant_significance_matrix = neighborhoods["significant_significance_matrix"]
     logger.debug(f"Imputation depth: {impute_depth}")
     if impute_depth:
         (
-
-
-
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _impute_neighbors(
             network,
-
-
+            significance_matrix,
+            significant_binary_significance_matrix,
             max_depth=impute_depth,
         )
 
     logger.debug(f"Pruning threshold: {prune_threshold}")
     if prune_threshold:
         (
-
-
-
+            significance_matrix,
+            significant_binary_significance_matrix,
+            significant_significance_matrix,
         ) = _prune_neighbors(
             network,
-
-
+            significance_matrix,
+            significant_binary_significance_matrix,
             distance_threshold=prune_threshold,
         )
 
-
-
+    neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
+    node_significance_sums = np.sum(significance_matrix, axis=1)
     return {
-        "
-        "
-        "
-        "
-        "
+        "significance_matrix": significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "neighborhood_significance_counts": neighborhood_significance_counts,
+        "node_significance_sums": node_significance_sums,
     }
 
 
 def _impute_neighbors(
     network: nx.Graph,
-
-
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Impute rows with sums of zero in the
+    """Impute rows with sums of zero in the significance matrix based on the closest non-zero neighbors in the network graph.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-
-
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-
-        - np.ndarray: The imputed
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        - np.ndarray: The imputed significance matrix.
         - np.ndarray: The imputed alpha threshold matrix.
-        - np.ndarray: The significant
+        - np.ndarray: The significant significance matrix with non-significant entries set to zero.
     """
     # Calculate the distance threshold value based on the shortest distances
-
-        network,
+    significance_matrix, significant_binary_significance_matrix = _impute_neighbors_with_similarity(
+        network, significance_matrix, significant_binary_significance_matrix, max_depth=max_depth
     )
     # Create a matrix where non-significant entries are set to zero
-
-
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _impute_neighbors_with_similarity(
     network: nx.Graph,
-
-
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     max_depth: int = 3,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Impute non-
+    """Impute non-significant nodes based on the closest significant neighbors' profiles and their similarity.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-
-
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         max_depth (int): Maximum depth of nodes to traverse for imputing values.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]:
-        - The imputed
+        Tuple[np.ndarray, np.ndarray]:
+        - The imputed significance matrix.
         - The imputed alpha threshold matrix.
     """
     depth = 1
-    rows_to_impute = np.where(
+    rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        # Iterate over all
-        for row_index in range(
-            if
-
-
-
-
-
-
+        # Iterate over all significant nodes
+        for row_index in range(significant_binary_significance_matrix.shape[0]):
+            if significant_binary_significance_matrix[row_index].sum() != 0:
+                significance_matrix, significant_binary_significance_matrix = (
+                    _process_node_imputation(
+                        row_index,
+                        network,
+                        significance_matrix,
+                        significant_binary_significance_matrix,
+                        depth,
+                    )
                 )
 
         # Update rows to impute for the next iteration
-        rows_to_impute = np.where(
+        rows_to_impute = np.where(significant_binary_significance_matrix.sum(axis=1) == 0)[0]
        depth += 1
 
-    return
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _process_node_imputation(
     row_index: int,
     network: nx.Graph,
-
-
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     depth: int,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Process the imputation for a single node based on its
+    """Process the imputation for a single node based on its significant neighbors.
 
     Args:
-        row_index (int): The index of the
+        row_index (int): The index of the significant node being processed.
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-
-
+        significance_matrix (np.ndarray): The significance matrix with rows to be imputed.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
         depth (int): Current depth for traversal.
 
     Returns:
-        Tuple[np.ndarray, np.ndarray]: The modified
+        Tuple[np.ndarray, np.ndarray]: The modified significance matrix and binary threshold matrix.
     """
     # Check neighbors at the current depth
     neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-    # Filter annotated neighbors (already
+    # Filter annotated neighbors (already significant)
     annotated_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and
-        and
+        and significant_binary_significance_matrix[n].sum() != 0
+        and significance_matrix[n].sum() != 0
     ]
-    # Filter non-
+    # Filter non-significant neighbors
    valid_neighbors = [
         n
         for n in neighbors
         if n != row_index
-        and
-        and
+        and significant_binary_significance_matrix[n].sum() == 0
+        and significance_matrix[n].sum() == 0
     ]
-    # If there are valid non-
+    # If there are valid non-significant neighbors
     if valid_neighbors and annotated_neighbors:
         # Calculate distances to annotated neighbors
         distances_to_annotated = [
```
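The two new return keys in `process_neighborhoods` are plain aggregations over the matrices; a minimal sketch on toy arrays (the rows-as-nodes, columns-as-neighborhoods orientation is an assumption consistent with the axis arguments above):

```python
import numpy as np

# Toy matrices: rows are nodes, columns are neighborhoods/annotation terms (assumed).
significant_binary_significance_matrix = np.array([[1, 0, 1], [0, 0, 0], [1, 1, 0]])
significance_matrix = np.array([[0.9, 0.0, 0.4], [0.0, 0.0, 0.0], [0.7, 0.2, 0.0]])

# Column sums: how many nodes are significant per column (axis=0, as in the diff)
neighborhood_significance_counts = np.sum(significant_binary_significance_matrix, axis=0)
# Row sums: total significance per node (axis=1, as in the diff)
node_significance_sums = np.sum(significance_matrix, axis=1)

print(neighborhood_significance_counts)  # [2 1 1]
print(node_significance_sums)            # [1.3 0.  0.9]
```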
```diff
@@ -338,7 +344,7 @@ def _process_node_imputation
         iqr = q3 - q1
         lower_bound = q1 - 1.5 * iqr
         upper_bound = q3 + 1.5 * iqr
-        # Filter valid non-
+        # Filter valid non-significant neighbors that fall within the IQR bounds
         valid_neighbors_within_iqr = [
             n
             for n in valid_neighbors
```
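The bounds above are a standard Tukey fence over the distances to annotated neighbors. A small sketch of the filter, assuming `q1`/`q3` (not shown in this hunk) are quartiles of those distances:

```python
import numpy as np

distances_to_annotated = [0.20, 0.25, 0.30, 0.31, 2.50]  # toy distances
q1, q3 = np.percentile(distances_to_annotated, [25, 75])  # assumed quartile source
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr

# Keep candidates inside the fences; the 2.50 outlier is dropped
within_iqr = [d for d in distances_to_annotated if lower_bound <= d <= upper_bound]
print(within_iqr)  # [0.2, 0.25, 0.3, 0.31]
```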
```diff
@@ -352,8 +358,8 @@ def _process_node_imputation
         def sum_pairwise_cosine_similarities(neighbor):
             return sum(
                 cosine_similarity(
-
-
+                    significance_matrix[neighbor].reshape(1, -1),
+                    significance_matrix[other_neighbor].reshape(1, -1),
                 )[0][0]
                 for other_neighbor in valid_neighbors_within_iqr
                 if other_neighbor != neighbor
```
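`sum_pairwise_cosine_similarities` scores each candidate by how similar its significance profile is to the other candidates'. A self-contained sketch of that scoring, with selection via `max` being an assumption (this hunk shows only the helper; the next hunk shows a fallback to the first candidate):

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

significance_matrix = np.array([[1.0, 0.0, 0.0], [0.9, 0.1, 0.1], [0.0, 0.0, 1.0]])
valid_neighbors_within_iqr = [0, 1, 2]

def sum_pairwise_cosine_similarities(neighbor):
    # Sum of cosine similarities between this candidate's row and every other candidate's row
    return sum(
        cosine_similarity(
            significance_matrix[neighbor].reshape(1, -1),
            significance_matrix[other_neighbor].reshape(1, -1),
        )[0][0]
        for other_neighbor in valid_neighbors_within_iqr
        if other_neighbor != neighbor
    )

most_similar_neighbor = max(valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities)
print(most_similar_neighbor)  # 1: most similar, on average, to the other candidates
```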
```diff
@@ -365,43 +371,45 @@ def _process_node_imputation
         else:
             most_similar_neighbor = valid_neighbors_within_iqr[0]
 
-        # Impute the most similar non-
-
+        # Impute the most similar non-significant neighbor with the significant node's data, scaled by depth
+        significance_matrix[most_similar_neighbor] = significance_matrix[row_index] / np.sqrt(
             depth + 1
         )
-
-
+        significant_binary_significance_matrix[most_similar_neighbor] = (
+            significant_binary_significance_matrix[row_index]
         )
 
-    return
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _prune_neighbors(
     network: nx.Graph,
-
-
+    significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     distance_threshold: float = 0.9,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Remove outliers based on their rank for edge lengths.
 
     Args:
         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
-
-
+        significance_matrix (np.ndarray): The significance matrix.
+        significant_binary_significance_matrix (np.ndarray): The alpha threshold matrix.
         distance_threshold (float): Rank threshold (0 to 1) to determine outliers.
 
     Returns:
-
-        - np.ndarray: The updated
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        - np.ndarray: The updated significance matrix with outliers set to zero.
         - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
-        - np.ndarray: The significant
+        - np.ndarray: The significant significance matrix, where non-significant entries are set to zero.
     """
-    # Identify indices with non-zero rows in the binary
-    non_zero_indices = np.where(
+    # Identify indices with non-zero rows in the binary significance matrix
+    non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
     median_distances = []
     for node in non_zero_indices:
         neighbors = [
-            n
+            n
+            for n in network.neighbors(node)
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
```
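The imputation above copies the significant node's row into the chosen neighbor, damped by `1 / np.sqrt(depth + 1)` so values imputed across more hops carry less weight. A one-liner to see the attenuation:

```python
import numpy as np

row = np.array([0.8, 0.0, 0.4])  # toy significance row for the source node
for depth in (1, 2, 3):
    # Deeper imputations shrink toward zero: factors ~0.71, ~0.58, ~0.50
    print(depth, np.round(row / np.sqrt(depth + 1), 3))
```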
```diff
@@ -416,22 +424,26 @@ def _prune_neighbors
         neighbors = [
             n
             for n in network.neighbors(row_index)
-            if
+            if significant_binary_significance_matrix[n].sum() != 0
         ]
         if neighbors:
             median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
             if median_distance >= distance_threshold_value:
-
-
+                significance_matrix[row_index] = 0
+                significant_binary_significance_matrix[row_index] = 0
 
     # Create a matrix where non-significant entries are set to zero
-
-
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
-    return
+    return (
+        significance_matrix,
+        significant_binary_significance_matrix,
+        significant_significance_matrix,
+    )
 
 
 def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
```
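`_prune_neighbors` zeroes out nodes whose median distance to significant neighbors is large. The comparison value `distance_threshold_value` is not computed in the hunks shown; deriving it as a quantile of the per-node median distances is an assumption based on the docstring's "Rank threshold (0 to 1)":

```python
import numpy as np

median_distances = [0.20, 0.30, 0.35, 0.40, 1.80]  # toy per-node median distances
distance_threshold = 0.9  # the function's rank threshold
# Assumed derivation: the rank threshold picks a quantile of the observed medians
distance_threshold_value = np.quantile(median_distances, distance_threshold)

pruned = [d >= distance_threshold_value for d in median_distances]
print(round(distance_threshold_value, 2), pruned)  # 1.24 [False, False, False, False, True]
```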
```diff
--- a/risk/network/graph.py
+++ b/risk/network/graph/network.py
@@ -1,6 +1,6 @@
 """
-risk/network/graph
-
+risk/network/graph/network
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 from collections import defaultdict
```
```diff
@@ -10,35 +10,42 @@ import networkx as nx
 import numpy as np
 import pandas as pd
 
+from risk.network.graph.summary import Summary
+
 
 class NetworkGraph:
     """A class to represent a network graph and process its nodes and edges.
 
     The NetworkGraph class provides functionality to handle and manipulate a network graph,
-    including managing domains, annotations, and node
+    including managing domains, annotations, and node significance data. It also includes methods
     for transforming and mapping graph coordinates, as well as generating colors based on node
-
+    significance.
     """
 
     def __init__(
         self,
         network: nx.Graph,
+        annotations: Dict[str, Any],
+        neighborhoods: Dict[str, Any],
         top_annotations: pd.DataFrame,
         domains: pd.DataFrame,
         trimmed_domains: pd.DataFrame,
         node_label_to_node_id_map: Dict[str, Any],
-
+        node_significance_sums: np.ndarray,
     ):
         """Initialize the NetworkGraph object.
 
         Args:
             network (nx.Graph): The network graph.
+            annotations (Dict[str, Any]): The annotations associated with the network.
+            neighborhoods (Dict[str, Any]): Neighborhood significance data.
             top_annotations (pd.DataFrame): DataFrame containing annotations data for the network nodes.
             domains (pd.DataFrame): DataFrame containing domain data for the network nodes.
             trimmed_domains (pd.DataFrame): DataFrame containing trimmed domain data for the network nodes.
             node_label_to_node_id_map (Dict[str, Any]): A dictionary mapping node labels to their corresponding IDs.
-
+            node_significance_sums (np.ndarray): Array containing the significant sums for the nodes.
         """
+        # Initialize self.network downstream of the other attributes
         self.top_annotations = top_annotations
         self.domain_id_to_node_ids_map = self._create_domain_id_to_node_ids_map(domains)
         self.domains = domains
```
```diff
@@ -49,21 +56,25 @@ class NetworkGraph:
             trimmed_domains
         )
         self.trimmed_domains = trimmed_domains
-        self.
-        self.
-        self.
+        self.node_significance_sums = node_significance_sums
+        self.node_id_to_domain_ids_and_significance_map = (
+            self._create_node_id_to_domain_ids_and_significances(domains)
         )
         self.node_id_to_node_label_map = {v: k for k, v in node_label_to_node_id_map.items()}
-        self.
-            zip(node_label_to_node_id_map.keys(),
+        self.node_label_to_significance_map = dict(
+            zip(node_label_to_node_id_map.keys(), node_significance_sums)
         )
         self.node_label_to_node_id_map = node_label_to_node_id_map
+
         # NOTE: Below this point, instance attributes (i.e., self) will be used!
         self.domain_id_to_node_labels_map = self._create_domain_id_to_node_labels_map()
         # Unfold the network's 3D coordinates to 2D and extract node coordinates
         self.network = _unfold_sphere_to_plane(network)
         self.node_coordinates = _extract_node_coordinates(self.network)
 
+        # NOTE: Only after the above attributes are initialized, we can create the summary
+        self.summary = Summary(annotations, neighborhoods, self)
+
     @staticmethod
     def _create_domain_id_to_node_ids_map(domains: pd.DataFrame) -> Dict[int, Any]:
         """Create a mapping from domains to the list of node IDs belonging to each domain.
```
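The constructor wires two small mappings from `node_label_to_node_id_map` and the new `node_significance_sums` argument. A toy run of those two idioms (labels and values invented; a plain list stands in for the `np.ndarray`); note that zipping the map's keys against the array assumes the array is ordered by the same node IDs as the map's insertion order:

```python
node_label_to_node_id_map = {"geneA": 0, "geneB": 1, "geneC": 2}
node_significance_sums = [1.3, 0.0, 0.9]  # assumed ordered by node ID

# Invert label -> ID into ID -> label
node_id_to_node_label_map = {v: k for k, v in node_label_to_node_id_map.items()}
# Pair each label with its node's significance sum
node_label_to_significance_map = dict(
    zip(node_label_to_node_id_map.keys(), node_significance_sums)
)
print(node_id_to_node_label_map)       # {0: 'geneA', 1: 'geneB', 2: 'geneC'}
print(node_label_to_significance_map)  # {'geneA': 1.3, 'geneB': 0.0, 'geneC': 0.9}
```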
```diff
@@ -103,25 +114,25 @@ class NetworkGraph:
     def _create_domain_id_to_domain_info_map(
         trimmed_domains: pd.DataFrame,
     ) -> Dict[int, Dict[str, Any]]:
-        """Create a mapping from domain IDs to their corresponding full description and
+        """Create a mapping from domain IDs to their corresponding full description and significance score.
 
         Args:
-            trimmed_domains (pd.DataFrame): DataFrame containing domain IDs, full descriptions, and
+            trimmed_domains (pd.DataFrame): DataFrame containing domain IDs, full descriptions, and significance scores.
 
         Returns:
-            Dict[int, Dict[str, Any]]: A dictionary mapping domain IDs (int) to a dictionary with 'full_descriptions' and '
+            Dict[int, Dict[str, Any]]: A dictionary mapping domain IDs (int) to a dictionary with 'full_descriptions' and 'significance_scores'.
         """
         return {
             int(id_): {
                 "full_descriptions": trimmed_domains.at[id_, "full_descriptions"],
-                "
+                "significance_scores": trimmed_domains.at[id_, "significance_scores"],
             }
             for id_ in trimmed_domains.index
         }
 
     @staticmethod
-    def
-    """Creates a dictionary mapping each node ID to its corresponding domain IDs and
+    def _create_node_id_to_domain_ids_and_significances(domains: pd.DataFrame) -> Dict[int, Dict]:
+        """Creates a dictionary mapping each node ID to its corresponding domain IDs and significance values.
 
         Args:
             domains (pd.DataFrame): A DataFrame containing domain information for each node. Assumes the last
```
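A toy reconstruction of `_create_domain_id_to_domain_info_map`; the column names `full_descriptions` and `significance_scores` come straight from the hunk, the row values are invented:

```python
import pandas as pd

trimmed_domains = pd.DataFrame(
    {
        "full_descriptions": ["DNA repair", "ribosome biogenesis"],
        "significance_scores": [12.4, 8.1],
    },
    index=[1, 2],  # domain IDs
)
domain_id_to_domain_info_map = {
    int(id_): {
        "full_descriptions": trimmed_domains.at[id_, "full_descriptions"],
        "significance_scores": trimmed_domains.at[id_, "significance_scores"],
    }
    for id_ in trimmed_domains.index
}
print(domain_id_to_domain_info_map[1])  # domain 1 -> its description and score
```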
```diff
@@ -129,28 +140,28 @@ class NetworkGraph:
 
         Returns:
             Dict[int, Dict]: A dictionary where the key is the node ID (index of the DataFrame), and the value is another dictionary
-                with 'domain' (a list of domain IDs with non-zero
-                (a dict of domain IDs and their corresponding
+                with 'domain' (a list of domain IDs with non-zero significance) and 'significance'
+                (a dict of domain IDs and their corresponding significance values).
         """
         # Initialize an empty dictionary to store the result
-
+        node_id_to_domain_ids_and_significances = {}
         # Get the list of domain columns (excluding 'all domains' and 'primary domain')
         domain_columns = domains.columns[
             :-2
         ]  # The last two columns are 'all domains' and 'primary domain'
         # Iterate over each row in the dataframe
         for idx, row in domains.iterrows():
-            # Get the domains (column names) where the
+            # Get the domains (column names) where the significance score is greater than 0
             all_domains = domain_columns[row[domain_columns] > 0].tolist()
-            # Get the
-
+            # Get the significance values for those domains
+            significance_values = row[all_domains].to_dict()
             # Store the result in the dictionary with index as the key
-
-                "domains": all_domains,  # The column names where
-                "
+            node_id_to_domain_ids_and_significances[idx] = {
+                "domains": all_domains,  # The column names where significance > 0
+                "significances": significance_values,  # The actual significance values for those columns
             }
 
-        return
+        return node_id_to_domain_ids_and_significances
 
     def _create_domain_id_to_node_labels_map(self) -> Dict[int, List[str]]:
         """Create a map from domain IDs to node labels.
```
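The mapping logic is fully visible in the `+` lines; here is a self-contained toy run (invented values) showing the per-node output shape:

```python
import pandas as pd

# Toy walk-through of _create_node_id_to_domain_ids_and_significances. Per-node domain
# columns come first; the trailing 'all domains' and 'primary domain' columns are
# excluded by the domains.columns[:-2] slice, exactly as in the hunk above.
domains = pd.DataFrame(
    {
        1: [0.0, 0.8],
        2: [0.5, 0.3],
        "all domains": [[2], [1, 2]],
        "primary domain": [2, 1],
    }
)
domain_columns = domains.columns[:-2]
node_id_to_domain_ids_and_significances = {}
for idx, row in domains.iterrows():
    all_domains = domain_columns[row[domain_columns] > 0].tolist()
    node_id_to_domain_ids_and_significances[idx] = {
        "domains": all_domains,                      # domain IDs with significance > 0
        "significances": row[all_domains].to_dict(),  # their significance values
    }
print(node_id_to_domain_ids_and_significances[0])
# node 0 belongs only to domain 2, with significance 0.5
```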