napistu 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/network/constants.py +33 -3
- napistu/network/neighborhoods.py +430 -230
- napistu/network/ng_utils.py +7 -7
- napistu/network/paths.py +28 -18
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/METADATA +1 -1
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/RECORD +13 -13
- tests/test_network_neighborhoods.py +120 -21
- tests/test_network_paths.py +40 -16
- tests/test_network_precompute.py +25 -10
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/WHEEL +0 -0
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.5.dist-info → napistu-0.4.7.dist-info}/top_level.txt +0 -0
napistu/network/neighborhoods.py
CHANGED
@@ -18,13 +18,26 @@ from napistu import utils
 from napistu.network import ng_utils
 from napistu.network import paths
 
-from napistu.constants import …
-…
-…
-…
-…
-…
-…
+from napistu.constants import (
+    MINI_SBO_NAME_TO_POLARITY,
+    MINI_SBO_TO_NAME,
+    NAPISTU_EDGELIST,
+    ONTOLOGIES,
+    SBML_DFS,
+)
+
+from napistu.network.constants import (
+    DISTANCES,
+    GRAPH_RELATIONSHIPS,
+    GRAPH_WIRING_APPROACHES,
+    NAPISTU_GRAPH_EDGES,
+    NAPISTU_GRAPH_NODE_TYPES,
+    NAPISTU_GRAPH_VERTICES,
+    NEIGHBORHOOD_DICT_KEYS,
+    NEIGHBORHOOD_NETWORK_TYPES,
+    NET_POLARITY,
+    VALID_NEIGHBORHOOD_NETWORK_TYPES,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -34,8 +47,9 @@ def find_and_prune_neighborhoods(
     napistu_graph: ig.Graph,
     compartmentalized_species: str | list[str],
     precomputed_distances: pd.DataFrame | None = None,
+    min_pw_size: int = 3,
     source_total_counts: pd.Series | None = None,
-    network_type: str = NEIGHBORHOOD_NETWORK_TYPES.…
+    network_type: str = NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
     order: int = 3,
     verbose: bool = True,
     top_n: int = 10,
@@ -55,6 +69,8 @@ def find_and_prune_neighborhoods(
         Compartmentalized species IDs for neighborhood centers
     precomputed_distances : pd.DataFrame or None
         If provided, an edgelist of origin->destination path weights and lengths
+    min_pw_size: int
+        the minimum size of a pathway to be considered
     source_total_counts: pd.Series | None
         Optional, A series of the total counts of each source. As produced by
         source.get_source_total_counts()
@@ -91,6 +107,16 @@ def find_and_prune_neighborhoods(
     if not isinstance(compartmentalized_species, list):
         raise TypeError("compartmentalized_species must be a list")
 
+    invalid_cspecies = [
+        x
+        for x in compartmentalized_species
+        if x not in sbml_dfs.compartmentalized_species.index
+    ]
+    if len(invalid_cspecies) > 0:
+        raise ValueError(
+            f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+        )
+
     if isinstance(precomputed_distances, pd.DataFrame):
         logger.info("Pre-computed neighbors based on precomputed_distances")
 
@@ -105,18 +131,19 @@ def find_and_prune_neighborhoods(
     else:
         precomputed_neighbors = None
 
-    …
+    neighborhood_dicts = find_neighborhoods(
         sbml_dfs=sbml_dfs,
         napistu_graph=napistu_graph,
         compartmentalized_species=compartmentalized_species,
         network_type=network_type,
         order=order,
-        verbose=verbose,
         precomputed_neighbors=precomputed_neighbors,
+        min_pw_size=min_pw_size,
         source_total_counts=source_total_counts,
+        verbose=verbose,
     )
 
-    pruned_neighborhoods = prune_neighborhoods(…
+    pruned_neighborhoods = prune_neighborhoods(neighborhood_dicts, top_n=top_n)
 
     return pruned_neighborhoods
 
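Taken together, the hunks above validate the requested compartmentalized species up front and thread the new min_pw_size argument through to find_neighborhoods. A minimal usage sketch of the 0.4.7 signature, assuming a loaded SBML_dfs model and its igraph network (the variable names and the "SC00001" ID are placeholders):

    from napistu.network import neighborhoods

    # sbml_dfs: an SBML_dfs pathway model; napistu_graph: its igraph form
    pruned = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,
        napistu_graph,
        compartmentalized_species=["SC00001"],  # placeholder sc_id
        min_pw_size=3,  # new in 0.4.7: ignore pathways smaller than this
        order=3,
        top_n=10,
    )

Unknown sc_ids now fail fast with a ValueError naming the invalid entries instead of surfacing as a downstream lookup error.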
@@ -164,7 +191,7 @@ def load_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-    …
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
 
     """
@@ -178,16 +205,16 @@ def load_neighborhoods(
     neighborhood_paths = [vertices_path, networks_path]
 
     if all([os.path.isfile(x) for x in neighborhood_paths]) and overwrite is False:
-        …
+        logger.info(f"loading existing neighborhoods for {neighborhood_prefix}")
 
         all_neighborhoods_df = pd.read_csv(vertices_path, sep="\t")
         with open(networks_path, "rb") as in_file:
-            …
+            neighborhood_dicts = pickle.load(in_file)
 
     else:
-        …
+        logger.info(f"creating neighborhoods based on {neighborhood_prefix}")
 
-        all_neighborhoods_df, …
+        all_neighborhoods_df, neighborhood_dicts = create_neighborhoods(
             s_ids=s_ids,
             sbml_dfs=sbml_dfs,
             napistu_graph=napistu_graph,
@@ -202,9 +229,9 @@ def load_neighborhoods(
 
     # pickle neighborhoods
     with open(networks_path, "wb") as fh:
-        pickle.dump(…
+        pickle.dump(neighborhood_dicts, fh)
 
-    return all_neighborhoods_df, …
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def create_neighborhoods(
@@ -242,7 +269,7 @@ def create_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-    …
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
     """
 
@@ -263,13 +290,13 @@ def create_neighborhoods(
         raise TypeError(f"top_n was a {type(top_n)} and must be an int")
 
     neighborhoods_list = list()
-    …
+    neighborhood_dicts = dict()
     for s_id in s_ids:
         query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, s_id)
 
         compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
 
-        …
+        neighborhood_dicts = find_and_prune_neighborhoods(
             sbml_dfs,
             napistu_graph,
             compartmentalized_species=compartmentalized_species,
@@ -283,23 +310,25 @@ def create_neighborhoods(
 
         neighborhood_entities = pd.concat(
             [
-                …
-                …
+                neighborhood_dicts[sc_id][NEIGHBORHOOD_DICT_KEYS.VERTICES].assign(
+                    focal_sc_id=sc_id
+                )
+                for sc_id in neighborhood_dicts.keys()
             ]
         ).assign(focal_s_id=s_id)
 
         neighborhood_species = neighborhood_entities.merge(
             sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
-            left_on=…
+            left_on=NAPISTU_GRAPH_VERTICES.NAME,
             right_index=True,
         )
 
         neighborhoods_list.append(neighborhood_species)
-        …
+        neighborhood_dicts[s_id] = neighborhood_dicts
 
     all_neighborhoods_df = pd.concat(neighborhoods_list).reset_index(drop=True)
 
-    return all_neighborhoods_df, …
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str:
@@ -321,6 +350,7 @@ def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str
 def load_neighborhoods_by_partition(
     selected_partition: int,
     neighborhood_outdir: str,
+    cache_dir: str,
     wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
 ) -> None:
     """
@@ -343,19 +373,18 @@ def load_neighborhoods_by_partition(
 
     """
 
-    consensus_root = "/group/cpr/consensus"
-    consensus_name = "reactome"
-    consensus_outdir = os.path.join(consensus_root, consensus_name)
-
     if not os.path.isdir(neighborhood_outdir):
         raise FileNotFoundError(f"{neighborhood_outdir} does not exist")
 
+    if not os.path.isdir(cache_dir):
+        raise FileNotFoundError(f"{cache_dir} does not exist")
+
     partition_output = os.path.join(
         neighborhood_outdir, f"partition_{selected_partition}"
     )
     # initialize an empty output
     if os.path.isdir(partition_output):
-        …
+        logger.warning(f"removing existing directory: {partition_output}")
         shutil.rmtree(partition_output)
     os.makedirs(partition_output)
 
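With the hard-coded /group/cpr/consensus paths removed, callers now supply an explicit cache_dir, validated the same way as neighborhood_outdir. A sketch of the updated call (both paths are placeholders; per the hunks below, cache_dir must hold curated.pkl and the pickled network):

    from napistu.network import neighborhoods

    neighborhoods.load_neighborhoods_by_partition(
        selected_partition=0,
        neighborhood_outdir="/tmp/neighborhoods",  # must already exist
        cache_dir="/tmp/napistu_cache",  # must already exist
    )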
@@ -369,13 +398,13 @@ def load_neighborhoods_by_partition(
     if parition_sids_df.shape[0] == 0:
         raise ValueError(f"No s_ids associated with partition {selected_partition}")
 
-    parition_sids = parition_sids_df[…
+    parition_sids = parition_sids_df[SBML_DFS.S_ID].tolist()
 
     # read pathway and network data
 
     # read model containing Calico curations. this is primarily to support search programs
     # to not use these switch to refined.pkl
-    refined_model_pkl_path = os.path.join(…
+    refined_model_pkl_path = os.path.join(cache_dir, "curated.pkl")
     with open(refined_model_pkl_path, "rb") as in_file:
         refined_model = pickle.load(in_file)
     refined_model.validate()
@@ -383,12 +412,12 @@ def load_neighborhoods_by_partition(
     # load the graph
     napistu_graph = ng_utils.read_network_pkl(
         model_prefix="curated",
-        network_dir=…
+        network_dir=cache_dir,
         directed=True,
         wiring_approach=wiring_approach,
     )
 
-    …
+    _, _ = load_neighborhoods(
         s_ids=parition_sids,
         sbml_dfs=refined_model,
         napistu_graph=napistu_graph,
@@ -429,7 +458,7 @@ def read_paritioned_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-    …
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
 
     """
@@ -494,7 +523,7 @@ def read_paritioned_neighborhoods(
 
     # combine all partitions' dfs and dicts
     all_neighborhoods_df = pd.concat(neighborhood_paths_list).reset_index(drop=True)
-    …
+    neighborhood_dicts = dict(ChainMap(*path_dict_list))
 
     # TO DO - remove s_id duplication (these are present in the vertices table in the partition outputs)
     if not all(all_neighborhoods_df["s_id_x"] == all_neighborhoods_df["s_id_y"]):
@@ -503,14 +532,14 @@ def read_paritioned_neighborhoods(
             {"s_id_x": "s_id"}, axis=1
         )
 
-    return all_neighborhoods_df, …
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def find_neighborhoods(
     sbml_dfs: sbml_dfs_core.SBML_dfs,
     napistu_graph: ig.Graph,
     compartmentalized_species: list[str],
-    network_type: str = …
+    network_type: str = NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
     order: int = 3,
     min_pw_size: int = 3,
     precomputed_neighbors: pd.DataFrame | None = None,
@@ -544,7 +573,7 @@ def find_neighborhoods(
         the minimum size of a pathway to be considered
     source_total_counts: pd.Series | None
         Optional, A series of the total counts of each source. As produced by
-        source.get_source_total_counts()
+        source.get_source_total_counts()
     verbose: bool
         Extra reporting
 
@@ -557,15 +586,24 @@ def find_neighborhoods(
     if not isinstance(network_type, str):
         raise TypeError(f"network_type was a {type(network_type)} and must be a str")
 
-    …
-    if network_type not in valid_network_types:
+    if network_type not in VALID_NEIGHBORHOOD_NETWORK_TYPES:
         raise ValueError(
-            f"network_type must be one of {', '.join(…
+            f"network_type must be one of {', '.join(VALID_NEIGHBORHOOD_NETWORK_TYPES)}"
         )
 
     if not isinstance(order, int):
         raise TypeError(f"order was a {type(order)} and must be an int")
 
+    invalid_cspecies = [
+        x
+        for x in compartmentalized_species
+        if x not in sbml_dfs.compartmentalized_species.index
+    ]
+    if len(invalid_cspecies) > 0:
+        raise ValueError(
+            f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+        )
+
     # create a table which includes cspecies and reaction nearby each of the
     # focal compartmentalized_speecies
     neighborhood_df = _build_raw_neighborhood_df(
|
         nodes in the neighborhood
     edges: pd.DataFrame
         edges in the neighborhood
-    …
-        models that …
+    reaction_sources: pd.DataFrame
+        models that reactions were derived from
     neighborhood_path_entities: dict
         upstream and downstream dicts representing entities in paths.
         If the keys are to be included in a neighborhood, the
|
         focal node.
     """
 
-    one_neighborhood_df = neighborhood_df[neighborhood_df[…
+    one_neighborhood_df = neighborhood_df[neighborhood_df[SBML_DFS.SC_ID] == sc_id]
 
     if verbose:
         _create_neighborhood_dict_entry_logging(sc_id, one_neighborhood_df, sbml_dfs)
 
-    if not one_neighborhood_df[…
+    if not one_neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME].eq(sc_id).any():
         raise ValueError(
             f"The focal node sc_id = {sc_id} was not in 'one_neighborhood_df'.\
             By convention it should be part of its neighborhood"
|
 
     # add edge polarity: whether edges are activating, inhibiting or unknown
     if edges.shape[0] > 0:
-        edges[…
-        edges[…
+        edges[NET_POLARITY.LINK_POLARITY] = (
+            edges[SBML_DFS.SBO_TERM]
+            .map(MINI_SBO_TO_NAME)
+            .map(MINI_SBO_NAME_TO_POLARITY)
         )
 
     try:
-        …
-            vertices.rename(columns={…
+        reaction_sources = ng_utils.get_minimal_sources_edges(
+            vertices.rename(columns={NAPISTU_GRAPH_VERTICES.NAME: "node"}),
             sbml_dfs,
             min_pw_size=min_pw_size,
             # optional, counts of sources across the whole model
             source_total_counts=source_total_counts,
         )
     except Exception:
-        …
+        logger.warning(f"Could not get reaction sources for {sc_id}; returning None")
+        reaction_sources = None
 
     # to add weights to the network solve the shortest path problem
     # from the focal node to each neighbor
     # solve this problem separately whether a given neighbor is an
     # ancestor or descendant
 
-…
-…
-…
-…
-…
-…
-…
-…
-…
-        # igraph throws warnings for each pair of unconnected species
-        warnings.simplefilter("ignore")
-
-        neighborhood_paths = neighborhood_graph.get_shortest_paths(
-            # focal node
-            v=sc_id,
-            to=descendants_list,
-            weights="weights",
-            mode="out",
-            output="epath",
-        )
-
-    downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
-        neighborhood_paths, edges, vertices=descendants_list, weight_var="weights"
-    )
-    downstream_path_attrs = downstream_path_attrs.assign(node_orientation="downstream")
-
-    # ancestors -> focal_node
-
-    one_ancestors_df = one_neighborhood_df[
-        one_neighborhood_df["relationship"] == "ancestors"
-    ]
-    ancestors_list = list(set(one_ancestors_df["name"].tolist()).union({sc_id}))
-
-    with warnings.catch_warnings():
-        # igraph throws warnings for each pair of unconnected species
-        warnings.simplefilter("ignore")
-
-        neighborhood_paths = neighborhood_graph.get_shortest_paths(
-            v=sc_id,
-            to=ancestors_list,
-            weights="upstream_weights",
-            mode="in",
-            output="epath",
-        )
-
-    upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
-        neighborhood_paths,
+    (
+        downstream_path_attrs,
+        downstream_entity_dict,
+        upstream_path_attrs,
+        upstream_entity_dict,
+    ) = _find_neighbors_paths(
+        neighborhood_graph,
+        one_neighborhood_df,
+        sc_id,
         edges,
-        vertices=ancestors_list,
-        weight_var="upstream_weights",
     )
-    upstream_path_attrs = upstream_path_attrs.assign(node_orientation="upstream")
 
     # combine upstream and downstream shortest paths
     # in cases a node is upstream and downstream of the focal node
     # by taking the lowest path weight
     vertex_neighborhood_attrs = (
         pd.concat([downstream_path_attrs, upstream_path_attrs])
-        .sort_values(…
+        .sort_values(DISTANCES.PATH_WEIGHTS)
         .groupby("neighbor")
         .first()
     )
     # label the focal node
-    vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = …
+    vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = GRAPH_RELATIONSHIPS.FOCAL
 
     # if the precomputed distances, graph and/or sbml_dfs are inconsistent
     # then the shortest paths search may just return empty lists
     # throw a clearer error message in this case.
-    EXPECTED_VERTEX_ATTRS = {…
+    EXPECTED_VERTEX_ATTRS = {
+        DISTANCES.FINAL_FROM,
+        DISTANCES.FINAL_TO,
+        NET_POLARITY.NET_POLARITY,
+    }
     missing_vertex_attrs = EXPECTED_VERTEX_ATTRS.difference(
         set(vertex_neighborhood_attrs.columns.tolist())
     )
|
     # add net_polarity to edges in addition to nodes
     edges = edges.merge(
         vertex_neighborhood_attrs.reset_index()[
-            […
+            [DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO, NET_POLARITY.NET_POLARITY]
         ].dropna(),
-        left_on=[…
-        right_on=[…
+        left_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
+        right_on=[DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO],
         how="left",
     )
 
     vertices = vertices.merge(
-        vertex_neighborhood_attrs, left_on=…
+        vertex_neighborhood_attrs, left_on=NAPISTU_GRAPH_VERTICES.NAME, right_index=True
     )
 
     # drop nodes with a path length / weight of zero
     # which are NOT the focal node
     # these were cases where no path to/from the focal node to the query node was found
     disconnected_neighbors = vertices.query(
-        "(not node_orientation == '…
+        f"(not node_orientation == '{GRAPH_RELATIONSHIPS.FOCAL}') and {DISTANCES.PATH_WEIGHTS} == 0"
     )
     vertices = vertices[~vertices.index.isin(disconnected_neighbors.index.tolist())]
 
|
     vertices = add_vertices_uri_urls(vertices, sbml_dfs)
 
     neighborhood_path_entities = {
-        …
-        …
+        NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM: downstream_entity_dict,
+        NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM: upstream_entity_dict,
     }
 
     # update graph with additional vertex and edge attributes
@@ -799,16 +803,16 @@ def create_neighborhood_dict_entry(
         vertices=vertices.to_dict("records"),
         edges=edges.to_dict("records"),
         directed=napistu_graph.is_directed(),
-        vertex_name_attr=…
-        edge_foreign_keys=(…
+        vertex_name_attr=NAPISTU_GRAPH_VERTICES.NAME,
+        edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
     )
 
     outdict = {
-        …
-        …
-        …
-        …
-        …
+        NEIGHBORHOOD_DICT_KEYS.GRAPH: updated_napistu_graph,
+        NEIGHBORHOOD_DICT_KEYS.VERTICES: vertices,
+        NEIGHBORHOOD_DICT_KEYS.EDGES: edges,
+        NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: reaction_sources,
+        NEIGHBORHOOD_DICT_KEYS.NEIGHBORHOOD_PATH_ENTITIES: neighborhood_path_entities,
     }
 
     return outdict
|
     sc_id: str, one_neighborhood_df: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ):
     df_summary = one_neighborhood_df.copy()
-    df_summary[…
-    …
-    for x in df_summary[…
+    df_summary[NAPISTU_GRAPH_VERTICES.NODE_TYPE] = [
+        NAPISTU_GRAPH_NODE_TYPES.SPECIES if x else NAPISTU_GRAPH_NODE_TYPES.REACTION
+        for x in df_summary[NAPISTU_GRAPH_VERTICES.NAME].isin(
+            sbml_dfs.compartmentalized_species.index
+        )
     ]
     relationship_counts = df_summary.value_counts(
         ["relationship", "node_type"]
@@ -844,22 +850,45 @@ def add_vertices_uri_urls(
     vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ) -> pd.DataFrame:
     """
-    Add …
+    Add URI URLs to neighborhood vertices DataFrame.
 
-    …
+    This function enriches a vertices DataFrame with URI URLs for both species and
+    reactions. For species, it adds standard reference identifiers and Pharos IDs
+    where available. For reactions, it adds reaction-specific URI URLs.
 
     Parameters
     ----------
     vertices: pd.DataFrame
-        …
+        DataFrame containing neighborhood vertices with the following required columns:
+        - NAPISTU_GRAPH_VERTICES.NAME: The name/identifier of each vertex
+        - NAPISTU_GRAPH_VERTICES.NODE_TYPE: The type of node, either
+          NAPISTU_GRAPH_NODE_TYPES.SPECIES or NAPISTU_GRAPH_NODE_TYPES.REACTION
     sbml_dfs: sbml_dfs_core.SBML_dfs
-        …
+        Pathway model including species, compartmentalized species, reactions and ontologies
 
     Returns
     -------
-    …
-    …
+    pd.DataFrame
+        Input vertices DataFrame enriched with URI URL columns:
+        - For species: standard reference identifier URLs and Pharos IDs
+        - For reactions: reaction-specific URI URLs
+        - Empty strings for missing URLs
 
+    Raises
+    ------
+    ValueError
+        If vertices DataFrame is empty (no rows)
+    TypeError
+        If the output is not a pandas DataFrame
+    ValueError
+        If the output row count doesn't match the input row count
+
+    Notes
+    -----
+    - Species vertices are merged with compartmentalized_species to get s_id mappings
+    - Reaction vertices are processed directly using their names
+    - Missing URLs are filled with empty strings
+    - The function preserves the original row order and count
     """
 
     if vertices.shape[0] <= 0:
@@ -868,35 +897,54 @@ def add_vertices_uri_urls(
     # add uri urls for each node
 
     # add s_ids
-    neighborhood_species = vertices[…
-    …
-    …
+    neighborhood_species = vertices[
+        vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
+    ].merge(
+        sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
+        left_on=NAPISTU_GRAPH_VERTICES.NAME,
         right_index=True,
         how="left",
     )
 
     # add a standard reference identifier
     neighborhood_species_aug = neighborhood_species.merge(
-        sbml_dfs.get_uri_urls(…
-        …
+        sbml_dfs.get_uri_urls(
+            NAPISTU_GRAPH_NODE_TYPES.SPECIES, neighborhood_species[SBML_DFS.S_ID]
+        ),
+        left_on=SBML_DFS.S_ID,
         right_index=True,
         how="left",
         # add pharos ids where available
     ).merge(
         sbml_dfs.get_uri_urls(
-            …
-            …
-            …
+            NAPISTU_GRAPH_NODE_TYPES.SPECIES,
+            neighborhood_species[SBML_DFS.S_ID],
+            required_ontology=ONTOLOGIES.PHAROS,
+        ).rename(ONTOLOGIES.PHAROS),
+        left_on=SBML_DFS.S_ID,
         right_index=True,
         how="left",
     )
 
-    if …
-    …
+    if (
+        sum(
+            vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
+        )
+        > 0
+    ):
+        neighborhood_reactions = vertices[
+            vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
+        ].merge(
         sbml_dfs.get_uri_urls(
-            …
+            SBML_DFS.REACTIONS,
+            vertices[
+                vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+                == NAPISTU_GRAPH_NODE_TYPES.REACTION
+            ][NAPISTU_GRAPH_VERTICES.NAME],
         ),
-        left_on=…
+        left_on=NAPISTU_GRAPH_VERTICES.NAME,
         right_index=True,
         how="left",
     )
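Given the contract documented above for add_vertices_uri_urls, a sketch of a minimal call (the two-row table, its placeholder IDs, and the in-scope sbml_dfs model are illustrative; column names come from the package constants):

    import pandas as pd
    from napistu.network import neighborhoods
    from napistu.network.constants import (
        NAPISTU_GRAPH_NODE_TYPES,
        NAPISTU_GRAPH_VERTICES,
    )

    # "SC00001"/"R00001" are placeholder identifiers; sbml_dfs is a loaded SBML_dfs model
    vertices = pd.DataFrame(
        {
            NAPISTU_GRAPH_VERTICES.NAME: ["SC00001", "R00001"],
            NAPISTU_GRAPH_VERTICES.NODE_TYPE: [
                NAPISTU_GRAPH_NODE_TYPES.SPECIES,
                NAPISTU_GRAPH_NODE_TYPES.REACTION,
            ],
        }
    )
    annotated = neighborhoods.add_vertices_uri_urls(vertices, sbml_dfs)
    # same row count as the input, with URL columns added ("" where missing)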
@@ -945,7 +993,7 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
     if not isinstance(top_n, int):
         raise TypeError(f"top_n was a {type(top_n)} and must be an int")
 
-    …
+    pruned_neighborhood_dicts = dict()
 
     for an_sc_id in neighborhoods.keys():
         one_neighborhood = neighborhoods[an_sc_id]
@@ -955,41 +1003,58 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
         pruned_vertices = _prune_vertex_set(one_neighborhood, top_n=top_n)
 
         # reduce neighborhood to this set of high-weight vertices
-        all_neighbors = pd.DataFrame(…
+        all_neighbors = pd.DataFrame(
+            {
+                NAPISTU_GRAPH_VERTICES.NAME: one_neighborhood[
+                    NEIGHBORHOOD_DICT_KEYS.GRAPH
+                ].vs[NAPISTU_GRAPH_VERTICES.NAME]
+            }
+        )
         pruned_vertices_indices = all_neighbors[
-            all_neighbors[…
+            all_neighbors[NAPISTU_GRAPH_VERTICES.NAME].isin(
+                pruned_vertices[NAPISTU_GRAPH_VERTICES.NAME]
+            )
         ].index.tolist()
 
-        pruned_neighborhood = one_neighborhood[…
-            one_neighborhood[…
+        pruned_neighborhood = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH].subgraph(
+            one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH].vs[pruned_vertices_indices],
             implementation="auto",
         )
 
         pruned_edges = pd.DataFrame([e.attributes() for e in pruned_neighborhood.es])
 
-        pruned_reactions = pruned_vertices[…
-        …
-        …
+        pruned_reactions = pruned_vertices[
+            pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
+        ][NAPISTU_GRAPH_VERTICES.NAME]
 
         if pruned_reactions.shape[0] != 0:
-            if one_neighborhood[…
+            if one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES] is None:
                 # allow for missing source information since this is currently optional
-                …
+                pruned_reaction_sources = one_neighborhood[
+                    NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+                ]
             else:
-                …
-                …
+                pruned_reaction_sources = one_neighborhood[
+                    NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+                ][
+                    one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES][
+                        SBML_DFS.R_ID
+                    ].isin(pruned_reactions)
                 ]
         else:
-            …
+            pruned_reaction_sources = one_neighborhood[
+                NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+            ]
 
-        …
-        …
-        …
-        …
-        …
+        pruned_neighborhood_dicts[an_sc_id] = {
+            NEIGHBORHOOD_DICT_KEYS.GRAPH: pruned_neighborhood,
+            NEIGHBORHOOD_DICT_KEYS.VERTICES: pruned_vertices,
+            NEIGHBORHOOD_DICT_KEYS.EDGES: pruned_edges,
+            NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: pruned_reaction_sources,
         }
 
-    return …
+    return pruned_neighborhood_dicts
 
 
 def plot_neighborhood(
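The rewritten loop assembles each pruned entry under the shared NEIGHBORHOOD_DICT_KEYS constants and returns pruned_neighborhood_dicts keyed by focal sc_id. A sketch of consuming the output (neighborhood_dicts as produced by find_neighborhoods earlier; variable names are illustrative):

    from napistu.network.constants import NEIGHBORHOOD_DICT_KEYS

    pruned = neighborhoods.prune_neighborhoods(neighborhood_dicts, top_n=100)
    for sc_id, entry in pruned.items():
        subgraph = entry[NEIGHBORHOOD_DICT_KEYS.GRAPH]  # igraph subgraph of retained vertices
        vertices = entry[NEIGHBORHOOD_DICT_KEYS.VERTICES]  # pruned vertex table
        edges = entry[NEIGHBORHOOD_DICT_KEYS.EDGES]  # attributes of surviving edges
        sources = entry[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES]  # may be None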
@@ -1029,16 +1094,16 @@ def plot_neighborhood(
         "focal disease": "lime",
         "disease": "aquamarine",
         "focal": "lightcoral",
-        …
-        …
+        NAPISTU_GRAPH_NODE_TYPES.SPECIES: "firebrick",
+        NAPISTU_GRAPH_NODE_TYPES.REACTION: "dodgerblue",
     }
 
     edge_polarity_colors = {
-        …
-        …
-        …
-        …
-        …
+        NET_POLARITY.AMBIGUOUS: "dimgray",
+        NET_POLARITY.ACTIVATION: "gold",
+        NET_POLARITY.INHIBITION: "royalblue",
+        NET_POLARITY.AMBIGUOUS_ACTIVATION: "palegoldenrod",
+        NET_POLARITY.AMBIGUOUS_INHIBITION: "powerblue",
         np.nan: "dimgray",
     }
|
     visual_style["vertex_size"] = 10
     if name_nodes:
         visual_style["vertex_label"] = [
-            textwrap.fill(x, 15) …
+            textwrap.fill(x, 15)
+            for x in neighborhood_graph.vs[NAPISTU_GRAPH_VERTICES.NODE_NAME]
         ]
         visual_style["vertex_label_color"] = "white"
         visual_style["vertex_label_size"] = 8
         visual_style["vertex_label_angle"] = 90
         visual_style["vertex_label_dist"] = 3
     visual_style["vertex_color"] = [
-        color_dict[x] for x in neighborhood_graph.vs[…
+        color_dict[x] for x in neighborhood_graph.vs[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
     ]
     visual_style["edge_color"] = [
-        edge_polarity_colors[x] …
+        edge_polarity_colors[x]
+        for x in neighborhood_graph.es[NET_POLARITY.NET_POLARITY]
     ]
     visual_style["layout"] = neighborhood_graph_layout
     visual_style["bbox"] = (plot_size, plot_size)
@@ -1089,8 +1156,8 @@ def _precompute_neighbors(
 
     # check that compartmentalized_species are included in precomputed_distances
     all_cspecies = {
-        *precomputed_distances[…
-        *precomputed_distances[…
+        *precomputed_distances[NAPISTU_EDGELIST.SC_ID_ORIGIN].tolist(),
+        *precomputed_distances[NAPISTU_EDGELIST.SC_ID_DEST].tolist(),
     }
     missing_cspecies = set(compartmentalized_species).difference(all_cspecies)
     if len(missing_cspecies) > 0:
@@ -1105,14 +1172,16 @@ def _precompute_neighbors(
         NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
         NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
     ]:
-        valid_origin = precomputed_distances[…
+        valid_origin = precomputed_distances[NAPISTU_EDGELIST.SC_ID_ORIGIN].isin(
             compartmentalized_species
         )
     if network_type in [
         NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM,
         NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
     ]:
-        valid_dest = precomputed_distances[…
+        valid_dest = precomputed_distances[NAPISTU_EDGELIST.SC_ID_DEST].isin(
+            compartmentalized_species
+        )
 
     if network_type == NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS:
         cspecies_subset_precomputed_distances = precomputed_distances[
|
 
     # filter by distance
     close_cspecies_subset_precomputed_distances = cspecies_subset_precomputed_distances[
-        cspecies_subset_precomputed_distances[…
+        cspecies_subset_precomputed_distances[DISTANCES.PATH_LENGTH] <= order
     ]
 
     # filter to retain top_n
@@ -1143,13 +1212,13 @@ def _precompute_neighbors(
     ]:
         top_descendants = (
             close_cspecies_subset_precomputed_distances[
-                close_cspecies_subset_precomputed_distances[…
-                …
-                )
+                close_cspecies_subset_precomputed_distances[
+                    DISTANCES.SC_ID_ORIGIN
+                ].isin(compartmentalized_species)
             ]
             # sort by path_weight so we can retain the lowest weight neighbors
-            .sort_values(…
-            .groupby(…
+            .sort_values(DISTANCES.PATH_WEIGHTS)
+            .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
             .head(top_n)
         )
 
@@ -1161,9 +1230,9 @@ def _precompute_neighbors(
     ]:
         top_ancestors = (
             close_cspecies_subset_precomputed_distances[
-                close_cspecies_subset_precomputed_distances[…
-                …
-                )
+                close_cspecies_subset_precomputed_distances[
+                    NAPISTU_EDGELIST.SC_ID_DEST
+                ].isin(compartmentalized_species)
             ]
             # sort by path_upstream_weights so we can retain the lowest weight neighbors
             # we allow for upstream weights to differ from downstream weights
|
             # the logic is flipped if we are looking for ancestors where
             # we penalize based on the number of parents of a node when
             # we use it (i.e., the default upstream_weights).
-            .sort_values(…
-            .groupby(…
+            .sort_values(DISTANCES.PATH_UPSTREAM_WEIGHTS)
+            .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
             .head(top_n)
         )
 
|
         precomputed_neighbors=top_descendants,
         compartmentalized_species=compartmentalized_species,
         sbml_dfs=sbml_dfs,
-        relationship=…
+        relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
     )
 
     if downstream_reactions is not None:
|
         precomputed_neighbors=top_ancestors,
         compartmentalized_species=compartmentalized_species,
         sbml_dfs=sbml_dfs,
-        relationship=…
+        relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
     )
 
     if upstream_reactions is not None:
|
     # an sc_id_origin-specific subgraph
     identity_df = pd.DataFrame(
         {
-            …
-            …
+            NAPISTU_EDGELIST.SC_ID_ORIGIN: compartmentalized_species,
+            NAPISTU_EDGELIST.SC_ID_DEST: compartmentalized_species,
         }
     )
 
|
             downstream_reactions,  # type: ignore
             identity_df,
         ]
-    )[…
+    )[
+        [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
+    ].drop_duplicates()
     elif network_type == NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM:
         precomputed_neighbors = pd.concat([top_descendants, downstream_reactions, identity_df])[  # type: ignore
-            […
+            [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
         ].drop_duplicates()
     elif network_type == NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM:
         precomputed_neighbors = pd.concat([top_ancestors, upstream_reactions, identity_df])[  # type: ignore
-            […
+            [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
         ].drop_duplicates()
     else:
         raise ValueError("This error shouldn't happen")
|
     descendants_df = _find_neighbors(
         napistu_graph=napistu_graph,
         compartmentalized_species=compartmentalized_species,
-        relationship=…
+        relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
         order=order,
         precomputed_neighbors=precomputed_neighbors,
     )
@@ -1284,7 +1355,7 @@ def _build_raw_neighborhood_df(
     ancestors_df = _find_neighbors(
         napistu_graph=napistu_graph,
         compartmentalized_species=compartmentalized_species,
-        relationship=…
+        relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
         order=order,
         precomputed_neighbors=precomputed_neighbors,
     )
@@ -1300,8 +1371,9 @@ def _build_raw_neighborhood_df(
         raise NotImplementedError("invalid network_type")
 
     # add name since this is an easy way to lookup igraph vertices
-    neighborhood_df[…
-        x[…
+    neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME] = [
+        x[NAPISTU_GRAPH_VERTICES.NAME]
+        for x in napistu_graph.vs[neighborhood_df["neighbor"]]
     ]
 
     return neighborhood_df
@@ -1327,17 +1399,23 @@ def _find_neighbors(
     if isinstance(precomputed_neighbors, pd.DataFrame):
         # add graph indices to neighbors
         nodes_to_names = (
-            pd.DataFrame(…
+            pd.DataFrame(
+                {
+                    NAPISTU_GRAPH_VERTICES.NAME: napistu_graph.vs[
+                        NAPISTU_GRAPH_VERTICES.NAME
+                    ]
+                }
+            )
             .reset_index()
             .rename({"index": "neighbor"}, axis=1)
         )
 
-        if relationship == …
-            bait_id = …
-            target_id = …
-        elif relationship == …
-            bait_id = …
-            target_id = …
+        if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
+            bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
+            target_id = NAPISTU_EDGELIST.SC_ID_DEST
+        elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
+            bait_id = NAPISTU_EDGELIST.SC_ID_DEST
+            target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
         else:
             raise ValueError(
                 f"relationship must be 'descendants' or 'ancestors' but was {relationship}"
|
             precomputed_neighbors[
                 precomputed_neighbors[bait_id].isin(compartmentalized_species)
             ]
-            .merge(…
-            …
+            .merge(
+                nodes_to_names.rename({NAPISTU_GRAPH_VERTICES.NAME: target_id}, axis=1)
+            )
+            .rename({bait_id: SBML_DFS.SC_ID}, axis=1)
             .drop([target_id], axis=1)
             .assign(relationship=relationship)
         )
     else:
-        if relationship == …
+        if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
             mode_type = "out"
-        elif relationship == …
+        elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
             mode_type = "in"
         else:
             raise ValueError(
@@ -1371,7 +1451,7 @@ def _find_neighbors(
 
     neighbors_df = pd.concat(
         [
-            pd.DataFrame({…
+            pd.DataFrame({SBML_DFS.SC_ID: c, "neighbor": x}, index=range(0, len(x)))
             for c, x in zip(compartmentalized_species, neighbors)
         ]
     ).assign(relationship=relationship)
|
     if precomputed_neighbors.shape[0] == 0:
         return None
 
-    if relationship == …
-        bait_id = …
-        target_id = …
-    elif relationship == …
-        bait_id = …
-        target_id = …
+    if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
+        bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
+        target_id = NAPISTU_EDGELIST.SC_ID_DEST
+    elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
+        bait_id = NAPISTU_EDGELIST.SC_ID_DEST
+        target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
     else:
         raise ValueError(
             f"relationship must be 'descendants' or 'ancestors' but was {relationship}"
@@ -1437,8 +1517,8 @@ def _find_reactions_by_relationship(
         relatives_cspecies = {*relatives, *[uq]}
         # count the number of relative cspecies including each reaction
         rxn_species_counts = sbml_dfs.reaction_species[
-            sbml_dfs.reaction_species[…
-        ].value_counts(…
+            sbml_dfs.reaction_species[SBML_DFS.SC_ID].isin(relatives_cspecies)
+        ].value_counts(SBML_DFS.R_ID)
 
         # retain reactions involving 2+ cspecies.
         # some of these reactions will be irrelevant and will be excluded when
|
 
     """
 
-    neighborhood_vertices = one_neighborhood[…
+    neighborhood_vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]
 
     indexed_neighborhood_species = neighborhood_vertices[
-        neighborhood_vertices[…
+        neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+        == NAPISTU_GRAPH_NODE_TYPES.SPECIES
     ].set_index("node_orientation")
 
     pruned_oriented_neighbors = list()
@@ -1496,14 +1577,14 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
         # handle cases where only one entry exists to DF->series coercion occurs
         vertex_subset = vertex_subset.to_frame().T
 
-        sorted_vertex_set = vertex_subset.sort_values(…
-        weight_cutoff = sorted_vertex_set[…
+        sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHTS)
+        weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHTS].iloc[
            min(top_n - 1, sorted_vertex_set.shape[0] - 1)
         ]
 
         top_neighbors = sorted_vertex_set[
-            sorted_vertex_set[…
-        ][…
+            sorted_vertex_set[DISTANCES.PATH_WEIGHTS] <= weight_cutoff
+        ][NAPISTU_GRAPH_VERTICES.NAME].tolist()
 
         # include reactions and other species necessary to reach the top neighbors
         # by pulling in the past solutions to weighted shortest paths problems
|
     # combine all neighbors
     pruned_neighbors = set().union(*pruned_oriented_neighbors)
     pruned_vertices = neighborhood_vertices[
-        neighborhood_vertices[…
+        neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NAME].isin(pruned_neighbors)
     ].reset_index(drop=True)
 
     return pruned_vertices
@@ -1532,7 +1613,7 @@ def _calculate_path_attrs(
     neighborhood_paths: list[list],
     edges: pd.DataFrame,
     vertices: list,
-    weight_var: str = …
+    weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
 ) -> tuple[pd.DataFrame, dict[Any, set]]:
     """
     Calculate Path Attributes
@@ -1582,15 +1663,15 @@ def _calculate_path_attrs(
     # if all_path_edges.ngroups > 0:
     path_attributes_df = pd.concat(
         [
-            all_path_edges[weight_var].agg("sum").rename(…
-            all_path_edges.agg("size").rename(…
-            all_path_edges[…
+            all_path_edges[weight_var].agg("sum").rename(DISTANCES.PATH_WEIGHTS),
+            all_path_edges.agg("size").rename(DISTANCES.PATH_LENGTH),
+            all_path_edges[NET_POLARITY.LINK_POLARITY]
             .agg(paths._terminal_net_polarity)
-            .rename(…
+            .rename(NET_POLARITY.NET_POLARITY),
             # add the final edge since this can be used to add path attributes to edges
             # i.e., apply net_polarity to an edge
-            all_path_edges["from"].agg("last").rename(…
-            all_path_edges["to"].agg("last").rename(…
+            all_path_edges["from"].agg("last").rename(DISTANCES.FINAL_FROM),
+            all_path_edges["to"].agg("last").rename(DISTANCES.FINAL_TO),
         ],
         axis=1,
     ).reset_index()
|
         if len(neighborhood_paths[i]) == 0
     ]
     edgeles_nodes_df = pd.DataFrame({"neighbor": edgeless_nodes}).assign(
-        …
+        **{
+            DISTANCES.PATH_LENGTH: 0,
+            DISTANCES.PATH_WEIGHTS: 0,
+            NET_POLARITY.NET_POLARITY: None,
+        }
     )
 
     # add edgeless entries as entries in the two outputs
@@ -1630,3 +1715,118 @@ def _calculate_path_attrs(
     )
 
     return path_attributes_df, neighborhood_path_entities
+
+
+def _find_neighbors_paths(
+    neighborhood_graph: ig.Graph,
+    one_neighborhood_df: pd.DataFrame,
+    sc_id: str,
+    edges: pd.DataFrame,
+) -> tuple[pd.DataFrame, dict[Any, set], pd.DataFrame, dict[Any, set]]:
+    """
+    Find shortest paths between the focal node and its neighbors in both directions.
+
+    This function calculates shortest paths from the focal node to its descendants
+    (downstream) and ancestors (upstream) using igraph's shortest path algorithms.
+    It uses _calculate_path_attrs to compute path attributes including path weights,
+    lengths, and polarity information.
+
+    Parameters
+    ----------
+    neighborhood_graph: ig.Graph
+        The igraph Graph object representing the neighborhood network
+    one_neighborhood_df: pd.DataFrame
+        DataFrame containing neighborhood information with 'relationship' column
+        indicating 'descendants' or 'ancestors' for each node
+    sc_id: str
+        The compartmentalized species ID of the focal node
+    edges: pd.DataFrame
+        DataFrame containing edge information with columns for 'from', 'to',
+        weights, and link polarity
+
+    Returns
+    -------
+    downstream_path_attrs: pd.DataFrame
+        DataFrame containing path attributes for downstream paths from focal node
+        to descendants. Includes columns: neighbor, path_weight, path_length,
+        net_polarity, final_from, final_to, node_orientation
+    downstream_entity_dict: dict[Any, set]
+        Dictionary mapping each descendant neighbor to the set of entities
+        (nodes) connecting it to the focal node
+    upstream_path_attrs: pd.DataFrame
+        DataFrame containing path attributes for upstream paths from focal node
+        to ancestors. Includes columns: neighbor, path_weight, path_length,
+        net_polarity, final_from, final_to, node_orientation
+    upstream_entity_dict: dict[Any, set]
+        Dictionary mapping each ancestor neighbor to the set of entities
+        (nodes) connecting it to the focal node
+    """
+
+    one_descendants_df = one_neighborhood_df[
+        one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.DESCENDANTS
+    ]
+    descendants_list = list(
+        set(one_descendants_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+    )
+
+    # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
+    with warnings.catch_warnings():
+        # igraph throws warnings for each pair of unconnected species
+        warnings.simplefilter("ignore")
+
+        neighborhood_paths = neighborhood_graph.get_shortest_paths(
+            # focal node
+            v=sc_id,
+            to=descendants_list,
+            weights=NAPISTU_GRAPH_EDGES.WEIGHTS,
+            mode="out",
+            output="epath",
+        )
+
+    downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
+        neighborhood_paths,
+        edges,
+        vertices=descendants_list,
+        weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
+    )
+    downstream_path_attrs = downstream_path_attrs.assign(
+        node_orientation=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
+    )
+
+    # ancestors -> focal_node
+
+    one_ancestors_df = one_neighborhood_df[
+        one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.ANCESTORS
+    ]
+    ancestors_list = list(
+        set(one_ancestors_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+    )
+
+    with warnings.catch_warnings():
+        # igraph throws warnings for each pair of unconnected species
+        warnings.simplefilter("ignore")
+
+        neighborhood_paths = neighborhood_graph.get_shortest_paths(
+            v=sc_id,
+            to=ancestors_list,
+            weights=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+            mode="in",
+            output="epath",
+        )
+
+    upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
+        neighborhood_paths,
+        edges,
+        vertices=ancestors_list,
+        weight_var=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+    )
+    upstream_path_attrs = upstream_path_attrs.assign(
+        node_orientation=NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
+    )
+
+    return (
+        downstream_path_attrs,
+        downstream_entity_dict,
+        upstream_path_attrs,
+        upstream_entity_dict,
+    )