napistu 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/network/constants.py +21 -2
- napistu/network/neighborhoods.py +319 -161
- napistu/network/ng_utils.py +3 -3
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/METADATA +1 -1
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/RECORD +11 -11
- tests/test_network_neighborhoods.py +90 -1
- tests/test_network_precompute.py +25 -10
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/WHEEL +0 -0
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/top_level.txt +0 -0
napistu/network/constants.py
CHANGED
@@ -10,8 +10,14 @@ from napistu.constants import SBOTERM_NAMES
 
 NAPISTU_GRAPH = SimpleNamespace(VERTICES="vertices", EDGES="edges", METADATA="metadata")
 
-
-
+GRAPH_DIRECTEDNESS = SimpleNamespace(DIRECTED="directed", UNDIRECTED="undirected")
+
+GRAPH_RELATIONSHIPS = SimpleNamespace(
+    ANCESTORS="ancestors",
+    CHILDREN="children",
+    DESCENDANTS="descendants",
+    FOCAL="focal",
+    PARENTS="parents",
 )
 
 NAPISTU_GRAPH_VERTICES = SimpleNamespace(
@@ -230,4 +236,17 @@ NEIGHBORHOOD_DICT_KEYS = SimpleNamespace(
     VERTICES="vertices",
     EDGES="edges",
     REACTION_SOURCES="reaction_sources",
+    NEIGHBORHOOD_PATH_ENTITIES="neighborhood_path_entities",
+)
+
+DISTANCES = SimpleNamespace(
+    # core attributes of precomputed distances
+    SC_ID_ORIGIN="sc_id_origin",
+    SC_ID_DEST="sc_id_dest",
+    PATH_LENGTH="path_length",
+    PATH_UPSTREAM_WEIGHTS="path_upstream_weights",
+    PATH_WEIGHTS="path_weights",
+    # other attributes associated with paths/distances
+    FINAL_FROM="final_from",
+    FINAL_TO="final_to",
 )
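For orientation, the new namespaces in napistu/network/constants.py are plain `types.SimpleNamespace` objects, so downstream modules reference column names as attributes instead of hard-coded strings. A minimal, self-contained sketch follows; the namespace values are copied from the diff above, while the toy DataFrame and its contents are invented for illustration (in the package itself `DISTANCES` would be imported from `napistu.network.constants`).

```python
# Illustrative only: using the DISTANCES namespace added in 0.4.7.
from types import SimpleNamespace

import pandas as pd

DISTANCES = SimpleNamespace(
    SC_ID_ORIGIN="sc_id_origin",
    SC_ID_DEST="sc_id_dest",
    PATH_LENGTH="path_length",
    PATH_UPSTREAM_WEIGHTS="path_upstream_weights",
    PATH_WEIGHTS="path_weights",
    FINAL_FROM="final_from",
    FINAL_TO="final_to",
)

# a toy precomputed-distances table using the shared column names
toy_distances = pd.DataFrame(
    {
        DISTANCES.SC_ID_ORIGIN: ["SC1", "SC1"],
        DISTANCES.SC_ID_DEST: ["SC2", "SC3"],
        DISTANCES.PATH_LENGTH: [1, 2],
        DISTANCES.PATH_WEIGHTS: [0.5, 1.2],
    }
)

# filter by attribute rather than a hard-coded "path_length" string
close = toy_distances[toy_distances[DISTANCES.PATH_LENGTH] <= 1]
print(close)
```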
napistu/network/neighborhoods.py
CHANGED
@@ -22,11 +22,16 @@ from napistu.constants import (
     MINI_SBO_NAME_TO_POLARITY,
     MINI_SBO_TO_NAME,
     NAPISTU_EDGELIST,
+    ONTOLOGIES,
     SBML_DFS,
 )
 
 from napistu.network.constants import (
+    DISTANCES,
+    GRAPH_RELATIONSHIPS,
     GRAPH_WIRING_APPROACHES,
+    NAPISTU_GRAPH_EDGES,
+    NAPISTU_GRAPH_NODE_TYPES,
     NAPISTU_GRAPH_VERTICES,
     NEIGHBORHOOD_DICT_KEYS,
     NEIGHBORHOOD_NETWORK_TYPES,
@@ -102,6 +107,16 @@ def find_and_prune_neighborhoods(
     if not isinstance(compartmentalized_species, list):
         raise TypeError("compartmentalized_species must be a list")
 
+    invalid_cspecies = [
+        x
+        for x in compartmentalized_species
+        if x not in sbml_dfs.compartmentalized_species.index
+    ]
+    if len(invalid_cspecies) > 0:
+        raise ValueError(
+            f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+        )
+
     if isinstance(precomputed_distances, pd.DataFrame):
         logger.info("Pre-computed neighbors based on precomputed_distances")
 
@@ -116,7 +131,7 @@ def find_and_prune_neighborhoods(
     else:
         precomputed_neighbors = None
 
-
+    neighborhood_dicts = find_neighborhoods(
         sbml_dfs=sbml_dfs,
         napistu_graph=napistu_graph,
         compartmentalized_species=compartmentalized_species,
@@ -128,7 +143,7 @@ def find_and_prune_neighborhoods(
         verbose=verbose,
     )
 
-    pruned_neighborhoods = prune_neighborhoods(
+    pruned_neighborhoods = prune_neighborhoods(neighborhood_dicts, top_n=top_n)
 
     return pruned_neighborhoods
 
@@ -176,7 +191,7 @@ def load_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
 
     """
@@ -190,16 +205,16 @@ def load_neighborhoods(
     neighborhood_paths = [vertices_path, networks_path]
 
     if all([os.path.isfile(x) for x in neighborhood_paths]) and overwrite is False:
-
+        logger.info(f"loading existing neighborhoods for {neighborhood_prefix}")
 
         all_neighborhoods_df = pd.read_csv(vertices_path, sep="\t")
         with open(networks_path, "rb") as in_file:
-
+            neighborhood_dicts = pickle.load(in_file)
 
     else:
-
+        logger.info(f"creating neighborhoods based on {neighborhood_prefix}")
 
-        all_neighborhoods_df,
+        all_neighborhoods_df, neighborhood_dicts = create_neighborhoods(
             s_ids=s_ids,
             sbml_dfs=sbml_dfs,
             napistu_graph=napistu_graph,
@@ -214,9 +229,9 @@ def load_neighborhoods(
 
         # pickle neighborhoods
         with open(networks_path, "wb") as fh:
-            pickle.dump(
+            pickle.dump(neighborhood_dicts, fh)
 
-    return all_neighborhoods_df,
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def create_neighborhoods(
@@ -254,7 +269,7 @@ def create_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
     """
 
@@ -275,13 +290,13 @@ def create_neighborhoods(
         raise TypeError(f"top_n was a {type(top_n)} and must be an int")
 
     neighborhoods_list = list()
-
+    neighborhood_dicts = dict()
     for s_id in s_ids:
         query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, s_id)
 
         compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
 
-
+        neighborhood_dicts = find_and_prune_neighborhoods(
             sbml_dfs,
             napistu_graph,
             compartmentalized_species=compartmentalized_species,
@@ -295,25 +310,25 @@ def create_neighborhoods(
 
         neighborhood_entities = pd.concat(
             [
-
+                neighborhood_dicts[sc_id][NEIGHBORHOOD_DICT_KEYS.VERTICES].assign(
                     focal_sc_id=sc_id
                 )
-                for sc_id in
+                for sc_id in neighborhood_dicts.keys()
             ]
         ).assign(focal_s_id=s_id)
 
         neighborhood_species = neighborhood_entities.merge(
             sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
-            left_on=
+            left_on=NAPISTU_GRAPH_VERTICES.NAME,
             right_index=True,
         )
 
         neighborhoods_list.append(neighborhood_species)
-
+        neighborhood_dicts[s_id] = neighborhood_dicts
 
     all_neighborhoods_df = pd.concat(neighborhoods_list).reset_index(drop=True)
 
-    return all_neighborhoods_df,
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str:
@@ -383,7 +398,7 @@ def load_neighborhoods_by_partition(
     if parition_sids_df.shape[0] == 0:
         raise ValueError(f"No s_ids associated with partition {selected_partition}")
 
-    parition_sids = parition_sids_df[
+    parition_sids = parition_sids_df[SBML_DFS.S_ID].tolist()
 
     # read pathway and network data
 
@@ -443,7 +458,7 @@ def read_paritioned_neighborhoods(
     -------
     all_neighborhoods_df: pd.DataFrame
         A table containing all species in each query s_ids neighborhood
-
+    neighborhood_dicts: dict
         Outputs from find_and_prune_neighborhoods for each s_id
 
     """
@@ -508,7 +523,7 @@ def read_paritioned_neighborhoods(
 
     # combine all partitions' dfs and dicts
     all_neighborhoods_df = pd.concat(neighborhood_paths_list).reset_index(drop=True)
-
+    neighborhood_dicts = dict(ChainMap(*path_dict_list))
 
     # TO DO - remove s_id duplication (these are present in the vertices table in the partition outputs)
     if not all(all_neighborhoods_df["s_id_x"] == all_neighborhoods_df["s_id_y"]):
@@ -517,7 +532,7 @@ def read_paritioned_neighborhoods(
             {"s_id_x": "s_id"}, axis=1
         )
 
-    return all_neighborhoods_df,
+    return all_neighborhoods_df, neighborhood_dicts
 
 
 def find_neighborhoods(
@@ -558,7 +573,7 @@ def find_neighborhoods(
         the minimum size of a pathway to be considered
     source_total_counts: pd.Series | None
         Optional, A series of the total counts of each source. As produced by
-        source.get_source_total_counts()
+        source.get_source_total_counts()
     verbose: bool
         Extra reporting
 
@@ -579,6 +594,16 @@ def find_neighborhoods(
     if not isinstance(order, int):
        raise TypeError(f"order was a {type(order)} and must be an int")
 
+    invalid_cspecies = [
+        x
+        for x in compartmentalized_species
+        if x not in sbml_dfs.compartmentalized_species.index
+    ]
+    if len(invalid_cspecies) > 0:
+        raise ValueError(
+            f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+        )
+
     # create a table which includes cspecies and reaction nearby each of the
     # focal compartmentalized_speecies
     neighborhood_df = _build_raw_neighborhood_df(
@@ -656,12 +681,12 @@ def create_neighborhood_dict_entry(
         focal node.
     """
 
-    one_neighborhood_df = neighborhood_df[neighborhood_df[
+    one_neighborhood_df = neighborhood_df[neighborhood_df[SBML_DFS.SC_ID] == sc_id]
 
     if verbose:
         _create_neighborhood_dict_entry_logging(sc_id, one_neighborhood_df, sbml_dfs)
 
-    if not one_neighborhood_df[
+    if not one_neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME].eq(sc_id).any():
         raise ValueError(
             f"The focal node sc_id = {sc_id} was not in 'one_neighborhood_df'.\
             By convention it should be part of its neighborhood"
@@ -677,19 +702,22 @@ def create_neighborhood_dict_entry(
 
     # add edge polarity: whether edges are activating, inhibiting or unknown
     if edges.shape[0] > 0:
-        edges[
-        edges[
+        edges[NET_POLARITY.LINK_POLARITY] = (
+            edges[SBML_DFS.SBO_TERM]
+            .map(MINI_SBO_TO_NAME)
+            .map(MINI_SBO_NAME_TO_POLARITY)
         )
 
     try:
         reaction_sources = ng_utils.get_minimal_sources_edges(
-            vertices.rename(columns={
+            vertices.rename(columns={NAPISTU_GRAPH_VERTICES.NAME: "node"}),
             sbml_dfs,
             min_pw_size=min_pw_size,
             # optional, counts of sources across the whole model
             source_total_counts=source_total_counts,
         )
     except Exception:
+        logger.warning(f"Could not get reaction sources for {sc_id}; returning None")
         reaction_sources = None
 
     # to add weights to the network solve the shortest path problem
@@ -697,75 +725,38 @@ def create_neighborhood_dict_entry(
     # solve this problem separately whether a given neighbor is an
     # ancestor or descendant
 
-
-
-
-
-
-
-
-
-
-        # igraph throws warnings for each pair of unconnected species
-        warnings.simplefilter("ignore")
-
-        neighborhood_paths = neighborhood_graph.get_shortest_paths(
-            # focal node
-            v=sc_id,
-            to=descendants_list,
-            weights="weights",
-            mode="out",
-            output="epath",
-        )
-
-    downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
-        neighborhood_paths, edges, vertices=descendants_list, weight_var="weights"
-    )
-    downstream_path_attrs = downstream_path_attrs.assign(node_orientation="downstream")
-
-    # ancestors -> focal_node
-
-    one_ancestors_df = one_neighborhood_df[
-        one_neighborhood_df["relationship"] == "ancestors"
-    ]
-    ancestors_list = list(set(one_ancestors_df["name"].tolist()).union({sc_id}))
-
-    with warnings.catch_warnings():
-        # igraph throws warnings for each pair of unconnected species
-        warnings.simplefilter("ignore")
-
-        neighborhood_paths = neighborhood_graph.get_shortest_paths(
-            v=sc_id,
-            to=ancestors_list,
-            weights="upstream_weights",
-            mode="in",
-            output="epath",
-        )
-
-    upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
-        neighborhood_paths,
+    (
+        downstream_path_attrs,
+        downstream_entity_dict,
+        upstream_path_attrs,
+        upstream_entity_dict,
+    ) = _find_neighbors_paths(
+        neighborhood_graph,
+        one_neighborhood_df,
+        sc_id,
         edges,
-        vertices=ancestors_list,
-        weight_var="upstream_weights",
     )
-    upstream_path_attrs = upstream_path_attrs.assign(node_orientation="upstream")
 
     # combine upstream and downstream shortest paths
     # in cases a node is upstream and downstream of the focal node
     # by taking the lowest path weight
     vertex_neighborhood_attrs = (
         pd.concat([downstream_path_attrs, upstream_path_attrs])
-        .sort_values(
+        .sort_values(DISTANCES.PATH_WEIGHTS)
         .groupby("neighbor")
         .first()
     )
     # label the focal node
-    vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] =
+    vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = GRAPH_RELATIONSHIPS.FOCAL
 
     # if the precomputed distances, graph and/or sbml_dfs are inconsistent
     # then the shortest paths search may just return empty lists
     # throw a clearer error message in this case.
-    EXPECTED_VERTEX_ATTRS = {
+    EXPECTED_VERTEX_ATTRS = {
+        DISTANCES.FINAL_FROM,
+        DISTANCES.FINAL_TO,
+        NET_POLARITY.NET_POLARITY,
+    }
     missing_vertex_attrs = EXPECTED_VERTEX_ATTRS.difference(
         set(vertex_neighborhood_attrs.columns.tolist())
     )
@@ -780,22 +771,22 @@ def create_neighborhood_dict_entry(
     # add net_polarity to edges in addition to nodes
     edges = edges.merge(
         vertex_neighborhood_attrs.reset_index()[
-            [
+            [DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO, NET_POLARITY.NET_POLARITY]
         ].dropna(),
-        left_on=[
-        right_on=[
+        left_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
+        right_on=[DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO],
         how="left",
     )
 
     vertices = vertices.merge(
-        vertex_neighborhood_attrs, left_on=
+        vertex_neighborhood_attrs, left_on=NAPISTU_GRAPH_VERTICES.NAME, right_index=True
     )
 
     # drop nodes with a path length / weight of zero
     # which are NOT the focal node
     # these were cases where no path to/from the focal node to the query node was found
     disconnected_neighbors = vertices.query(
-        "(not node_orientation == '
+        f"(not node_orientation == '{GRAPH_RELATIONSHIPS.FOCAL}') and {DISTANCES.PATH_WEIGHTS} == 0"
     )
     vertices = vertices[~vertices.index.isin(disconnected_neighbors.index.tolist())]
 
@@ -803,8 +794,8 @@ def create_neighborhood_dict_entry(
     vertices = add_vertices_uri_urls(vertices, sbml_dfs)
 
     neighborhood_path_entities = {
-
-
+        NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM: downstream_entity_dict,
+        NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM: upstream_entity_dict,
     }
 
     # update graph with additional vertex and edge attributes
@@ -812,16 +803,16 @@ def create_neighborhood_dict_entry(
         vertices=vertices.to_dict("records"),
         edges=edges.to_dict("records"),
         directed=napistu_graph.is_directed(),
-        vertex_name_attr=
-        edge_foreign_keys=(
+        vertex_name_attr=NAPISTU_GRAPH_VERTICES.NAME,
+        edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
     )
 
     outdict = {
-
-
-
-
-
+        NEIGHBORHOOD_DICT_KEYS.GRAPH: updated_napistu_graph,
+        NEIGHBORHOOD_DICT_KEYS.VERTICES: vertices,
+        NEIGHBORHOOD_DICT_KEYS.EDGES: edges,
+        NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: reaction_sources,
+        NEIGHBORHOOD_DICT_KEYS.NEIGHBORHOOD_PATH_ENTITIES: neighborhood_path_entities,
     }
 
     return outdict
@@ -831,9 +822,11 @@ def _create_neighborhood_dict_entry_logging(
     sc_id: str, one_neighborhood_df: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ):
     df_summary = one_neighborhood_df.copy()
-    df_summary[
-
-        for x in df_summary[
+    df_summary[NAPISTU_GRAPH_VERTICES.NODE_TYPE] = [
+        NAPISTU_GRAPH_NODE_TYPES.SPECIES if x else NAPISTU_GRAPH_NODE_TYPES.REACTION
+        for x in df_summary[NAPISTU_GRAPH_VERTICES.NAME].isin(
+            sbml_dfs.compartmentalized_species.index
+        )
     ]
     relationship_counts = df_summary.value_counts(
         ["relationship", "node_type"]
@@ -857,22 +850,45 @@ def add_vertices_uri_urls(
     vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ) -> pd.DataFrame:
     """
-    Add
+    Add URI URLs to neighborhood vertices DataFrame.
 
-
+    This function enriches a vertices DataFrame with URI URLs for both species and
+    reactions. For species, it adds standard reference identifiers and Pharos IDs
+    where available. For reactions, it adds reaction-specific URI URLs.
 
     Parameters
     ----------
     vertices: pd.DataFrame
-
+        DataFrame containing neighborhood vertices with the following required columns:
+        - NAPISTU_GRAPH_VERTICES.NAME: The name/identifier of each vertex
+        - NAPISTU_GRAPH_VERTICES.NODE_TYPE: The type of node, either
+          NAPISTU_GRAPH_NODE_TYPES.SPECIES or NAPISTU_GRAPH_NODE_TYPES.REACTION
     sbml_dfs: sbml_dfs_core.SBML_dfs
-
+        Pathway model including species, compartmentalized species, reactions and ontologies
 
     Returns
     -------
-
-
+    pd.DataFrame
+        Input vertices DataFrame enriched with URI URL columns:
+        - For species: standard reference identifier URLs and Pharos IDs
+        - For reactions: reaction-specific URI URLs
+        - Empty strings for missing URLs
 
+    Raises
+    ------
+    ValueError
+        If vertices DataFrame is empty (no rows)
+    TypeError
+        If the output is not a pandas DataFrame
+    ValueError
+        If the output row count doesn't match the input row count
+
+    Notes
+    -----
+    - Species vertices are merged with compartmentalized_species to get s_id mappings
+    - Reaction vertices are processed directly using their names
+    - Missing URLs are filled with empty strings
+    - The function preserves the original row order and count
     """
 
     if vertices.shape[0] <= 0:
@@ -882,41 +898,53 @@ def add_vertices_uri_urls(
 
     # add s_ids
     neighborhood_species = vertices[
-        vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] ==
+        vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
     ].merge(
         sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
-        left_on=
+        left_on=NAPISTU_GRAPH_VERTICES.NAME,
         right_index=True,
         how="left",
     )
 
     # add a standard reference identifier
     neighborhood_species_aug = neighborhood_species.merge(
-        sbml_dfs.get_uri_urls(
+        sbml_dfs.get_uri_urls(
+            NAPISTU_GRAPH_NODE_TYPES.SPECIES, neighborhood_species[SBML_DFS.S_ID]
+        ),
         left_on=SBML_DFS.S_ID,
         right_index=True,
         how="left",
         # add pharos ids where available
     ).merge(
         sbml_dfs.get_uri_urls(
-
-
+            NAPISTU_GRAPH_NODE_TYPES.SPECIES,
+            neighborhood_species[SBML_DFS.S_ID],
+            required_ontology=ONTOLOGIES.PHAROS,
+        ).rename(ONTOLOGIES.PHAROS),
        left_on=SBML_DFS.S_ID,
        right_index=True,
        how="left",
     )
 
-    if
+    if (
+        sum(
+            vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
+        )
+        > 0
+    ):
         neighborhood_reactions = vertices[
-            vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
         ].merge(
             sbml_dfs.get_uri_urls(
-
-                vertices[
-
-
+                SBML_DFS.REACTIONS,
+                vertices[
+                    vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+                    == NAPISTU_GRAPH_NODE_TYPES.REACTION
+                ][NAPISTU_GRAPH_VERTICES.NAME],
             ),
-            left_on=
+            left_on=NAPISTU_GRAPH_VERTICES.NAME,
             right_index=True,
             how="left",
         )
@@ -965,7 +993,7 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
     if not isinstance(top_n, int):
         raise TypeError(f"top_n was a {type(top_n)} and must be an int")
 
-
+    pruned_neighborhood_dicts = dict()
 
     for an_sc_id in neighborhoods.keys():
         one_neighborhood = neighborhoods[an_sc_id]
@@ -977,14 +1005,14 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
         # reduce neighborhood to this set of high-weight vertices
         all_neighbors = pd.DataFrame(
             {
-                NAPISTU_GRAPH_VERTICES.
+                NAPISTU_GRAPH_VERTICES.NAME: one_neighborhood[
                     NEIGHBORHOOD_DICT_KEYS.GRAPH
-                ].vs[NAPISTU_GRAPH_VERTICES.
+                ].vs[NAPISTU_GRAPH_VERTICES.NAME]
             }
         )
         pruned_vertices_indices = all_neighbors[
-            all_neighbors[NAPISTU_GRAPH_VERTICES.
-            pruned_vertices[NAPISTU_GRAPH_VERTICES.
+            all_neighbors[NAPISTU_GRAPH_VERTICES.NAME].isin(
+                pruned_vertices[NAPISTU_GRAPH_VERTICES.NAME]
            )
        ].index.tolist()
 
@@ -996,8 +1024,9 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
         pruned_edges = pd.DataFrame([e.attributes() for e in pruned_neighborhood.es])
 
         pruned_reactions = pruned_vertices[
-            pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
-
+            pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+            == NAPISTU_GRAPH_NODE_TYPES.REACTION
+        ][NAPISTU_GRAPH_VERTICES.NAME]
 
         if pruned_reactions.shape[0] != 0:
             if one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES] is None:
@@ -1018,14 +1047,14 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
                 NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
             ]
 
-
+        pruned_neighborhood_dicts[an_sc_id] = {
             NEIGHBORHOOD_DICT_KEYS.GRAPH: pruned_neighborhood,
             NEIGHBORHOOD_DICT_KEYS.VERTICES: pruned_vertices,
             NEIGHBORHOOD_DICT_KEYS.EDGES: pruned_edges,
             NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: pruned_reaction_sources,
         }
 
-    return
+    return pruned_neighborhood_dicts
 
 
 def plot_neighborhood(
@@ -1065,8 +1094,8 @@ def plot_neighborhood(
         "focal disease": "lime",
         "disease": "aquamarine",
         "focal": "lightcoral",
-
-
+        NAPISTU_GRAPH_NODE_TYPES.SPECIES: "firebrick",
+        NAPISTU_GRAPH_NODE_TYPES.REACTION: "dodgerblue",
     }
 
     edge_polarity_colors = {
@@ -1173,7 +1202,7 @@ def _precompute_neighbors(
 
     # filter by distance
     close_cspecies_subset_precomputed_distances = cspecies_subset_precomputed_distances[
-        cspecies_subset_precomputed_distances[
+        cspecies_subset_precomputed_distances[DISTANCES.PATH_LENGTH] <= order
     ]
 
     # filter to retain top_n
@@ -1183,12 +1212,12 @@ def _precompute_neighbors(
     ]:
         top_descendants = (
             close_cspecies_subset_precomputed_distances[
-                close_cspecies_subset_precomputed_distances[
-
-                )
+                close_cspecies_subset_precomputed_distances[
+                    DISTANCES.SC_ID_ORIGIN
+                ].isin(compartmentalized_species)
             ]
             # sort by path_weight so we can retain the lowest weight neighbors
-            .sort_values(
+            .sort_values(DISTANCES.PATH_WEIGHTS)
             .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
             .head(top_n)
         )
@@ -1216,7 +1245,7 @@ def _precompute_neighbors(
             # the logic is flipped if we are looking for ancestors where
             # we penalize based on the number of parents of a node when
            # we use it (i.e., the default upstream_weights).
-            .sort_values(
+            .sort_values(DISTANCES.PATH_UPSTREAM_WEIGHTS)
            .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
            .head(top_n)
        )
@@ -1233,7 +1262,7 @@ def _precompute_neighbors(
         precomputed_neighbors=top_descendants,
         compartmentalized_species=compartmentalized_species,
         sbml_dfs=sbml_dfs,
-        relationship=
+        relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
     )
 
     if downstream_reactions is not None:
@@ -1247,7 +1276,7 @@ def _precompute_neighbors(
         precomputed_neighbors=top_ancestors,
         compartmentalized_species=compartmentalized_species,
         sbml_dfs=sbml_dfs,
-        relationship=
+        relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
     )
 
     if upstream_reactions is not None:
@@ -1313,7 +1342,7 @@ def _build_raw_neighborhood_df(
         descendants_df = _find_neighbors(
             napistu_graph=napistu_graph,
             compartmentalized_species=compartmentalized_species,
-            relationship=
+            relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
             order=order,
             precomputed_neighbors=precomputed_neighbors,
         )
@@ -1326,7 +1355,7 @@ def _build_raw_neighborhood_df(
         ancestors_df = _find_neighbors(
             napistu_graph=napistu_graph,
             compartmentalized_species=compartmentalized_species,
-            relationship=
+            relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
             order=order,
             precomputed_neighbors=precomputed_neighbors,
        )
@@ -1342,8 +1371,9 @@ def _build_raw_neighborhood_df(
         raise NotImplementedError("invalid network_type")
 
     # add name since this is an easy way to lookup igraph vertices
-    neighborhood_df[
-        x[
+    neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME] = [
+        x[NAPISTU_GRAPH_VERTICES.NAME]
+        for x in napistu_graph.vs[neighborhood_df["neighbor"]]
     ]
 
     return neighborhood_df
@@ -1369,15 +1399,21 @@ def _find_neighbors(
     if isinstance(precomputed_neighbors, pd.DataFrame):
         # add graph indices to neighbors
         nodes_to_names = (
-            pd.DataFrame(
+            pd.DataFrame(
+                {
+                    NAPISTU_GRAPH_VERTICES.NAME: napistu_graph.vs[
+                        NAPISTU_GRAPH_VERTICES.NAME
+                    ]
+                }
+            )
             .reset_index()
             .rename({"index": "neighbor"}, axis=1)
         )
 
-        if relationship ==
+        if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
             bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
             target_id = NAPISTU_EDGELIST.SC_ID_DEST
-        elif relationship ==
+        elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
             bait_id = NAPISTU_EDGELIST.SC_ID_DEST
             target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
         else:
@@ -1389,15 +1425,17 @@ def _find_neighbors(
             precomputed_neighbors[
                 precomputed_neighbors[bait_id].isin(compartmentalized_species)
             ]
-            .merge(
+            .merge(
+                nodes_to_names.rename({NAPISTU_GRAPH_VERTICES.NAME: target_id}, axis=1)
+            )
             .rename({bait_id: SBML_DFS.SC_ID}, axis=1)
            .drop([target_id], axis=1)
            .assign(relationship=relationship)
        )
    else:
-        if relationship ==
+        if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
            mode_type = "out"
-        elif relationship ==
+        elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
            mode_type = "in"
        else:
            raise ValueError(
@@ -1443,10 +1481,10 @@ def _find_reactions_by_relationship(
     if precomputed_neighbors.shape[0] == 0:
         return None
 
-    if relationship ==
+    if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
         bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
         target_id = NAPISTU_EDGELIST.SC_ID_DEST
-    elif relationship ==
+    elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
         bait_id = NAPISTU_EDGELIST.SC_ID_DEST
         target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
     else:
@@ -1525,10 +1563,11 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
 
     """
 
-    neighborhood_vertices = one_neighborhood[
+    neighborhood_vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]
 
     indexed_neighborhood_species = neighborhood_vertices[
-        neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+        neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+        == NAPISTU_GRAPH_NODE_TYPES.SPECIES
     ].set_index("node_orientation")
 
     pruned_oriented_neighbors = list()
@@ -1538,14 +1577,14 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
         # handle cases where only one entry exists to DF->series coercion occurs
         vertex_subset = vertex_subset.to_frame().T
 
-        sorted_vertex_set = vertex_subset.sort_values(
-        weight_cutoff = sorted_vertex_set[
+        sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHTS)
+        weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHTS].iloc[
             min(top_n - 1, sorted_vertex_set.shape[0] - 1)
        ]
 
        top_neighbors = sorted_vertex_set[
-            sorted_vertex_set[
-        ][
+            sorted_vertex_set[DISTANCES.PATH_WEIGHTS] <= weight_cutoff
+        ][NAPISTU_GRAPH_VERTICES.NAME].tolist()
 
        # include reactions and other species necessary to reach the top neighbors
        # by pulling in the past solutions to weighted shortest paths problems
@@ -1564,7 +1603,7 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
     # combine all neighbors
     pruned_neighbors = set().union(*pruned_oriented_neighbors)
     pruned_vertices = neighborhood_vertices[
-        neighborhood_vertices[
+        neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NAME].isin(pruned_neighbors)
     ].reset_index(drop=True)
 
     return pruned_vertices
@@ -1574,7 +1613,7 @@ def _calculate_path_attrs(
     neighborhood_paths: list[list],
     edges: pd.DataFrame,
     vertices: list,
-    weight_var: str =
+    weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
 ) -> tuple[pd.DataFrame, dict[Any, set]]:
     """
     Calculate Path Attributes
@@ -1624,15 +1663,15 @@ def _calculate_path_attrs(
     # if all_path_edges.ngroups > 0:
     path_attributes_df = pd.concat(
         [
-            all_path_edges[weight_var].agg("sum").rename(
-            all_path_edges.agg("size").rename(
-            all_path_edges[
+            all_path_edges[weight_var].agg("sum").rename(DISTANCES.PATH_WEIGHTS),
+            all_path_edges.agg("size").rename(DISTANCES.PATH_LENGTH),
+            all_path_edges[NET_POLARITY.LINK_POLARITY]
             .agg(paths._terminal_net_polarity)
-            .rename(
+            .rename(NET_POLARITY.NET_POLARITY),
             # add the final edge since this can be used to add path attributes to edges
             # i.e., apply net_polarity to an edge
-            all_path_edges["from"].agg("last").rename(
-            all_path_edges["to"].agg("last").rename(
+            all_path_edges["from"].agg("last").rename(DISTANCES.FINAL_FROM),
+            all_path_edges["to"].agg("last").rename(DISTANCES.FINAL_TO),
         ],
         axis=1,
     ).reset_index()
@@ -1655,7 +1694,11 @@ def _calculate_path_attrs(
         if len(neighborhood_paths[i]) == 0
     ]
     edgeles_nodes_df = pd.DataFrame({"neighbor": edgeless_nodes}).assign(
-
+        **{
+            DISTANCES.PATH_LENGTH: 0,
+            DISTANCES.PATH_WEIGHTS: 0,
+            NET_POLARITY.NET_POLARITY: None,
+        }
     )
 
     # add edgeless entries as entries in the two outputs
@@ -1672,3 +1715,118 @@
     )
 
     return path_attributes_df, neighborhood_path_entities
+
+
+def _find_neighbors_paths(
+    neighborhood_graph: ig.Graph,
+    one_neighborhood_df: pd.DataFrame,
+    sc_id: str,
+    edges: pd.DataFrame,
+) -> tuple[pd.DataFrame, dict[Any, set], pd.DataFrame, dict[Any, set]]:
+    """
+    Find shortest paths between the focal node and its neighbors in both directions.
+
+    This function calculates shortest paths from the focal node to its descendants
+    (downstream) and ancestors (upstream) using igraph's shortest path algorithms.
+    It uses _calculate_path_attrs to compute path attributes including path weights,
+    lengths, and polarity information.
+
+    Parameters
+    ----------
+    neighborhood_graph: ig.Graph
+        The igraph Graph object representing the neighborhood network
+    one_neighborhood_df: pd.DataFrame
+        DataFrame containing neighborhood information with 'relationship' column
+        indicating 'descendants' or 'ancestors' for each node
+    sc_id: str
+        The compartmentalized species ID of the focal node
+    edges: pd.DataFrame
+        DataFrame containing edge information with columns for 'from', 'to',
+        weights, and link polarity
+
+    Returns
+    -------
+    downstream_path_attrs: pd.DataFrame
+        DataFrame containing path attributes for downstream paths from focal node
+        to descendants. Includes columns: neighbor, path_weight, path_length,
+        net_polarity, final_from, final_to, node_orientation
+    downstream_entity_dict: dict[Any, set]
+        Dictionary mapping each descendant neighbor to the set of entities
+        (nodes) connecting it to the focal node
+    upstream_path_attrs: pd.DataFrame
+        DataFrame containing path attributes for upstream paths from focal node
+        to ancestors. Includes columns: neighbor, path_weight, path_length,
+        net_polarity, final_from, final_to, node_orientation
+    upstream_entity_dict: dict[Any, set]
+        Dictionary mapping each ancestor neighbor to the set of entities
+        (nodes) connecting it to the focal node
+    """
+
+    one_descendants_df = one_neighborhood_df[
+        one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.DESCENDANTS
+    ]
+    descendants_list = list(
+        set(one_descendants_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+    )
+
+    # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
+    with warnings.catch_warnings():
+        # igraph throws warnings for each pair of unconnected species
+        warnings.simplefilter("ignore")
+
+        neighborhood_paths = neighborhood_graph.get_shortest_paths(
+            # focal node
+            v=sc_id,
+            to=descendants_list,
+            weights=NAPISTU_GRAPH_EDGES.WEIGHTS,
+            mode="out",
+            output="epath",
+        )
+
+    downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
+        neighborhood_paths,
+        edges,
+        vertices=descendants_list,
+        weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
+    )
+    downstream_path_attrs = downstream_path_attrs.assign(
+        node_orientation=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
+    )
+
+    # ancestors -> focal_node
+
+    one_ancestors_df = one_neighborhood_df[
+        one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.ANCESTORS
+    ]
+    ancestors_list = list(
+        set(one_ancestors_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+    )
+
+    with warnings.catch_warnings():
+        # igraph throws warnings for each pair of unconnected species
+        warnings.simplefilter("ignore")
+
+        neighborhood_paths = neighborhood_graph.get_shortest_paths(
+            v=sc_id,
+            to=ancestors_list,
+            weights=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+            mode="in",
+            output="epath",
+        )
+
+    upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
+        neighborhood_paths,
+        edges,
+        vertices=ancestors_list,
+        weight_var=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+    )
+    upstream_path_attrs = upstream_path_attrs.assign(
+        node_orientation=NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
+    )
+
+    return (
+        downstream_path_attrs,
+        downstream_entity_dict,
+        upstream_path_attrs,
+        upstream_entity_dict,
+    )
napistu/network/ng_utils.py
CHANGED
@@ -23,7 +23,7 @@ from napistu.constants import SBML_DFS
 from napistu.constants import SOURCE_SPEC
 from napistu.identifiers import _validate_assets_sbml_ids
 from napistu.network.constants import GRAPH_WIRING_APPROACHES
-from napistu.network.constants import
+from napistu.network.constants import GRAPH_DIRECTEDNESS
 
 logger = logging.getLogger(__name__)
 
@@ -406,9 +406,9 @@ def _create_network_save_string(
     model_prefix: str, outdir: str, directed: bool, wiring_approach: str
 ) -> str:
     if directed:
-        directed_str =
+        directed_str = GRAPH_DIRECTEDNESS.DIRECTED
     else:
-        directed_str =
+        directed_str = GRAPH_DIRECTEDNESS.UNDIRECTED
 
     export_pkl_path = os.path.join(
         outdir,
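The ng_utils.py change only swaps hard-coded strings for `GRAPH_DIRECTEDNESS` members when building the pickled-network save path. A hedged sketch of the idea follows; the filename template and function name below are illustrative assumptions, and only the directed/undirected strings come from the diff.

```python
# Hypothetical illustration of the directedness-to-suffix mapping used by
# _create_network_save_string(); the actual filename layout may differ.
import os
from types import SimpleNamespace

GRAPH_DIRECTEDNESS = SimpleNamespace(DIRECTED="directed", UNDIRECTED="undirected")


def network_save_path(model_prefix: str, outdir: str, directed: bool, wiring_approach: str) -> str:
    # choose the suffix from the namespace rather than a bare string literal
    directed_str = GRAPH_DIRECTEDNESS.DIRECTED if directed else GRAPH_DIRECTEDNESS.UNDIRECTED
    # hypothetical filename layout for illustration only
    return os.path.join(outdir, f"{model_prefix}_network_{wiring_approach}_{directed_str}.pkl")


print(network_save_path("metabolism", "/tmp", True, "regulatory"))
```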
{napistu-0.4.6.dist-info → napistu-0.4.7.dist-info}/RECORD
CHANGED
@@ -59,15 +59,15 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
 napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
 napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
 napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
-napistu/network/constants.py,sha256=
+napistu/network/constants.py,sha256=zQkBTeZ2_K_rId6IUvKKTv9chx_i6K5B8vPZ19-LkNQ,7782
 napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
 napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
-napistu/network/neighborhoods.py,sha256=
+napistu/network/neighborhoods.py,sha256=RNqaz91MTxuU4jNfrGdEcnm6rCykgdtHDPu2abAE-Kg,65516
 napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
 napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
 napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
 napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
-napistu/network/ng_utils.py,sha256=
+napistu/network/ng_utils.py,sha256=LX9DzMnz0AQMhJGUh3r8bg4dyEgWs_tym1Olu1FwlbQ,16070
 napistu/network/paths.py,sha256=ZnIqwBIsgz4C4TLyg3c_pCO5zZ97gmCNepDmq2QNEQc,18020
 napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
 napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -87,7 +87,7 @@ napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27
 napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
 napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
-napistu-0.4.
+napistu-0.4.7.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=Rw0KtnVyykZhRjnlmNu4oV47lNIeYUJVHu4y47RnVq0,9990
 tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -109,14 +109,14 @@ tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q
 tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
 tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
 tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
-tests/test_network_neighborhoods.py,sha256=
+tests/test_network_neighborhoods.py,sha256=3k0d-Pk_rWtGwxTg-Jpjv3CsVSj4qMn71MEHEKcqHII,8746
 tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
 tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
 tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
 tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
 tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
 tests/test_network_paths.py,sha256=Bx1uqyIAPw_i27s94dyjCQcV_04O9yovlrQgr1lFjS4,2143
-tests/test_network_precompute.py,sha256=
+tests/test_network_precompute.py,sha256=MhmT6LQF-JcMR9YR78W-cLHbkQnp8ro-MHJ5yvMVhJE,10409
 tests/test_ontologies_genodexito.py,sha256=6fINyUiubHZqu7qxye09DQfJXw28ZMAJc3clPb-cCoY,2298
 tests/test_ontologies_id_tables.py,sha256=CpwpbmQvTc1BaVd6jbDKHAVE2etwN0vx93nC8jpnMlE,7265
 tests/test_ontologies_mygene.py,sha256=VkdRcKIWmcG6V-2dpfvsBiOJN5dO-j0RqZNxtJRcyBU,1583
@@ -136,8 +136,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
 tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
 tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
 tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
+napistu-0.4.7.dist-info/METADATA,sha256=PgIDsBflFe6QmORKY6hfoEI9_Qqrpwa7Oc9126D47jc,4078
+napistu-0.4.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+napistu-0.4.7.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
+napistu-0.4.7.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
+napistu-0.4.7.dist-info/RECORD,,
tests/test_network_neighborhoods.py
CHANGED
@@ -1,11 +1,17 @@
 import pandas as pd
+import pytest
 
 from napistu.network import ng_utils
 from napistu.network import neighborhoods
 from napistu import source
 
 from napistu.constants import SBML_DFS
-from napistu.network.constants import
+from napistu.network.constants import (
+    NAPISTU_GRAPH_VERTICES,
+    NAPISTU_GRAPH_NODE_TYPES,
+    NEIGHBORHOOD_DICT_KEYS,
+    NEIGHBORHOOD_NETWORK_TYPES,
+)
 
 
 def test_neighborhood(sbml_dfs, napistu_graph):
@@ -87,6 +93,7 @@ def test_find_and_prune_neighborhoods_with_source_counts(
     assert isinstance(
         neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES], pd.DataFrame
     )
+    assert neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES].shape[0] > 0
 
     # Check reaction_sources has expected columns
     expected_columns = [SBML_DFS.R_ID, "pathway_id", "name"]
@@ -139,3 +146,85 @@ def test_find_and_prune_neighborhoods_with_source_counts(
     if with_source is not None and without_source is not None:
         assert isinstance(with_source, pd.DataFrame)
         assert isinstance(without_source, pd.DataFrame)
+
+    # Test error handling for invalid parameters
+    # Test invalid network_type
+    with pytest.raises(ValueError):
+        neighborhoods.find_and_prune_neighborhoods(
+            sbml_dfs=sbml_dfs_metabolism,
+            napistu_graph=napistu_graph_metabolism,
+            compartmentalized_species=compartmentalized_species,
+            source_total_counts=source_total_counts,
+            min_pw_size=1,
+            network_type="invalid_network_type",
+            order=3,
+            verbose=False,
+            top_n=10,
+        )
+
+    # Test invalid order (negative)
+    with pytest.raises(ValueError):
+        neighborhoods.find_and_prune_neighborhoods(
+            sbml_dfs=sbml_dfs_metabolism,
+            napistu_graph=napistu_graph_metabolism,
+            compartmentalized_species=compartmentalized_species,
+            source_total_counts=source_total_counts,
+            min_pw_size=1,
+            network_type=NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
+            order=-1,
+            verbose=False,
+            top_n=10,
+        )
+
+
+def test_add_vertices_uri_urls(sbml_dfs):
+    """
+    Test add_vertices_uri_urls function.
+
+    This test verifies that the function correctly adds URI URLs to vertices
+    DataFrame for both species and reactions.
+    """
+
+    # Get real species and reaction names from the sbml_dfs fixture
+    real_species = sbml_dfs.compartmentalized_species.index[0]  # Get first species
+    real_reaction = sbml_dfs.reactions.index[0]  # Get first reaction
+
+    # Create a test vertices DataFrame with real species and reactions
+    test_vertices = pd.DataFrame(
+        {
+            NAPISTU_GRAPH_VERTICES.NAME: [real_species, real_reaction],
+            NAPISTU_GRAPH_VERTICES.NODE_TYPE: [
+                NAPISTU_GRAPH_NODE_TYPES.SPECIES,
+                NAPISTU_GRAPH_NODE_TYPES.REACTION,
+            ],
+        }
+    )
+
+    # Test basic functionality
+    result = neighborhoods.add_vertices_uri_urls(test_vertices, sbml_dfs)
+
+    # Verify basic structure
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape[0] == test_vertices.shape[0]  # Same number of rows
+    assert result.shape[1] >= test_vertices.shape[1]  # At least same number of columns
+
+    # Verify original columns are preserved
+    for col in test_vertices.columns:
+        assert col in result.columns
+        assert all(result[col] == test_vertices[col])
+
+    # Verify species vertices have s_id column
+    species_vertices = result[
+        result[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
+    ]
+    assert SBML_DFS.S_ID in species_vertices.columns
+
+    # Test error handling
+    import pytest
+
+    # Test with empty DataFrame
+    empty_vertices = pd.DataFrame(
+        columns=[NAPISTU_GRAPH_VERTICES.NAME, NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+    )
+    with pytest.raises(ValueError, match="vertices must have at least one row"):
+        neighborhoods.add_vertices_uri_urls(empty_vertices, sbml_dfs)
tests/test_network_precompute.py
CHANGED
@@ -14,6 +14,12 @@ from napistu.network import net_create
 from napistu.network import paths
 from napistu.network import precompute
 
+from napistu.network.constants import (
+    NAPISTU_GRAPH_VERTICES,
+    DISTANCES,
+    NEIGHBORHOOD_NETWORK_TYPES,
+)
+
 test_path = os.path.abspath(os.path.join(__file__, os.pardir))
 sbml_path = os.path.join(test_path, "test_data", "reactome_glucose_metabolism.sbml")
 if not os.path.isfile(sbml_path):
@@ -138,6 +144,7 @@ def test_precomputed_distances_shortest_paths():
 
 
 def test_precomputed_distances_neighborhoods():
+
     compartmentalized_species = sbml_dfs.compartmentalized_species[
         sbml_dfs.compartmentalized_species["s_id"] == "S00000000"
     ].index.tolist()
@@ -169,8 +176,12 @@ def test_precomputed_distances_neighborhoods():
         pruned_vert_otf = pruned_neighborhoods_otf[key]["vertices"]
         pruned_vert_precomp = pruned_neighborhoods_precomputed[key]["vertices"]
 
-        join_key = [
-
+        join_key = [
+            NAPISTU_GRAPH_VERTICES.NAME,
+            NAPISTU_GRAPH_VERTICES.NODE_NAME,
+            "node_orientation",
+        ]
+        join_key_w_vars = [*join_key, *[DISTANCES.PATH_WEIGHTS, DISTANCES.PATH_LENGTH]]
         neighbor_comparison = (
             pruned_vert_precomp[join_key_w_vars]
             .assign(in_precompute=True)
@@ -197,23 +208,27 @@ def test_precomputed_distances_neighborhoods():
     # which should be the same if we are pre-selecting the correct neighbors
     # as part of _precompute_neighbors()
     downstream_disagreement_w_precompute = (
-        comparison_df[
+        comparison_df[
+            comparison_df["node_orientation"] == NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
+        ]
         .merge(
             precomputed_distances,
-            left_on=["focal_sc_id",
-            right_on=[
+            left_on=["focal_sc_id", NAPISTU_GRAPH_VERTICES.NAME],
+            right_on=[DISTANCES.SC_ID_ORIGIN, DISTANCES.SC_ID_DEST],
        )
-        .query("abs(
+        .query("abs(path_weights_x - path_weights) > 1e-13")
     )
 
     upstream_disagreement_w_precompute = (
-        comparison_df[
+        comparison_df[
+            comparison_df["node_orientation"] == NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
+        ]
        .merge(
            precomputed_distances,
-            left_on=["focal_sc_id",
-            right_on=[
+            left_on=["focal_sc_id", NAPISTU_GRAPH_VERTICES.NAME],
+            right_on=[DISTANCES.SC_ID_DEST, DISTANCES.SC_ID_ORIGIN],
        )
-        .query("abs(
+        .query("abs(path_weights_x - path_upstream_weights) > 1e-13")
    )
 
    assert downstream_disagreement_w_precompute.shape[0] == 0