napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+ from napistu import mechanism_matching
6
+ from napistu.network import net_create
7
+ from napistu.network import precompute
8
+
9
+
10
def test_features_to_pathway_species(sbml_dfs):
    """Features with known ChEBI IDs match pathway species; unknown IDs drop out."""
    pathway_ids = sbml_dfs.get_identifiers("species")
    # "-1" is a deliberately bogus ChEBI ID that should match nothing
    features = pd.DataFrame({"chebis": ["17627", "15379", "29105", "-1"]})

    matches = mechanism_matching.features_to_pathway_species(
        features, pathway_ids, {"chebi"}, "chebis"
    )
    counts = matches.value_counts("identifier").sort_index()

    assert counts.index.tolist() == ["15379", "17627", "29105"]
    assert counts.tolist() == [2, 3, 2]
25
+
26
+
27
def test_edgelist_to_pathway_species(sbml_dfs):
    """Edgelist IDs resolve to species, compartmentalized species, and direct mechanisms."""
    id_pairs = [
        ("17996", "16526"),
        ("15377", "17544"),
        ("15378", "57945"),
        ("57540", "17996"),
    ]
    edgelist = pd.DataFrame(
        id_pairs, columns=["identifier_upstream", "identifier_downstream"]
    )
    # restrict to direct ("is") annotations
    species_identifiers = sbml_dfs.get_identifiers("species").query("bqb == 'BQB_IS'")

    edgelist_w_sids = mechanism_matching.edgelist_to_pathway_species(
        edgelist, species_identifiers, ontologies={"chebi", "uniprot"}
    )
    assert edgelist_w_sids.shape == (4, 4)

    edgelist_w_scids = mechanism_matching.edgelist_to_scids(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert edgelist_w_scids.shape == (12, 6)

    direct_interactions = mechanism_matching.filter_to_direct_mechanistic_interactions(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert direct_interactions.shape == (2, 10)
55
+
56
+
57
def test_direct_and_indirect_mechanism_matching(sbml_dfs_glucose_metabolism):
    """Indirect mechanism matching agrees with and without precomputed distances."""
    cpr_graph = net_create.process_cpr_graph(sbml_dfs_glucose_metabolism)

    # (upstream, downstream) ChEBI pairs: glu->fbp, f6p->fbp, fbp->dhap, glu->pyr
    chebi_pairs = [
        ("17925", "32966"),
        ("57634", "32966"),
        ("32966", "57642"),
        ("17925", "15361"),
    ]
    edgelist = pd.DataFrame(
        chebi_pairs, columns=["identifier_upstream", "identifier_downstream"]
    )

    species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers("species")

    direct_interactions = mechanism_matching.filter_to_direct_mechanistic_interactions(
        formatted_edgelist=edgelist,
        sbml_dfs=sbml_dfs_glucose_metabolism,
        species_identifiers=species_identifiers,
        ontologies={"chebi"},
    )
    assert direct_interactions.shape == (2, 10)

    # arguments shared by both indirect-matching calls below
    shared_kwargs = dict(
        formatted_edgelist=edgelist,
        sbml_dfs=sbml_dfs_glucose_metabolism,
        species_identifiers=species_identifiers,
        cpr_graph=cpr_graph,
        ontologies={"chebi"},
        max_path_length=10,
    )

    indirect_interactions = (
        mechanism_matching.filter_to_indirect_mechanistic_interactions(
            precomputed_distances=None, **shared_kwargs
        )
    )
    assert indirect_interactions.shape == (6, 12)

    # confirm that we get the same thing even when using precomputed distances
    precomputed_distances = precompute.precompute_distances(
        cpr_graph, weights_vars=["weights"]
    )
    indirect_interactions_w_precompute = (
        mechanism_matching.filter_to_indirect_mechanistic_interactions(
            precomputed_distances=precomputed_distances, **shared_kwargs
        )
    )

    assert all(
        indirect_interactions["weight"] == indirect_interactions_w_precompute["weight"]
    )
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ import igraph as ig
6
+ import pandas as pd
7
+ from napistu.network import net_utils
8
+ from napistu.network import net_create
9
+
10
+
11
def test_safe_fill():
    """safe_fill wraps a long string onto a new line and leaves "" untouched."""
    cases = {
        "a_very_long stringggg": "a_very_long\nstringggg",
        "": "",
    }
    for raw, wrapped in cases.items():
        assert net_utils.safe_fill(raw) == wrapped
17
+
18
+
19
def test_cpr_graph_to_pandas_dfs():
    """Vertex and edge frames round-trip a small directed igraph graph."""
    edges = [
        (0, 1),
        (0, 2),
        (2, 3),
        (3, 4),
        (4, 2),
        (2, 5),
        (5, 0),
        (6, 3),
        (5, 6),
    ]
    graph = ig.Graph(edges, directed=True)

    vertices_df, edges_df = net_utils.cpr_graph_to_pandas_dfs(graph)

    # every vertex index is reported, in order
    assert vertices_df["index"].tolist() == list(range(7))

    # the edge frame contains exactly the input edges
    expected_edges = (
        pd.DataFrame(edges)
        .rename({0: "source", 1: "target"}, axis=1)
        .sort_values(["source", "target"])
    )
    assert expected_edges.equals(edges_df.sort_values(["source", "target"]))
42
+
43
+
44
def test_validate_graph_attributes(sbml_dfs):
    """Attribute validators accept existing attributes and reject unknown ones."""
    cpr_graph = net_create.process_cpr_graph(
        sbml_dfs, directed=True, weighting_strategy="topology"
    )

    # validators return None on success
    edge_attrs = ["weights", "upstream_weights"]
    assert net_utils._validate_edge_attributes(cpr_graph, edge_attrs) is None
    assert net_utils._validate_vertex_attributes(cpr_graph, "node_type") is None

    # an unknown vertex attribute must raise
    with pytest.raises(ValueError):
        net_utils._validate_vertex_attributes(cpr_graph, "baz")
57
+
58
+
59
################################################
# __main__
################################################

if __name__ == "__main__":
    # Only fixture-free tests can run as a script.
    test_safe_fill()
    test_cpr_graph_to_pandas_dfs()
    # BUGFIX: test_validate_graph_attributes() was called here without its
    # required `sbml_dfs` pytest fixture argument, which raises TypeError.
    # That test must be run via `pytest` so the fixture is injected.
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import pytest
5
+ from napistu import sbml_dfs_core
6
+ from napistu.identifiers import Identifiers
7
+ from napistu.rpy2 import netcontextr
8
+ from napistu.source import Source
9
+
10
+
11
@pytest.fixture
def sbml_dfs_one_reaction():
    """An sbml_dfs with one reaction and one annotated reactant"""
    edges = pd.DataFrame(
        {
            "upstream_name": ["a"],
            "downstream_name": ["b"],
            "upstream_compartment": ["nucleoplasm"],
            "downstream_compartment": ["nucleoplasm"],
            "r_name": ["a b of a"],
            "sbo_term": ["SBO:0000010"],
            "r_Identifiers": Identifiers([]),
            "r_isreversible": False,
        }
    )
    # only species "a" carries an identifier; "b" is unannotated
    species = pd.DataFrame(
        {
            "s_name": ["a", "b"],
            "s_Identifiers": [
                Identifiers([{"ontology": "ensembl_gene", "identifier": "test"}]),
                Identifiers([]),
            ],
        }
    )
    compartments = pd.DataFrame(
        {"c_name": ["nucleoplasm"], "c_Identifiers": Identifiers([])}
    )
    return sbml_dfs_core.sbml_dfs_from_edgelist(
        edges, species, compartments, Source(init=True)
    )
43
+
44
+
45
@pytest.fixture
def sbml_dfs_one_reaction_duplicated_identifiers():
    """An sbml_dfs with one reaction whose annotated reactant has two identifiers."""
    edges = pd.DataFrame(
        {
            "upstream_name": ["a"],
            "downstream_name": ["b"],
            "upstream_compartment": ["nucleoplasm"],
            "downstream_compartment": ["nucleoplasm"],
            "r_name": ["a b of a"],
            "sbo_term": ["SBO:0000010"],
            "r_Identifiers": Identifiers([]),
            "r_isreversible": False,
        }
    )
    # species "a" carries two ensembl_gene identifiers; "b" is unannotated
    species = pd.DataFrame(
        {
            "s_name": ["a", "b"],
            "s_Identifiers": [
                Identifiers(
                    [
                        {"ontology": "ensembl_gene", "identifier": "test"},
                        {"ontology": "ensembl_gene", "identifier": "test2"},
                    ]
                ),
                Identifiers([]),
            ],
        }
    )
    compartments = pd.DataFrame(
        {"c_name": ["nucleoplasm"], "c_Identifiers": Identifiers([])}
    )
    return sbml_dfs_core.sbml_dfs_from_edgelist(
        edges, species, compartments, Source(init=True)
    )
85
+
86
+
87
def test_get_reactions_one_reaction(sbml_dfs_one_reaction):
    """A single annotated reactant yields exactly one gene-level reaction row."""
    reactions = netcontextr._get_reactions(sbml_dfs_one_reaction)
    assert reactions[netcontextr.COL_GENE].notna().all()
    assert len(reactions) == 1
91
+
92
+
93
def test_get_reactions_outcols(sbml_dfs_one_reaction):
    """The reactions frame exposes the gene, reaction-id and role columns."""
    reactions = netcontextr._get_reactions(sbml_dfs_one_reaction)
    expected_cols = {
        netcontextr.COL_GENE,
        netcontextr.COL_REACTION_ID,
        netcontextr.COL_ROLE,
    }
    assert expected_cols.issubset(reactions.columns)
98
+
99
+
100
def test_get_reactions_one_reaction_duplicated_ids(
    sbml_dfs_one_reaction_duplicated_identifiers,
):
    """Duplicated identifiers on one species expand into one row per identifier."""
    reactions = netcontextr._get_reactions(sbml_dfs_one_reaction_duplicated_identifiers)
    assert reactions[netcontextr.COL_GENE].notna().all()
    assert len(reactions) == 2
tests/test_obo.py ADDED
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ from napistu.ingestion import obo
4
+
5
+
6
def test_formatting_obo_attributes():
    """OBO entries split on the first ': '; malformed entries return None."""
    key, value = obo._format_entry_tuple("foo: bar: baz")
    assert (key, value) == ("foo", "bar: baz")
    assert obo._format_entry_tuple("foo") is None
9
+
10
+
11
def test_formatting_go_isa_attributes():
    """GO 'is_a' strings of the form 'id ! name' become parent dicts."""
    cases = [
        ([], []),
        (["foo ! bar"], [{"parent_id": "foo", "parent_name": "bar"}]),
        (
            ["foo ! bar", "fun ! baz"],
            [
                {"parent_id": "foo", "parent_name": "bar"},
                {"parent_id": "fun", "parent_name": "baz"},
            ],
        ),
    ]
    for raw, parsed in cases:
        assert obo._isa_str_list_to_dict_list(raw) == parsed
26
+
27
+
28
################################################
# __main__
################################################

if __name__ == "__main__":
    # these tests need no pytest fixtures, so they can run as a script
    for _test in (test_formatting_obo_attributes, test_formatting_go_isa_attributes):
        _test()
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from napistu.modify import pathwayannot
6
+
7
+ from napistu.constants import SBML_DFS
8
+ from napistu.modify.constants import COFACTOR_SCHEMA
9
+ from napistu.modify.constants import COFACTOR_CHEBI_IDS
10
+
11
# resolve test-data paths relative to this file
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
test_data = os.path.join(test_path, "test_data")
reduced_neo4j_members_path = os.path.join(test_data, "reduced_neo4j_members.csv")
# BUGFIX: this constant previously pointed at reduced_neo4j_members.csv
# (copy-paste error); it must reference the cross-refs export
reduced_neo4j_cross_refs_path = os.path.join(test_data, "reduced_neo4j_cross_refs.csv")
15
+
16
+
17
def test_cofactor_specifications():
    """Every cofactor referenced in COFACTOR_SCHEMA must carry a ChEBI ID."""
    # cofactors are manually annotated with their ChEBI ID so that they can be
    # matched to the corresponding ChEBI ID of molecular species
    labels_with_ids = set(COFACTOR_CHEBI_IDS["cofactor"].tolist())

    # flatten the schema's nested values into the set of referenced cofactors
    schema_cofactors = {
        cofactor
        for spec in COFACTOR_SCHEMA.values()
        for group in spec.values()
        for cofactor in group
    }

    # check whether all species in cofactor schema are tied with an ID
    undefined = schema_cofactors.difference(labels_with_ids)
    if undefined:
        raise ValueError(
            f"{', '.join(undefined)} are not defined in \"COFACTOR_CHEBI_IDS\""
        )
33
+
34
+
35
def test_add_reactome_entity_sets(sbml_dfs_glucose_metabolism):
    """Adding Reactome entity sets keeps existing species/reactions and adds new ones."""
    # annotate the model with a reduced subset of the Reactome entity-set exports
    augmented = pathwayannot.add_entity_sets(
        sbml_dfs_glucose_metabolism, reduced_neo4j_members_path
    )

    original_species = set(sbml_dfs_glucose_metabolism.species[SBML_DFS.S_NAME])
    augmented_species = set(augmented.species[SBML_DFS.S_NAME])

    # no species is lost ...
    assert original_species <= augmented_species
    # ... and exactly the expected entity-set members appear
    assert augmented_species - original_species == {
        "HK1",
        "HK2",
        "HK3",
        "PRKACA",
        "PRKACB",
        "PRKACG",
        "SLC25A12",
        "SLC25A13",
        "SLC37A1",
    }

    original_reactions = set(sbml_dfs_glucose_metabolism.reactions[SBML_DFS.R_NAME])
    augmented_reactions = set(augmented.reactions[SBML_DFS.R_NAME])

    # no reaction is lost, and ten membership reactions are added
    assert original_reactions <= augmented_reactions
    assert len(augmented_reactions - original_reactions) == 10
71
+
72
+
73
def test_add_reactome_cross_refs(sbml_dfs_glucose_metabolism):
    """Adding Reactome cross-references preserves reaction species and adds identifiers."""
    # test adding cross-references to a Reactome model
    sbml_dfs_with_cross_refs = pathwayannot.add_reactome_identifiers(
        sbml_dfs_glucose_metabolism,
        os.path.join(test_data, "reduced_neo4j_cross_refs.csv"),
    )

    # BUGFIX: this comparison was previously a bare expression (no `assert`),
    # so it never actually checked anything
    assert (
        sbml_dfs_glucose_metabolism.reaction_species.shape[0]
        == sbml_dfs_with_cross_refs.reaction_species.shape[0]
    )

    previous_species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers(
        SBML_DFS.SPECIES
    )
    updated_species_identifiers = sbml_dfs_with_cross_refs.get_identifiers(
        SBML_DFS.SPECIES
    )
    # the reduced cross-refs export should contribute exactly 88 new identifiers
    assert (
        updated_species_identifiers.shape[0] - previous_species_identifiers.shape[0]
        == 88
    )
@@ -0,0 +1,222 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from napistu import sbml_dfs_core
8
+ from napistu.ingestion import sbml
9
+ from napistu.network import neighborhoods
10
+ from napistu.network import net_create
11
+ from napistu.network import paths
12
+ from napistu.network import precompute
13
+
14
# Module-level fixtures shared by every test below: load a small Reactome
# glucose-metabolism model, build its graph, and precompute all-pairs distances.
# These run at import time, so `python -m` / direct execution also works.
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
sbml_path = os.path.join(test_path, "test_data", "reactome_glucose_metabolism.sbml")
if not os.path.isfile(sbml_path):
    raise ValueError(f"{sbml_path} not found")

sbml_model = sbml.SBML(sbml_path).model
sbml_dfs = sbml_dfs_core.SBML_dfs(sbml_model)
sbml_dfs.validate()

cpr_graph = net_create.process_cpr_graph(
    sbml_dfs, graph_type="bipartite", directed=True, weighting_strategy="topology"
)

# number of species to include when finding all x all paths
N_SPECIES = 12

# setting for neighborhoods
NETWORK_TYPE = "hourglass"
ORDER = 20
TOP_N = 20

# max_score_q=1 keeps every path regardless of score quantile
precomputed_distances = precompute.precompute_distances(
    cpr_graph, max_steps=30000, max_score_q=1
)
+ )
38
+
39
+
40
def test_precomputed_distances():
    """The reduced model yields the expected number of precomputed distance rows."""
    n_rows, n_cols = precomputed_distances.shape
    assert (n_rows, n_cols) == (3934, 5)
42
+
43
+
44
def test_precomputed_distances_shortest_paths():
    """Shortest-path weights agree with and without precomputed distances."""
    cspecies_subset = sbml_dfs.compartmentalized_species.index.tolist()[:N_SPECIES]

    # all ordered pairs within the subset
    all_species_pairs = pd.DataFrame(
        [(x, y) for x in cspecies_subset for y in cspecies_subset],
        columns=["sc_id_origin", "sc_id_dest"],
    )

    def _best_path_weights(path_vertices):
        # total weight of the lightest path for each (origin, dest) pair
        return (
            path_vertices.groupby(["origin", "dest", "path"])["weights"]
            .sum()
            .reset_index()
            .sort_values("weights")
            .groupby(["origin", "dest"])
            .first()
            .reset_index()
        )

    # we should get the same answer for shortest paths whether or not we use
    # precomputed distances
    path_vertices = paths.find_all_shortest_reaction_paths(
        cpr_graph, sbml_dfs, all_species_pairs
    )[0]
    shortest_path_weights = _best_path_weights(path_vertices)

    in_subset = precomputed_distances["sc_id_origin"].isin(
        cspecies_subset
    ) & precomputed_distances["sc_id_dest"].isin(cspecies_subset)
    precomputed_distance_subset = precomputed_distances[in_subset]

    path_method_comparison_full_merge = shortest_path_weights.merge(
        precomputed_distance_subset,
        left_on=["origin", "dest"],
        right_on=["sc_id_origin", "sc_id_dest"],
        how="outer",
    )

    # tables have identical pairs with a valid path
    assert (
        path_method_comparison_full_merge.shape[0]
        == precomputed_distance_subset.shape[0]
    )
    assert path_method_comparison_full_merge.shape[0] == shortest_path_weights.shape[0]
    # and agree on the weights up to float tolerance
    weight_gap = (
        path_method_comparison_full_merge["weights"]
        - path_method_comparison_full_merge["path_weights"]
    ).abs()
    assert all(weight_gap < 1e-13)

    # using the precomputed distances generates the same result as excluding them
    precompute_path_vertices = paths.find_all_shortest_reaction_paths(
        cpr_graph,
        sbml_dfs,
        all_species_pairs,
        precomputed_distances=precomputed_distances,
    )[0]
    precompute_shortest_path_weights = _best_path_weights(precompute_path_vertices)

    precompute_full_merge = shortest_path_weights.merge(
        precompute_shortest_path_weights,
        on=["origin", "dest", "path"],
        how="outer",
    )

    assert precompute_full_merge.shape[0] == precompute_shortest_path_weights.shape[0]
    assert precompute_full_merge.shape[0] == shortest_path_weights.shape[0]
    precompute_gap = (
        precompute_full_merge["weights_x"] - precompute_full_merge["weights_y"]
    ).abs()
    assert all(precompute_gap < 1e-13)
+
134
+
135
def test_precomputed_distances_neighborhoods():
    """Pruned neighborhoods match with and without precomputed distances."""
    focal_species = sbml_dfs.compartmentalized_species[
        sbml_dfs.compartmentalized_species["s_id"] == "S00000000"
    ].index.tolist()

    # identical settings for both runs; only precomputed_distances differs
    shared_kwargs = dict(
        network_type=NETWORK_TYPE,
        order=ORDER,
        verbose=True,
        top_n=TOP_N,
    )
    pruned_neighborhoods_precomputed = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,
        cpr_graph,
        focal_species,
        precomputed_distances=precomputed_distances,
        **shared_kwargs,
    )
    pruned_neighborhoods_otf = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,
        cpr_graph,
        focal_species,
        precomputed_distances=None,
        **shared_kwargs,
    )

    join_key = ["name", "node_name", "node_orientation"]
    join_key_w_vars = join_key + ["path_weight", "path_length"]

    comparisons = []
    for key, precomputed_entry in pruned_neighborhoods_precomputed.items():
        vert_precomp = precomputed_entry["vertices"]
        vert_otf = pruned_neighborhoods_otf[key]["vertices"]

        neighbor_comparison = (
            vert_precomp[join_key_w_vars]
            .assign(in_precompute=True)
            .merge(
                vert_otf[join_key_w_vars].assign(in_otf=True),
                on=join_key,
                how="outer",
            )
            .fillna(False)
        )
        comparisons.append(neighbor_comparison.assign(focal_sc_id=key))

    comparison_df = pd.concat(comparisons)

    # pruned neighborhoods are identical with and without using precalculated neighbors
    assert comparison_df.query("in_precompute != in_otf").shape[0] == 0

    # compare shortest paths calculated through neighborhoods with precomputed
    # distances, which should be the same if we are pre-selecting the correct
    # neighbors as part of _precompute_neighbors()
    downstream_disagreement_w_precompute = (
        comparison_df[comparison_df["node_orientation"] == "downstream"]
        .merge(
            precomputed_distances,
            left_on=["focal_sc_id", "name"],
            right_on=["sc_id_origin", "sc_id_dest"],
        )
        .query("abs(path_weight_x - path_weights) > 1e-13")
    )

    upstream_disagreement_w_precompute = (
        comparison_df[comparison_df["node_orientation"] == "upstream"]
        .merge(
            precomputed_distances,
            left_on=["focal_sc_id", "name"],
            right_on=["sc_id_dest", "sc_id_origin"],
        )
        .query("abs(path_weight_x - path_upstream_weights) > 1e-13")
    )

    assert downstream_disagreement_w_precompute.shape[0] == 0
    assert upstream_disagreement_w_precompute.shape[0] == 0
+ assert upstream_disagreement_w_precompute.shape[0] == 0
213
+
214
+
215
################################################
# __main__
################################################

if __name__ == "__main__":
    # module-level fixtures are built at import time, so these run directly
    for _test in (
        test_precomputed_distances,
        test_precomputed_distances_shortest_paths,
        test_precomputed_distances_neighborhoods,
    ):
        _test()