napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
@@ -0,0 +1,264 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import pandas as pd
6
+ from napistu import consensus
7
+ from napistu import identifiers
8
+ from napistu import indices
9
+ from napistu import sbml_dfs_core
10
+ from napistu import sbml_dfs_utils
11
+ from napistu import source
12
+
13
+ from napistu.constants import SBML_DFS
14
+ from napistu.constants import SOURCE_SPEC
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def uncompartmentalize_sbml_dfs(
    sbml_dfs: sbml_dfs_core.SBML_dfs,
) -> sbml_dfs_core.SBML_dfs:
    """Uncompartmentalize SBML_dfs

    Take a compartmentalized mechanistic model and merge all of the compartments.

    The model is modified in place: its compartments, compartmentalized species,
    reactions, and reaction species tables are overwritten, the model is
    re-validated, and the same object is returned.

    Args:
        sbml_dfs (sbml_dfs_core.SBML_dfs): a compartmentalized mechanistic model

    Returns:
        sbml_dfs (sbml_dfs_core.SBML_dfs): the input model after all compartments
            have been replaced by a single stubbed compartment and redundant or
            trivial reactions have been dropped
    """

    # to remove compartmentalization we can:
    # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
    # 2. ignore the species table (it will be the same in the compartmentalized and uncompartmentalized model)
    # 3. create a 1-1 correspondence between species and new compartmentalized species. w/ GO CELLULAR_COMPONENT
    # 4. update reaction species to the new compartmentalized species
    # 5. drop reactions if:
    #   - they are redundant (e.g., the same reaction occurred in multiple compartments)
    #   - substrates and products are identical (e.g., a transportation reaction)

    if sbml_dfs.compartments.shape[0] == 1:
        # nothing is stopped here; the caller is only warned
        logger.warning(
            "The sbml_dfs model only contains a single compartment, calling uncompartmentalize_sbml_dfs()"
            " may not be appropriate"
        )

    # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
    stubbed_compartment = sbml_dfs_core._stub_compartments().assign(
        c_Source=_create_stubbed_source()
    )

    # 3. create a 1-1 correspondence between species and new compartmentalized species. w/ GO CELLULAR_COMPONENT
    compspec_consensus_instances, compspec_lookup_table = _uncompartmentalize_cspecies(
        sbml_dfs, stubbed_compartment
    )

    # 4. update reaction species to the new compartmentalized species
    # 5. drop reactions if:
    #   - they are redundant (e.g., the same reaction occurred in multiple compartments)
    #   - substrates and products are identical (e.g., a transportation reaction)
    reactions, reaction_species = _uncompartmentalize_reactions(
        sbml_dfs, compspec_lookup_table
    )

    # overwrite the model's tables only after every replacement has been built
    # from the original (still compartmentalized) tables
    sbml_dfs.compartments = stubbed_compartment
    sbml_dfs.compartmentalized_species = compspec_consensus_instances
    sbml_dfs.reactions = reactions
    sbml_dfs.reaction_species = reaction_species

    sbml_dfs.validate()

    return sbml_dfs
76
+
77
+
78
def _uncompartmentalize_cspecies(
    sbml_dfs: sbml_dfs_core.SBML_dfs, stubbed_compartment: pd.DataFrame
) -> tuple[pd.DataFrame, pd.Series]:
    """Convert compartmentalized species into uncompartmentalized ones.

    Args:
        sbml_dfs (sbml_dfs_core.SBML_dfs): the compartmentalized model
        stubbed_compartment (pd.DataFrame): the replacement compartments table;
            its first index value becomes the compartment id of every
            compartmentalized species

    Returns:
        compspec_consensus_instances (pd.DataFrame): one compartmentalized species
            per species, indexed by the new sc_id
        compspec_lookup_table (pd.Series): new sc_ids indexed by
            (model, old sc_id)
    """

    # swap the compartment-specific name and source for the species-level ones
    # and keep the original sc_id around as "sc_id_old"
    updated_cspecies = (
        sbml_dfs.compartmentalized_species.drop(
            [SBML_DFS.SC_NAME, SBML_DFS.C_ID, SBML_DFS.SC_SOURCE], axis=1
        )
        .merge(
            sbml_dfs.species[[SBML_DFS.S_NAME, SBML_DFS.S_SOURCE]],
            left_on=SBML_DFS.S_ID,
            right_index=True,
        )
        .reset_index()
        .rename(
            {
                SBML_DFS.SC_ID: "sc_id_old",
                SBML_DFS.S_NAME: SBML_DFS.SC_NAME,
                SBML_DFS.S_SOURCE: SBML_DFS.SC_SOURCE,
            },
            axis=1,
        )
    )

    # define new sc_ids as a 1-1 match to s_ids
    new_sc_ids = updated_cspecies[SBML_DFS.S_ID].drop_duplicates().to_frame()
    new_sc_ids[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
        range(new_sc_ids.shape[0]), SBML_DFS.SC_ID
    )

    # add new identifiers (merges on the shared s_id column)
    updated_cspecies = updated_cspecies.merge(new_sc_ids)
    # add new compartment
    updated_cspecies[SBML_DFS.C_ID] = stubbed_compartment.index.tolist()[0]

    # create a lookup table of old -> new sc_ids
    # the model column is named via SOURCE_SPEC.MODEL so that it always matches
    # the key used in set_index() below
    compspec_lookup_table = (
        updated_cspecies.assign(**{SOURCE_SPEC.MODEL: "uncompartmentalization"})
        .rename({"sc_id_old": SBML_DFS.SC_ID, SBML_DFS.SC_ID: "new_id"}, axis=1)
        .set_index([SOURCE_SPEC.MODEL, SBML_DFS.SC_ID])["new_id"]
    )

    # several old sc_ids can map to one new sc_id; keep a single row for each
    compspec_consensus_instances = updated_cspecies.groupby(SBML_DFS.SC_ID).first()[
        [SBML_DFS.S_ID, SBML_DFS.C_ID, SBML_DFS.SC_NAME, SBML_DFS.SC_SOURCE]
    ]

    return compspec_consensus_instances, compspec_lookup_table
126
+
127
+
128
def _uncompartmentalize_reactions(
    sbml_dfs: sbml_dfs_core.SBML_dfs, compspec_lookup_table: pd.Series
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Rebuild reactions and reaction species around the uncompartmentalized species.

    Args:
        sbml_dfs (sbml_dfs_core.SBML_dfs): the compartmentalized model
        compspec_lookup_table (pd.Series): lookup of old -> new sc_ids

    Returns:
        reactions (pd.DataFrame): merged reactions with trivial reactions dropped
        reaction_species (pd.DataFrame): merged reaction species with trivial
            reaction species dropped
    """

    pw_index = _create_stubbed_index()

    # the consensus functions operate on a dict of models, so wrap the
    # single model in a one-entry dict
    models = {"uncompartmentalization": sbml_dfs}

    # collapse reactions whose members and stoichiometry are identical
    merged_reactions, reaction_id_lookup = consensus.construct_meta_entities_members(
        sbml_dfs_dict=models,
        pw_index=pw_index,
        table=SBML_DFS.REACTIONS,
        defined_by=SBML_DFS.REACTION_SPECIES,
        defined_lookup_tables={SBML_DFS.SC_ID: compspec_lookup_table},
        defining_attrs=[SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY],
    )

    # re-key reaction species onto the merged reactions and the new sc_ids
    foreign_key_lookups = {
        SBML_DFS.R_ID: reaction_id_lookup,
        SBML_DFS.SC_ID: compspec_lookup_table,
    }
    merged_reaction_species, _ = consensus.construct_meta_entities_fk(
        sbml_dfs_dict=models,
        pw_index=pw_index,
        table=SBML_DFS.REACTION_SPECIES,
        fk_lookup_tables=foreign_key_lookups,
        # the SBO term is also defining so that a species with
        # different roles in one reaction is retained
        extra_defining_attrs=[SBML_DFS.SBO_TERM],
    )

    # once compartments are gone some reactions have identical substrates and
    # products (mostly transport reactions); drop those
    return _filter_trivial_reactions(merged_reactions, merged_reaction_species)
171
+
172
+
173
def _filter_trivial_reactions(
    rxn_consensus_species: pd.DataFrame, rxnspec_consensus_instances: pd.DataFrame
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Filter Trivial Reactions

    Filter reaction species which cancel out as substrates and products in the same reaction.

    Args:
        rxn_consensus_species (pd.DataFrame): reactions
        rxnspec_consensus_instances (pd.DataFrame): reaction species

    Returns:
        reactions (pd.DataFrame): reactions with trivial reactions dropped
        reaction_species (pd.DataFrame): reaction species with trivial reaction species dropped
    """

    # look for reactions where substrates and products cancel out;
    # entries with zero stoichiometry are ignored here
    reactants = rxnspec_consensus_instances[
        rxnspec_consensus_instances[SBML_DFS.STOICHIOMETRY] != 0
    ]
    reactants_stoi_sum = (
        reactants[[SBML_DFS.R_ID, SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]]
        .groupby([SBML_DFS.R_ID, SBML_DFS.SC_ID])
        .sum()
    )

    # identify cspecies which cancel out (net stoichiometry of zero within a reaction)
    invalid_cspecies_in_reaction = reactants_stoi_sum[
        reactants_stoi_sum[SBML_DFS.STOICHIOMETRY] == 0
    ]

    if invalid_cspecies_in_reaction.shape[0] > 0:
        logger.info(
            f"{invalid_cspecies_in_reaction.shape[0]} reactions species will be removed because they are substrates"
            " and products in the same reaction"
        )

    # find the original rsc_ids of all cspecies which cancel out
    # NOTE(review): this relies on the merge result keeping the row labels of
    # `reactants` - confirm against the pandas version in use
    invalid_reaction_species = reactants.merge(
        invalid_cspecies_in_reaction,
        left_on=[SBML_DFS.R_ID, SBML_DFS.SC_ID],
        right_index=True,
    ).index.tolist()

    # update the reaction species table to reflect reaction_species which were dropped because
    # they were both substrates and products
    updated_reaction_species = rxnspec_consensus_instances[
        ~rxnspec_consensus_instances.index.isin(invalid_reaction_species)
    ]

    # identify valid reactions based on their presence in updated_reaction_species
    valid_reactions = rxn_consensus_species.index.isin(
        updated_reaction_species[SBML_DFS.R_ID]
    )

    invalid_reaction_names = rxn_consensus_species[~valid_reactions][
        SBML_DFS.R_NAME
    ].tolist()
    if len(invalid_reaction_names) > 0:
        # only the first five names are listed to keep the message short
        logger.info(
            f"{len(invalid_reaction_names)} reactions where substrates and products cancel out"
            f" were dropped including: {' & '.join(invalid_reaction_names[0:5])}"
        )

    updated_reactions = rxn_consensus_species[valid_reactions]

    return updated_reactions, updated_reaction_species
236
+
237
+
238
def _create_stubbed_index() -> indices.PWIndex:
    """Build the placeholder pathway index used for the uncompartmentalized model.

    Returns:
        indices.PWIndex: a one-row index whose pathway id is
            "uncompartmentalization"; file, source, species and date are None
    """

    placeholder_pathway = {
        SOURCE_SPEC.FILE: None,
        SOURCE_SPEC.SOURCE: None,
        SOURCE_SPEC.SPECIES: None,
        SOURCE_SPEC.PATHWAY_ID: "uncompartmentalization",
        SOURCE_SPEC.NAME: "Merging all compartments",
        SOURCE_SPEC.DATE: None,
    }

    # path validation is turned off because the entry has no file
    return indices.PWIndex(
        pd.DataFrame(placeholder_pathway, index=[0]), validate_paths=False
    )
255
+
256
+
257
def _create_stubbed_source() -> source.Source:
    """Build the placeholder Source object used for the uncompartmentalized model.

    Returns:
        source.Source: a Source holding a single "uncompartmentalization" model
            entry, paired with the stubbed pathway index
    """

    model_table = pd.DataFrame([{"model": "uncompartmentalization"}])
    return source.Source(model_table, pw_index=_create_stubbed_index())
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
try:
    # look the version up under the name this distribution is published as
    # ("napistu"); the previous name "calicolabs-cpr" does not match this wheel
    __version__ = version("napistu")
except PackageNotFoundError:
    # package is not installed (no distribution metadata is available);
    # __version__ is deliberately left undefined in that case
    pass
@@ -0,0 +1,117 @@
1
+ """Module to contain all constants used for representing and working with networks"""
2
+
3
+ from __future__ import annotations
4
+
5
+ from types import SimpleNamespace
6
+
7
+ from napistu.constants import SBML_DFS
8
+ from napistu.constants import SBOTERM_NAMES
9
+
10
# attribute names used on graph vertices
CPR_GRAPH_NODES = SimpleNamespace(
    NAME="name",
)

# attribute names used on graph edges
CPR_GRAPH_EDGES = SimpleNamespace(
    DIRECTED="directed",
    FROM="from",
    R_ID=SBML_DFS.R_ID,
    R_ISREVERSIBLE=SBML_DFS.R_ISREVERSIBLE,
    SBO_TERM=SBML_DFS.SBO_TERM,
    SBO_NAME="sbo_name",
    SC_DEGREE="sc_degree",
    SC_PARENTS="sc_parents",
    SC_CHILDREN="sc_children",
    SPECIES_TYPE="species_type",
    STOICHIOMETRY=SBML_DFS.STOICHIOMETRY,
    TO="to",
    UPSTREAM_WEIGHTS="upstream_weights",
    WEIGHTS="weights",
)

# edge variables which must be present in a cpr graph
CPR_GRAPH_REQUIRED_EDGE_VARS = {
    CPR_GRAPH_EDGES.FROM,
    CPR_GRAPH_EDGES.TO,
    CPR_GRAPH_EDGES.SBO_TERM,
    CPR_GRAPH_EDGES.STOICHIOMETRY,
    CPR_GRAPH_EDGES.SC_PARENTS,
    CPR_GRAPH_EDGES.SC_CHILDREN,
}

# ---- nomenclature for individual fields ----

CPR_GRAPH_NODE_TYPES = SimpleNamespace(
    REACTION="reaction",
    SPECIES="species",
)

VALID_CPR_GRAPH_NODE_TYPES = [
    CPR_GRAPH_NODE_TYPES.REACTION,
    CPR_GRAPH_NODE_TYPES.SPECIES,
]

CPR_GRAPH_EDGE_DIRECTIONS = SimpleNamespace(
    FORWARD="forward",
    REVERSE="reverse",
    UNDIRECTED="undirected",
)

# ---- network-level nomenclature ----

CPR_GRAPH_TYPES = SimpleNamespace(
    BIPARTITE="bipartite",
    REGULATORY="regulatory",
    SURROGATE="surrogate",
)

VALID_CPR_GRAPH_TYPES = [
    CPR_GRAPH_TYPES.BIPARTITE,
    CPR_GRAPH_TYPES.REGULATORY,
    CPR_GRAPH_TYPES.SURROGATE,
]

CPR_WEIGHTING_STRATEGIES = SimpleNamespace(
    CALIBRATED="calibrated",
    MIXED="mixed",
    TOPOLOGY="topology",
    UNWEIGHTED="unweighted",
)

VALID_WEIGHTING_STRATEGIES = [
    CPR_WEIGHTING_STRATEGIES.UNWEIGHTED,
    CPR_WEIGHTING_STRATEGIES.TOPOLOGY,
    CPR_WEIGHTING_STRATEGIES.MIXED,
    CPR_WEIGHTING_STRATEGIES.CALIBRATED,
]

# The regulatory graph orders the entities of a reaction from upstream to
# downstream:
# modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> product

REGULATORY_GRAPH_HIERARCHY = [
    [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
    [SBOTERM_NAMES.CATALYST],
    [SBOTERM_NAMES.REACTANT],
    [CPR_GRAPH_NODE_TYPES.REACTION],
    # interactors are normally handled by
    # net_create._format_interactors_for_regulatory_graph() so we do not expect
    # them to be defined, but they are included here until Issue #102 is solved
    [SBOTERM_NAMES.INTERACTOR],
    [SBOTERM_NAMES.PRODUCT],
]

# An alternative layout to regulatory where enzymes are downstream of
# substrates. This doesn't make much sense from a regulatory perspective
# because enzymes modify substrates, not the other way around. But it's what
# one might expect if catalysts are a surrogate for reactions, as is the case
# for metabolic network layouts.

SURROGATE_GRAPH_HIERARCHY = [
    [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
    [SBOTERM_NAMES.REACTANT],
    [SBOTERM_NAMES.CATALYST],
    [CPR_GRAPH_NODE_TYPES.REACTION],
    # interactors are normally handled by
    # net_create._format_interactors_for_regulatory_graph() so we do not expect
    # them to be defined, but they are included here until Issue #102 is solved
    [SBOTERM_NAMES.INTERACTOR],
    [SBOTERM_NAMES.PRODUCT],
]

NEIGHBORHOOD_NETWORK_TYPES = SimpleNamespace(
    DOWNSTREAM="downstream",
    HOURGLASS="hourglass",
    UPSTREAM="upstream",
)

VALID_NEIGHBORHOOD_NETWORK_TYPES = [
    NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
    NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
    NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM,
]