napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev3.dist-info/METADATA +84 -0
- napistu-0.2.4.dev3.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
napistu/modify/constants.py
CHANGED
@@ -2,8 +2,13 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
+
from types import SimpleNamespace
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
|
9
|
+
from napistu.constants import IDENTIFIERS
|
10
|
+
from napistu.constants import ONTOLOGIES
|
11
|
+
|
7
12
|
VALID_ANNOTATION_TYPES = [
|
8
13
|
"foci",
|
9
14
|
"reactions",
|
@@ -84,3 +89,39 @@ COFACTOR_CHEBI_IDS = pd.DataFrame(
|
|
84
89
|
],
|
85
90
|
columns=["cofactor", "chebi"],
|
86
91
|
)
|
92
|
+
|
93
|
+
NEO4J_MEMBERS_RAW = SimpleNamespace(
|
94
|
+
SET_NAME="set_name",
|
95
|
+
SET_ID="set_id",
|
96
|
+
MEMBER_NAME="member_name",
|
97
|
+
MEMBER_ID="member_id",
|
98
|
+
IDENTIFIER=IDENTIFIERS.IDENTIFIER,
|
99
|
+
ONTOLOGY=IDENTIFIERS.ONTOLOGY,
|
100
|
+
)
|
101
|
+
|
102
|
+
NEO4_MEMBERS_SET = {
|
103
|
+
NEO4J_MEMBERS_RAW.SET_NAME,
|
104
|
+
NEO4J_MEMBERS_RAW.SET_ID,
|
105
|
+
NEO4J_MEMBERS_RAW.MEMBER_NAME,
|
106
|
+
NEO4J_MEMBERS_RAW.MEMBER_ID,
|
107
|
+
NEO4J_MEMBERS_RAW.IDENTIFIER,
|
108
|
+
NEO4J_MEMBERS_RAW.ONTOLOGY,
|
109
|
+
}
|
110
|
+
|
111
|
+
REACTOME_CROSSREF_RAW = SimpleNamespace(
|
112
|
+
MEMBER_NAME="member_name",
|
113
|
+
REACTOME_ID="reactome_id",
|
114
|
+
UNIPROT=ONTOLOGIES.UNIPROT,
|
115
|
+
IDENTIFIER=IDENTIFIERS.IDENTIFIER,
|
116
|
+
ONTOLOGY=IDENTIFIERS.ONTOLOGY,
|
117
|
+
URL=IDENTIFIERS.URL,
|
118
|
+
)
|
119
|
+
|
120
|
+
REACTOME_CROSSREF_SET = {
|
121
|
+
REACTOME_CROSSREF_RAW.MEMBER_NAME,
|
122
|
+
REACTOME_CROSSREF_RAW.REACTOME_ID,
|
123
|
+
REACTOME_CROSSREF_RAW.UNIPROT,
|
124
|
+
REACTOME_CROSSREF_RAW.IDENTIFIER,
|
125
|
+
REACTOME_CROSSREF_RAW.ONTOLOGY,
|
126
|
+
REACTOME_CROSSREF_RAW.URL,
|
127
|
+
}
|
napistu/modify/curation.py
CHANGED
@@ -244,7 +244,10 @@ def _find_invalid_entities(
|
|
244
244
|
|
245
245
|
# pull out the annotations that start with the table being evaluated
|
246
246
|
remove_df = invalid_entities[invalid_entities["table"] == tab]
|
247
|
-
|
247
|
+
if not isinstance(remove_df, pd.DataFrame):
|
248
|
+
raise TypeError(
|
249
|
+
f"remove_df must be a pandas DataFrame, but got {type(remove_df).__name__}"
|
250
|
+
)
|
248
251
|
|
249
252
|
invalid_remove_vars = (
|
250
253
|
remove_df["variable"][~remove_df["variable"].isin(tab_vars)]
|
napistu/modify/gaps.py
CHANGED
@@ -29,24 +29,25 @@ def add_transportation_reactions(
|
|
29
29
|
exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
|
30
30
|
) -> sbml_dfs_core.SBML_dfs:
|
31
31
|
"""
|
32
|
-
Add
|
32
|
+
Add transportation reactions to connect all forms of a protein across compartments.
|
33
33
|
|
34
34
|
Identifies proteins whose various compartmentalized forms cannot reach one
|
35
35
|
another via existing transportation reactions and then adds transportation
|
36
36
|
reactions which connect all forms of a protein.
|
37
37
|
|
38
|
-
|
38
|
+
Parameters
|
39
|
+
----------
|
40
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
39
41
|
A mechanistic model containing a set of molecular species which exist
|
40
|
-
in multiple compartments and are interconverted by reactions
|
41
|
-
exchange_compartment: str
|
42
|
-
The name of an exchange compartment matching a c_name from sbml_dfs.compartments
|
42
|
+
in multiple compartments and are interconverted by reactions.
|
43
|
+
exchange_compartment : str, optional
|
44
|
+
The name of an exchange compartment matching a c_name from sbml_dfs.compartments.
|
43
45
|
|
44
|
-
Returns
|
45
|
-
|
46
|
-
|
46
|
+
Returns
|
47
|
+
-------
|
48
|
+
sbml_dfs_core.SBML_dfs
|
47
49
|
The input sbml_dfs with additional transport reactions and compartmentalized species
|
48
50
|
(in the exchange compartment) added.
|
49
|
-
|
50
51
|
"""
|
51
52
|
|
52
53
|
# validate arguments
|
@@ -75,28 +76,32 @@ def update_sbml_df_with_exchange(
|
|
75
76
|
exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
|
76
77
|
) -> sbml_dfs_core.SBML_dfs:
|
77
78
|
"""
|
78
|
-
|
79
|
-
Update SBML_dfs With Exchange
|
80
|
-
|
81
79
|
Add transportation reactions between all locations of a set of molecular species by
|
82
80
|
including bidirectional exchange reactions through an exchange compartment.
|
83
81
|
|
84
|
-
|
85
|
-
|
86
|
-
|
82
|
+
This function is modular and delegates to helper functions for each logical step:
|
83
|
+
- Finding new exchange compartmentalized species
|
84
|
+
- Adding new compartmentalized species
|
85
|
+
- Building the transport reaction edgelist
|
86
|
+
- Creating new reactions
|
87
|
+
- Creating new reaction species
|
88
|
+
- Updating and validating the sbml_dfs
|
89
|
+
|
90
|
+
Parameters
|
91
|
+
----------
|
92
|
+
species_needing_transport_rxns : np.ndarray
|
87
93
|
Vector of molecular species (s_ids) with no or insufficient transportation reactions
|
88
|
-
sbml_dfs: sbml_dfs_core.SBML_dfs
|
94
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
89
95
|
A mechanistic model containing a set of molecular species which exist
|
90
96
|
in multiple compartments and are interconverted by reactions
|
91
|
-
exchange_compartment: str
|
97
|
+
exchange_compartment : str, optional
|
92
98
|
The name of an exchange compartment matching a c_name from sbml_dfs.compartments
|
93
99
|
|
94
|
-
Returns
|
95
|
-
|
96
|
-
|
100
|
+
Returns
|
101
|
+
-------
|
102
|
+
sbml_dfs_core.SBML_dfs
|
97
103
|
The input sbml_dfs with additional transport reactions and compartmentalized species
|
98
104
|
(in the exchange compartment) added.
|
99
|
-
|
100
105
|
"""
|
101
106
|
|
102
107
|
exchange_compartment_id = sbml_dfs.compartments[
|
@@ -108,8 +113,6 @@ def update_sbml_df_with_exchange(
|
|
108
113
|
f"{len(exchange_compartment_id)} compartments - this is unexpected behavior"
|
109
114
|
)
|
110
115
|
exchange_compartment_id = exchange_compartment_id[0]
|
111
|
-
|
112
|
-
# create a source object with provenance information for the entities that we'll add to the sbml_dfs
|
113
116
|
gap_filling_source_obj = source.Source(
|
114
117
|
pd.Series(
|
115
118
|
{
|
@@ -121,26 +124,107 @@ def update_sbml_df_with_exchange(
|
|
121
124
|
.to_frame()
|
122
125
|
.T
|
123
126
|
)
|
124
|
-
|
125
|
-
# initialize an empty identifiers object for gap filled reactions
|
126
127
|
gap_filling_id_obj = identifiers.Identifiers([])
|
128
|
+
new_exchange_cspecies = _find_new_exchange_cspecies(
|
129
|
+
species_needing_transport_rxns, sbml_dfs, exchange_compartment_id
|
130
|
+
)
|
131
|
+
logger.info(
|
132
|
+
f"{len(new_exchange_cspecies)} new compartmentalized species must "
|
133
|
+
f"be added to the {exchange_compartment} to add protein transportation gap filling"
|
134
|
+
)
|
135
|
+
new_exchange_cspecies_df = _add_new_exchange_cspecies(
|
136
|
+
new_exchange_cspecies,
|
137
|
+
sbml_dfs,
|
138
|
+
exchange_compartment_id,
|
139
|
+
exchange_compartment,
|
140
|
+
gap_filling_source_obj,
|
141
|
+
)
|
142
|
+
updated_sbml_dfs = copy.deepcopy(sbml_dfs)
|
143
|
+
updated_sbml_dfs.compartmentalized_species = pd.concat(
|
144
|
+
[updated_sbml_dfs.compartmentalized_species, new_exchange_cspecies_df]
|
145
|
+
)
|
146
|
+
transport_rxn_edgelist = _build_transport_rxn_edgelist(
|
147
|
+
updated_sbml_dfs, species_needing_transport_rxns, exchange_compartment_id
|
148
|
+
)
|
149
|
+
new_reactions = _create_new_reactions(
|
150
|
+
transport_rxn_edgelist, sbml_dfs, gap_filling_id_obj, gap_filling_source_obj
|
151
|
+
)
|
152
|
+
logger.info(
|
153
|
+
f"{len(new_reactions)} new reactions must "
|
154
|
+
f"be added to the {exchange_compartment} to add molecular species transportation reactions"
|
155
|
+
)
|
156
|
+
updated_sbml_dfs.reactions = pd.concat([updated_sbml_dfs.reactions, new_reactions])
|
157
|
+
new_reaction_species = _create_new_reaction_species(
|
158
|
+
transport_rxn_edgelist, sbml_dfs
|
159
|
+
)
|
160
|
+
updated_sbml_dfs.reaction_species = pd.concat(
|
161
|
+
[updated_sbml_dfs.reaction_species, new_reaction_species]
|
162
|
+
)
|
163
|
+
updated_sbml_dfs = sbml_dfs_utils.check_entity_data_index_matching(
|
164
|
+
updated_sbml_dfs, SBML_DFS.REACTIONS
|
165
|
+
)
|
166
|
+
updated_sbml_dfs.validate()
|
167
|
+
return updated_sbml_dfs
|
168
|
+
|
127
169
|
|
128
|
-
|
170
|
+
def _find_new_exchange_cspecies(
|
171
|
+
species_needing_transport_rxns: np.ndarray,
|
172
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
173
|
+
exchange_compartment_id: str,
|
174
|
+
) -> set:
|
175
|
+
"""
|
176
|
+
Find species which need exchange reactions but are not currently present in the exchange compartment.
|
177
|
+
|
178
|
+
Parameters
|
179
|
+
----------
|
180
|
+
species_needing_transport_rxns : np.ndarray
|
181
|
+
Vector of molecular species (s_ids) with no or insufficient transportation reactions
|
182
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
183
|
+
The SBML_dfs object
|
184
|
+
exchange_compartment_id : str
|
185
|
+
The compartment ID for the exchange compartment
|
186
|
+
|
187
|
+
Returns
|
188
|
+
-------
|
189
|
+
set
|
190
|
+
Set of s_ids needing new compartmentalized species in the exchange compartment.
|
191
|
+
"""
|
129
192
|
existing_exchange_cspecies = sbml_dfs.compartmentalized_species[
|
130
193
|
sbml_dfs.compartmentalized_species[SBML_DFS.C_ID] == exchange_compartment_id
|
131
194
|
]
|
132
|
-
|
195
|
+
return set(species_needing_transport_rxns).difference(
|
133
196
|
set(existing_exchange_cspecies[SBML_DFS.S_ID].tolist())
|
134
197
|
)
|
135
198
|
|
136
|
-
logger.info(
|
137
|
-
f"{len(new_exchange_cspecies)} new compartmentalized species must "
|
138
|
-
f"be added to the {exchange_compartment} to add protein transportation gap filling"
|
139
|
-
)
|
140
199
|
|
141
|
-
|
142
|
-
|
143
|
-
|
200
|
+
def _add_new_exchange_cspecies(
|
201
|
+
new_exchange_cspecies: set,
|
202
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
203
|
+
exchange_compartment_id: str,
|
204
|
+
exchange_compartment: str,
|
205
|
+
gap_filling_source_obj: source.Source,
|
206
|
+
) -> pd.DataFrame:
|
207
|
+
"""
|
208
|
+
Add new compartmentalized species to the exchange compartment.
|
209
|
+
|
210
|
+
Parameters
|
211
|
+
----------
|
212
|
+
new_exchange_cspecies : set
|
213
|
+
Set of s_ids needing new compartmentalized species in the exchange compartment.
|
214
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
215
|
+
The SBML_dfs object
|
216
|
+
exchange_compartment_id : str
|
217
|
+
The compartment ID for the exchange compartment
|
218
|
+
exchange_compartment : str
|
219
|
+
The name of the exchange compartment
|
220
|
+
gap_filling_source_obj : source.Source
|
221
|
+
Source object for gap-filling
|
222
|
+
|
223
|
+
Returns
|
224
|
+
-------
|
225
|
+
pd.DataFrame
|
226
|
+
DataFrame of new compartmentalized species to add.
|
227
|
+
"""
|
144
228
|
new_exchange_cspecies_fks = (
|
145
229
|
pd.DataFrame({SBML_DFS.S_ID: list(new_exchange_cspecies)})
|
146
230
|
.assign(c_id=exchange_compartment_id)
|
@@ -157,32 +241,42 @@ def update_sbml_df_with_exchange(
|
|
157
241
|
]
|
158
242
|
new_exchange_cspecies_fks = new_exchange_cspecies_fks.drop(SBML_DFS.S_NAME, axis=1)
|
159
243
|
new_exchange_cspecies_fks[SBML_DFS.SC_SOURCE] = gap_filling_source_obj
|
160
|
-
|
161
|
-
# update index by incrementing existing keys
|
162
244
|
existing_sc_ids = sbml_dfs_utils.id_formatter_inv(
|
163
245
|
sbml_dfs.compartmentalized_species.index.tolist()
|
164
246
|
)
|
165
|
-
# filter np.nan which will be introduced if the key is not the default format
|
166
247
|
existing_sc_ids = [x for x in existing_sc_ids if x is not np.nan]
|
167
|
-
current_max_sc_id = max(existing_sc_ids)
|
168
|
-
|
248
|
+
current_max_sc_id = max(existing_sc_ids) if existing_sc_ids else 0
|
169
249
|
new_int_ids = [
|
170
250
|
1 + current_max_sc_id + x for x in new_exchange_cspecies_fks.index.tolist()
|
171
251
|
]
|
172
252
|
new_exchange_cspecies_fks[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
|
173
253
|
new_int_ids, id_type=SBML_DFS.SC_ID
|
174
254
|
)
|
175
|
-
|
255
|
+
return new_exchange_cspecies_fks.set_index(SBML_DFS.SC_ID)
|
176
256
|
|
177
|
-
# add new compartmentalized species to sbml_dfs model
|
178
|
-
updated_sbml_dfs = copy.deepcopy(sbml_dfs)
|
179
|
-
updated_sbml_dfs.compartmentalized_species = pd.concat(
|
180
|
-
[updated_sbml_dfs.compartmentalized_species, new_exchange_cspecies_df]
|
181
|
-
)
|
182
257
|
|
183
|
-
|
258
|
+
def _build_transport_rxn_edgelist(
|
259
|
+
updated_sbml_dfs: sbml_dfs_core.SBML_dfs,
|
260
|
+
species_needing_transport_rxns: np.ndarray,
|
261
|
+
exchange_compartment_id: str,
|
262
|
+
) -> pd.DataFrame:
|
263
|
+
"""
|
264
|
+
Build the edgelist for new transport reactions, ensuring only one reversible reaction per compartment pair.
|
265
|
+
|
266
|
+
Parameters
|
267
|
+
----------
|
268
|
+
updated_sbml_dfs : sbml_dfs_core.SBML_dfs
|
269
|
+
The updated SBML_dfs object
|
270
|
+
species_needing_transport_rxns : np.ndarray
|
271
|
+
Vector of molecular species (s_ids) with no or insufficient transportation reactions
|
272
|
+
exchange_compartment_id : str
|
273
|
+
The compartment ID for the exchange compartment
|
184
274
|
|
185
|
-
|
275
|
+
Returns
|
276
|
+
-------
|
277
|
+
pd.DataFrame
|
278
|
+
Edgelist for new transport reactions.
|
279
|
+
"""
|
186
280
|
cspecies_needing_transport = (
|
187
281
|
updated_sbml_dfs.compartmentalized_species[
|
188
282
|
updated_sbml_dfs.compartmentalized_species[SBML_DFS.S_ID].isin(
|
@@ -192,57 +286,59 @@ def update_sbml_df_with_exchange(
|
|
192
286
|
.reset_index()
|
193
287
|
.drop(SBML_DFS.SC_SOURCE, axis=1)
|
194
288
|
)
|
195
|
-
|
196
289
|
exchange_cspecies = cspecies_needing_transport[
|
197
290
|
cspecies_needing_transport[SBML_DFS.C_ID] == exchange_compartment_id
|
198
291
|
].drop(SBML_DFS.C_ID, axis=1)
|
199
292
|
non_exchange_cspecies = cspecies_needing_transport[
|
200
293
|
cspecies_needing_transport[SBML_DFS.C_ID] != exchange_compartment_id
|
201
294
|
].drop(SBML_DFS.C_ID, axis=1)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
).merge(
|
209
|
-
non_exchange_cspecies.rename(
|
210
|
-
{SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
|
211
|
-
)
|
212
|
-
),
|
213
|
-
# non-exchange compartment -> exchange compartment
|
214
|
-
non_exchange_cspecies.rename(
|
215
|
-
{SBML_DFS.SC_ID: "sc_id_from", SBML_DFS.SC_NAME: "sc_name_from"}, axis=1
|
216
|
-
).merge(
|
217
|
-
exchange_cspecies.rename(
|
218
|
-
{SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
|
219
|
-
)
|
220
|
-
),
|
221
|
-
]
|
295
|
+
transport_rxn_edgelist = exchange_cspecies.rename(
|
296
|
+
{SBML_DFS.SC_ID: "sc_id_from", SBML_DFS.SC_NAME: "sc_name_from"}, axis=1
|
297
|
+
).merge(
|
298
|
+
non_exchange_cspecies.rename(
|
299
|
+
{SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
|
300
|
+
)
|
222
301
|
)
|
223
302
|
|
224
|
-
# we should add two reactions for each non-exchange compartment cspecies
|
225
|
-
# one transporting from the exchange compartment and one transporting into the
|
226
|
-
# exchange compartment
|
227
|
-
assert transport_rxn_edgelist.shape[0] == 2 * non_exchange_cspecies.shape[0]
|
228
|
-
|
229
|
-
# the rows in this edgelist correspond to new reactions that we'll add
|
230
|
-
# to the model
|
231
303
|
transport_rxn_edgelist[SBML_DFS.R_NAME] = [
|
232
|
-
f"{x}
|
304
|
+
f"{x} <-> {y} gap-filling transport"
|
233
305
|
for x, y in zip(
|
234
306
|
transport_rxn_edgelist["sc_name_from"], transport_rxn_edgelist["sc_name_to"]
|
235
307
|
)
|
236
308
|
]
|
237
|
-
transport_rxn_edgelist =
|
309
|
+
transport_rxn_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
|
310
|
+
return transport_rxn_edgelist.reset_index(drop=True)
|
238
311
|
|
239
|
-
|
312
|
+
|
313
|
+
def _create_new_reactions(
|
314
|
+
transport_rxn_edgelist: pd.DataFrame,
|
315
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
316
|
+
gap_filling_id_obj: identifiers.Identifiers,
|
317
|
+
gap_filling_source_obj: source.Source,
|
318
|
+
) -> pd.DataFrame:
|
319
|
+
"""
|
320
|
+
Create new reactions DataFrame for gap-filling transport reactions.
|
321
|
+
|
322
|
+
Parameters
|
323
|
+
----------
|
324
|
+
transport_rxn_edgelist : pd.DataFrame
|
325
|
+
Edgelist for new transport reactions.
|
326
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
327
|
+
The SBML_dfs object
|
328
|
+
gap_filling_id_obj : identifiers.Identifiers
|
329
|
+
Identifiers object for gap-filling
|
330
|
+
gap_filling_source_obj : source.Source
|
331
|
+
Source object for gap-filling
|
332
|
+
|
333
|
+
Returns
|
334
|
+
-------
|
335
|
+
pd.DataFrame
|
336
|
+
DataFrame of new reactions to add.
|
337
|
+
"""
|
240
338
|
|
241
339
|
existing_r_ids = sbml_dfs_utils.id_formatter_inv(sbml_dfs.reactions.index.tolist())
|
242
|
-
# filter np.nan which will be introduced if the key is not the default format
|
243
340
|
existing_r_ids = [x for x in existing_r_ids if x is not np.nan]
|
244
|
-
current_max_r_id = max(existing_r_ids)
|
245
|
-
|
341
|
+
current_max_r_id = max(existing_r_ids) if existing_r_ids else 0
|
246
342
|
new_int_ids = [
|
247
343
|
1 + current_max_r_id + x for x in transport_rxn_edgelist.index.tolist()
|
248
344
|
]
|
@@ -250,22 +346,36 @@ def update_sbml_df_with_exchange(
|
|
250
346
|
new_int_ids, id_type=SBML_DFS.R_ID
|
251
347
|
)
|
252
348
|
new_reactions = (
|
253
|
-
transport_rxn_edgelist[
|
349
|
+
transport_rxn_edgelist[
|
350
|
+
[SBML_DFS.R_ID, SBML_DFS.R_NAME, SBML_DFS.R_ISREVERSIBLE]
|
351
|
+
]
|
254
352
|
.set_index(SBML_DFS.R_ID)
|
255
353
|
.assign(r_Identifiers=gap_filling_id_obj)
|
256
354
|
.assign(r_Source=gap_filling_source_obj)
|
257
355
|
)
|
356
|
+
return new_reactions
|
258
357
|
|
259
|
-
logger.info(
|
260
|
-
f"{len(new_reactions)} new reactions must "
|
261
|
-
f"be added to the {exchange_compartment} to add molecular species transportation reactions"
|
262
|
-
)
|
263
358
|
|
264
|
-
|
265
|
-
|
359
|
+
def _create_new_reaction_species(
|
360
|
+
transport_rxn_edgelist: pd.DataFrame,
|
361
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
362
|
+
) -> pd.DataFrame:
|
363
|
+
"""
|
364
|
+
Create new reaction species DataFrame for gap-filling transport reactions.
|
365
|
+
|
366
|
+
Parameters
|
367
|
+
----------
|
368
|
+
transport_rxn_edgelist : pd.DataFrame
|
369
|
+
Edgelist for new transport reactions.
|
370
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
371
|
+
The SBML_dfs object
|
372
|
+
|
373
|
+
Returns
|
374
|
+
-------
|
375
|
+
pd.DataFrame
|
376
|
+
DataFrame of new reaction species to add.
|
377
|
+
"""
|
266
378
|
|
267
|
-
# create new reaction species
|
268
|
-
# each reaction adds two reaction species - the from and to compartmentalized species
|
269
379
|
new_reaction_species = pd.concat(
|
270
380
|
[
|
271
381
|
transport_rxn_edgelist[["sc_id_from", SBML_DFS.R_ID]]
|
@@ -284,53 +394,36 @@ def update_sbml_df_with_exchange(
|
|
284
394
|
existing_rsc_ids = sbml_dfs_utils.id_formatter_inv(
|
285
395
|
sbml_dfs.reaction_species.index.tolist()
|
286
396
|
)
|
397
|
+
|
287
398
|
# filter np.nan which will be introduced if the key is not the default format
|
288
399
|
existing_rsc_ids = [x for x in existing_rsc_ids if x is not np.nan]
|
289
|
-
current_max_rsc_id = max(existing_rsc_ids)
|
290
|
-
|
400
|
+
current_max_rsc_id = max(existing_rsc_ids) if existing_rsc_ids else 0
|
291
401
|
new_int_ids = [
|
292
402
|
1 + current_max_rsc_id + x for x in new_reaction_species.index.tolist()
|
293
403
|
]
|
294
404
|
new_reaction_species[SBML_DFS.RSC_ID] = sbml_dfs_utils.id_formatter(
|
295
405
|
new_int_ids, id_type=SBML_DFS.RSC_ID
|
296
406
|
)
|
297
|
-
|
298
|
-
|
299
|
-
updated_sbml_dfs.reaction_species = pd.concat(
|
300
|
-
[updated_sbml_dfs.reaction_species, new_reaction_species]
|
301
|
-
)
|
302
|
-
|
303
|
-
updated_sbml_dfs = sbml_dfs_utils.check_entity_data_index_matching(
|
304
|
-
updated_sbml_dfs, SBML_DFS.REACTIONS
|
305
|
-
)
|
306
|
-
|
307
|
-
updated_sbml_dfs.validate()
|
308
|
-
|
309
|
-
return updated_sbml_dfs
|
407
|
+
return new_reaction_species.set_index(SBML_DFS.RSC_ID)
|
310
408
|
|
311
409
|
|
312
410
|
def _identify_species_needing_transport_reactions(
|
313
411
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
314
412
|
) -> np.ndarray:
|
315
413
|
"""
|
316
|
-
Identify
|
317
|
-
|
318
|
-
Determine whether each molecular species has sufficient transport reactions
|
319
|
-
so all of the compartments where it exists are connected.
|
414
|
+
Identify molecular species needing transport reactions so that all of the compartments where each species exists are connected.
|
320
415
|
|
321
|
-
Parameters
|
322
|
-
|
323
|
-
sbml_dfs: sbml_dfs_core.SBML_dfs
|
416
|
+
Parameters
|
417
|
+
----------
|
418
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
324
419
|
A mechanistic model containing a set of molecular species which exist
|
325
420
|
in multiple compartments and are interconverted by reactions
|
326
421
|
|
327
|
-
Returns
|
328
|
-
|
329
|
-
|
422
|
+
Returns
|
423
|
+
-------
|
424
|
+
np.ndarray
|
330
425
|
Vector of molecular species (s_ids) with no or insufficient transportation reactions
|
331
|
-
|
332
426
|
"""
|
333
|
-
|
334
427
|
# ensure that all genic reaction species can be produced and transported to each
|
335
428
|
# compartment where they should exist.
|
336
429
|
# we should be able to follow a directed path from a synthesized protein
|
@@ -420,7 +513,7 @@ def _identify_species_needing_transport_reactions(
|
|
420
513
|
species_transport_status_df = pd.DataFrame(species_transport_status_dict_list)
|
421
514
|
|
422
515
|
# optional logging
|
423
|
-
|
516
|
+
logger.debug(_log_protein_transport_gapfilling(species_transport_status_df))
|
424
517
|
|
425
518
|
# define proteins whose compartmentalized forms are not connected
|
426
519
|
proteins_needing_transport_rxns = species_transport_status_df[
|
@@ -443,31 +536,25 @@ def _identify_species_needing_transport_reactions(
|
|
443
536
|
|
444
537
|
|
445
538
|
def _eval_existing_inter_cspecies_paths(
|
446
|
-
comp_specs: pd.DataFrame,
|
539
|
+
comp_specs: pd.DataFrame,
|
540
|
+
existing_cspecies_paths: pd.DataFrame,
|
447
541
|
) -> dict:
|
448
542
|
"""
|
449
|
-
Evaluate
|
450
|
-
|
451
|
-
Determine whether paths between compartments found in
|
452
|
-
_find_existing_inter_cspecies_paths()
|
453
|
-
cover all of the compartments where the protein exists.
|
454
|
-
|
455
|
-
Parameters:
|
543
|
+
Evaluate whether paths between compartments found in _find_existing_inter_cspecies_paths cover all of the compartments where the protein exists.
|
456
544
|
|
457
|
-
|
545
|
+
Parameters
|
546
|
+
----------
|
547
|
+
comp_specs : pd.DataFrame
|
458
548
|
Compartmentalized species for a single s_id
|
459
|
-
existing_cspecies_paths: pd.DataFrame
|
460
|
-
An edgelist of a from and to compartmentalized species
|
461
|
-
and a label of the path connecting them.
|
549
|
+
existing_cspecies_paths : pd.DataFrame
|
550
|
+
An edgelist of a from and to compartmentalized species and a label of the path connecting them.
|
462
551
|
|
463
|
-
Returns
|
464
|
-
|
465
|
-
|
552
|
+
Returns
|
553
|
+
-------
|
554
|
+
dict
|
466
555
|
type: the status category the species falls in
|
467
|
-
|
468
|
-
|
556
|
+
msg: an optional message describing the type
|
469
557
|
"""
|
470
|
-
|
471
558
|
# If the largest connected component includes all compartmentalized species
|
472
559
|
# then we can assume that the transportation reactions which exist are adequate. Note that
|
473
560
|
# because the subgraph is directed its topology may still be kind of funky.
|
@@ -515,31 +602,23 @@ def _find_existing_inter_cspecies_paths(
|
|
515
602
|
partial_protein_cspecies: pd.DataFrame,
|
516
603
|
) -> pd.DataFrame | None:
|
517
604
|
"""
|
518
|
-
Find
|
605
|
+
Find which of the compartments where a protein exists can be reached from one another by traversing a directed graph of reactions and molecular species including the protein.
|
519
606
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
Parameters:
|
525
|
-
|
526
|
-
comp_specs: pd.DataFrame
|
607
|
+
Parameters
|
608
|
+
----------
|
609
|
+
comp_specs : pd.DataFrame
|
527
610
|
Compartmentalized species for a single s_id
|
528
|
-
uniprot_id: str
|
611
|
+
uniprot_id : str
|
529
612
|
The Uniprot ID for the protein of interest
|
530
|
-
directed_graph: ig.Graph
|
613
|
+
directed_graph : ig.Graph
|
531
614
|
An igraph version of the sbml_dfs model
|
532
|
-
partial_protein_cspecies: pd.DataFrame
|
533
|
-
A table of proteins included in each species ID (this includes BQB_HAS_PART
|
534
|
-
qualifiers in addition to the BQB_IS qualifiers which generally define
|
535
|
-
distinct species
|
536
|
-
|
537
|
-
Returns:
|
538
|
-
|
539
|
-
existing_cspecies_paths: pd.DataFrame or None
|
540
|
-
An edgelist of a from and to compartmentalized species and a label of the path
|
541
|
-
connecting them.
|
615
|
+
partial_protein_cspecies : pd.DataFrame
|
616
|
+
A table of proteins included in each species ID (this includes BQB_HAS_PART qualifiers in addition to the BQB_IS qualifiers which generally define distinct species).
|
542
617
|
|
618
|
+
Returns
|
619
|
+
-------
|
620
|
+
pd.DataFrame or None
|
621
|
+
An edgelist of a from and to compartmentalized species and a label of the path connecting them.
|
543
622
|
"""
|
544
623
|
|
545
624
|
reaction_vertices = np.where(
|
@@ -593,6 +672,14 @@ def _find_existing_inter_cspecies_paths(
|
|
593
672
|
def _log_protein_transport_gapfilling(
|
594
673
|
species_transport_status_df: pd.DataFrame,
|
595
674
|
) -> None:
|
675
|
+
"""
|
676
|
+
Log summary statistics and example messages for protein transport gapfilling.
|
677
|
+
|
678
|
+
Parameters
|
679
|
+
----------
|
680
|
+
species_transport_status_df : pd.DataFrame
|
681
|
+
DataFrame summarizing transport status for each species
|
682
|
+
"""
|
596
683
|
print(
|
597
684
|
utils.style_df(
|
598
685
|
species_transport_status_df.value_counts("type").to_frame().reset_index(),
|