napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev2.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev2.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,13 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from types import SimpleNamespace
6
+
5
7
  import pandas as pd
6
8
 
9
+ from napistu.constants import IDENTIFIERS
10
+ from napistu.constants import ONTOLOGIES
11
+
7
12
  VALID_ANNOTATION_TYPES = [
8
13
  "foci",
9
14
  "reactions",
@@ -84,3 +89,39 @@ COFACTOR_CHEBI_IDS = pd.DataFrame(
84
89
  ],
85
90
  columns=["cofactor", "chebi"],
86
91
  )
92
+
93
+ NEO4J_MEMBERS_RAW = SimpleNamespace(
94
+ SET_NAME="set_name",
95
+ SET_ID="set_id",
96
+ MEMBER_NAME="member_name",
97
+ MEMBER_ID="member_id",
98
+ IDENTIFIER=IDENTIFIERS.IDENTIFIER,
99
+ ONTOLOGY=IDENTIFIERS.ONTOLOGY,
100
+ )
101
+
102
+ NEO4_MEMBERS_SET = {
103
+ NEO4J_MEMBERS_RAW.SET_NAME,
104
+ NEO4J_MEMBERS_RAW.SET_ID,
105
+ NEO4J_MEMBERS_RAW.MEMBER_NAME,
106
+ NEO4J_MEMBERS_RAW.MEMBER_ID,
107
+ NEO4J_MEMBERS_RAW.IDENTIFIER,
108
+ NEO4J_MEMBERS_RAW.ONTOLOGY,
109
+ }
110
+
111
+ REACTOME_CROSSREF_RAW = SimpleNamespace(
112
+ MEMBER_NAME="member_name",
113
+ REACTOME_ID="reactome_id",
114
+ UNIPROT=ONTOLOGIES.UNIPROT,
115
+ IDENTIFIER=IDENTIFIERS.IDENTIFIER,
116
+ ONTOLOGY=IDENTIFIERS.ONTOLOGY,
117
+ URL=IDENTIFIERS.URL,
118
+ )
119
+
120
+ REACTOME_CROSSREF_SET = {
121
+ REACTOME_CROSSREF_RAW.MEMBER_NAME,
122
+ REACTOME_CROSSREF_RAW.REACTOME_ID,
123
+ REACTOME_CROSSREF_RAW.UNIPROT,
124
+ REACTOME_CROSSREF_RAW.IDENTIFIER,
125
+ REACTOME_CROSSREF_RAW.ONTOLOGY,
126
+ REACTOME_CROSSREF_RAW.URL,
127
+ }
@@ -244,7 +244,10 @@ def _find_invalid_entities(
244
244
 
245
245
  # pull out the annotations that start with the table being evaluated
246
246
  remove_df = invalid_entities[invalid_entities["table"] == tab]
247
- assert isinstance(remove_df, pd.DataFrame)
247
+ if not isinstance(remove_df, pd.DataFrame):
248
+ raise TypeError(
249
+ f"remove_df must be a pandas DataFrame, but got {type(remove_df).__name__}"
250
+ )
248
251
 
249
252
  invalid_remove_vars = (
250
253
  remove_df["variable"][~remove_df["variable"].isin(tab_vars)]
napistu/modify/gaps.py CHANGED
@@ -29,24 +29,25 @@ def add_transportation_reactions(
29
29
  exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
30
30
  ) -> sbml_dfs_core.SBML_dfs:
31
31
  """
32
- Add Transportation Reactions
32
+ Add transportation reactions to connect all forms of a protein across compartments.
33
33
 
34
34
  Identifies proteins whose various compartmentalized forms cannot reach one
35
35
  another via existing transportation reactions and then adds transportation
36
36
  reactions which connect all forms of a protein.
37
37
 
38
- sbml_dfs: sbml_dfs_core.SBML_dfs
38
+ Parameters
39
+ ----------
40
+ sbml_dfs : sbml_dfs_core.SBML_dfs
39
41
  A mechanistic model containing a set of molecular species which exist
40
- in multiple compartments and are interconverted by reactions
41
- exchange_compartment: str
42
- The name of an exchange compartment matching a c_name from sbml_dfs.compartments
42
+ in multiple compartments and are interconverted by reactions.
43
+ exchange_compartment : str, optional
44
+ The name of an exchange compartment matching a c_name from sbml_dfs.compartments.
43
45
 
44
- Returns:
45
-
46
- sbml_df_with_exchange: sbml_dfs_core.SBML_dfs
46
+ Returns
47
+ -------
48
+ sbml_dfs_core.SBML_dfs
47
49
  The input sbml_dfs with additional transport reactions and compartmentalized species
48
50
  (in the exchange compartment) added.
49
-
50
51
  """
51
52
 
52
53
  # validate arguments
@@ -75,28 +76,32 @@ def update_sbml_df_with_exchange(
75
76
  exchange_compartment: str = COMPARTMENTS["CYTOSOL"],
76
77
  ) -> sbml_dfs_core.SBML_dfs:
77
78
  """
78
-
79
- Update SBML_dfs With Exchange
80
-
81
79
  Add transportation reactions between all locations of a set of molecular species by
82
80
  including bidirectional exchange reactions through an exchange compartment.
83
81
 
84
- Parameters:
85
-
86
- species_needing_transport_rxns: np.ndarray
82
+ This function is modular and delegates to helper functions for each logical step:
83
+ - Finding new exchange compartmentalized species
84
+ - Adding new compartmentalized species
85
+ - Building the transport reaction edgelist
86
+ - Creating new reactions
87
+ - Creating new reaction species
88
+ - Updating and validating the sbml_dfs
89
+
90
+ Parameters
91
+ ----------
92
+ species_needing_transport_rxns : np.ndarray
87
93
  Vector of molecular species (s_ids) with no or insufficient transportation reactions
88
- sbml_dfs: sbml_dfs_core.SBML_dfs
94
+ sbml_dfs : sbml_dfs_core.SBML_dfs
89
95
  A mechanistic model containing a set of molecular species which exist
90
96
  in multiple compartments and are interconverted by reactions
91
- exchange_compartment: str
97
+ exchange_compartment : str, optional
92
98
  The name of an exchange compartment matching a c_name from sbml_dfs.compartments
93
99
 
94
- Returns:
95
-
96
- update_sbml_df_with_exchange: sbml_dfs_core.SBML_dfs
100
+ Returns
101
+ -------
102
+ sbml_dfs_core.SBML_dfs
97
103
  The input sbml_dfs with additional transport reactions and compartmentalized species
98
104
  (in the exchange compartment) added.
99
-
100
105
  """
101
106
 
102
107
  exchange_compartment_id = sbml_dfs.compartments[
@@ -108,8 +113,6 @@ def update_sbml_df_with_exchange(
108
113
  f"{len(exchange_compartment_id)} compartments - this is unexpected behavior"
109
114
  )
110
115
  exchange_compartment_id = exchange_compartment_id[0]
111
-
112
- # create a source object with provenance information for the entities that we'll add to the sbml_dfs
113
116
  gap_filling_source_obj = source.Source(
114
117
  pd.Series(
115
118
  {
@@ -121,26 +124,107 @@ def update_sbml_df_with_exchange(
121
124
  .to_frame()
122
125
  .T
123
126
  )
124
-
125
- # initialize an empty identifiers object for gap filled reactions
126
127
  gap_filling_id_obj = identifiers.Identifiers([])
128
+ new_exchange_cspecies = _find_new_exchange_cspecies(
129
+ species_needing_transport_rxns, sbml_dfs, exchange_compartment_id
130
+ )
131
+ logger.info(
132
+ f"{len(new_exchange_cspecies)} new compartmentalized species must "
133
+ f"be added to the {exchange_compartment} to add protein transportation gap filling"
134
+ )
135
+ new_exchange_cspecies_df = _add_new_exchange_cspecies(
136
+ new_exchange_cspecies,
137
+ sbml_dfs,
138
+ exchange_compartment_id,
139
+ exchange_compartment,
140
+ gap_filling_source_obj,
141
+ )
142
+ updated_sbml_dfs = copy.deepcopy(sbml_dfs)
143
+ updated_sbml_dfs.compartmentalized_species = pd.concat(
144
+ [updated_sbml_dfs.compartmentalized_species, new_exchange_cspecies_df]
145
+ )
146
+ transport_rxn_edgelist = _build_transport_rxn_edgelist(
147
+ updated_sbml_dfs, species_needing_transport_rxns, exchange_compartment_id
148
+ )
149
+ new_reactions = _create_new_reactions(
150
+ transport_rxn_edgelist, sbml_dfs, gap_filling_id_obj, gap_filling_source_obj
151
+ )
152
+ logger.info(
153
+ f"{len(new_reactions)} new reactions must "
154
+ f"be added to the {exchange_compartment} to add molecular species transportation reactions"
155
+ )
156
+ updated_sbml_dfs.reactions = pd.concat([updated_sbml_dfs.reactions, new_reactions])
157
+ new_reaction_species = _create_new_reaction_species(
158
+ transport_rxn_edgelist, sbml_dfs
159
+ )
160
+ updated_sbml_dfs.reaction_species = pd.concat(
161
+ [updated_sbml_dfs.reaction_species, new_reaction_species]
162
+ )
163
+ updated_sbml_dfs = sbml_dfs_utils.check_entity_data_index_matching(
164
+ updated_sbml_dfs, SBML_DFS.REACTIONS
165
+ )
166
+ updated_sbml_dfs.validate()
167
+ return updated_sbml_dfs
168
+
127
169
 
128
- # find species which need exchange reactions but which are not currently present in the exchange compartment
170
+ def _find_new_exchange_cspecies(
171
+ species_needing_transport_rxns: np.ndarray,
172
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
173
+ exchange_compartment_id: str,
174
+ ) -> set:
175
+ """
176
+ Find species which need exchange reactions but are not currently present in the exchange compartment.
177
+
178
+ Parameters
179
+ ----------
180
+ species_needing_transport_rxns : np.ndarray
181
+ Vector of molecular species (s_ids) with no or insufficient transportation reactions
182
+ sbml_dfs : sbml_dfs_core.SBML_dfs
183
+ The SBML_dfs object
184
+ exchange_compartment_id : str
185
+ The compartment ID for the exchange compartment
186
+
187
+ Returns
188
+ -------
189
+ set
190
+ Set of s_ids needing new compartmentalized species in the exchange compartment.
191
+ """
129
192
  existing_exchange_cspecies = sbml_dfs.compartmentalized_species[
130
193
  sbml_dfs.compartmentalized_species[SBML_DFS.C_ID] == exchange_compartment_id
131
194
  ]
132
- new_exchange_cspecies = set(species_needing_transport_rxns).difference(
195
+ return set(species_needing_transport_rxns).difference(
133
196
  set(existing_exchange_cspecies[SBML_DFS.S_ID].tolist())
134
197
  )
135
198
 
136
- logger.info(
137
- f"{len(new_exchange_cspecies)} new compartmentalized species must "
138
- f"be added to the {exchange_compartment} to add protein transportation gap filling"
139
- )
140
199
 
141
- # since compartmentalized species are defined by their sid and cid
142
- # add the defining foreign keys for all new exchange species
143
- # then we'll add the primary key by autoincrementing existing keys
200
+ def _add_new_exchange_cspecies(
201
+ new_exchange_cspecies: set,
202
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
203
+ exchange_compartment_id: str,
204
+ exchange_compartment: str,
205
+ gap_filling_source_obj: source.Source,
206
+ ) -> pd.DataFrame:
207
+ """
208
+ Add new compartmentalized species to the exchange compartment.
209
+
210
+ Parameters
211
+ ----------
212
+ new_exchange_cspecies : set
213
+ Set of s_ids needing new compartmentalized species in the exchange compartment.
214
+ sbml_dfs : sbml_dfs_core.SBML_dfs
215
+ The SBML_dfs object
216
+ exchange_compartment_id : str
217
+ The compartment ID for the exchange compartment
218
+ exchange_compartment : str
219
+ The name of the exchange compartment
220
+ gap_filling_source_obj : source.Source
221
+ Source object for gap-filling
222
+
223
+ Returns
224
+ -------
225
+ pd.DataFrame
226
+ DataFrame of new compartmentalized species to add.
227
+ """
144
228
  new_exchange_cspecies_fks = (
145
229
  pd.DataFrame({SBML_DFS.S_ID: list(new_exchange_cspecies)})
146
230
  .assign(c_id=exchange_compartment_id)
@@ -157,32 +241,42 @@ def update_sbml_df_with_exchange(
157
241
  ]
158
242
  new_exchange_cspecies_fks = new_exchange_cspecies_fks.drop(SBML_DFS.S_NAME, axis=1)
159
243
  new_exchange_cspecies_fks[SBML_DFS.SC_SOURCE] = gap_filling_source_obj
160
-
161
- # update index by incrementing existing keys
162
244
  existing_sc_ids = sbml_dfs_utils.id_formatter_inv(
163
245
  sbml_dfs.compartmentalized_species.index.tolist()
164
246
  )
165
- # filter np.nan which will be introduced if the key is not the default format
166
247
  existing_sc_ids = [x for x in existing_sc_ids if x is not np.nan]
167
- current_max_sc_id = max(existing_sc_ids)
168
-
248
+ current_max_sc_id = max(existing_sc_ids) if existing_sc_ids else 0
169
249
  new_int_ids = [
170
250
  1 + current_max_sc_id + x for x in new_exchange_cspecies_fks.index.tolist()
171
251
  ]
172
252
  new_exchange_cspecies_fks[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
173
253
  new_int_ids, id_type=SBML_DFS.SC_ID
174
254
  )
175
- new_exchange_cspecies_df = new_exchange_cspecies_fks.set_index(SBML_DFS.SC_ID)
255
+ return new_exchange_cspecies_fks.set_index(SBML_DFS.SC_ID)
176
256
 
177
- # add new compartmentalized species to sbml_dfs model
178
- updated_sbml_dfs = copy.deepcopy(sbml_dfs)
179
- updated_sbml_dfs.compartmentalized_species = pd.concat(
180
- [updated_sbml_dfs.compartmentalized_species, new_exchange_cspecies_df]
181
- )
182
257
 
183
- # define all new transport reactions as an edgelist
258
+ def _build_transport_rxn_edgelist(
259
+ updated_sbml_dfs: sbml_dfs_core.SBML_dfs,
260
+ species_needing_transport_rxns: np.ndarray,
261
+ exchange_compartment_id: str,
262
+ ) -> pd.DataFrame:
263
+ """
264
+ Build the edgelist for new transport reactions, ensuring only one reversible reaction per compartment pair.
265
+
266
+ Parameters
267
+ ----------
268
+ updated_sbml_dfs : sbml_dfs_core.SBML_dfs
269
+ The updated SBML_dfs object
270
+ species_needing_transport_rxns : np.ndarray
271
+ Vector of molecular species (s_ids) with no or insufficient transportation reactions
272
+ exchange_compartment_id : str
273
+ The compartment ID for the exchange compartment
184
274
 
185
- # pull out all cspecies of species needing transport
275
+ Returns
276
+ -------
277
+ pd.DataFrame
278
+ Edgelist for new transport reactions.
279
+ """
186
280
  cspecies_needing_transport = (
187
281
  updated_sbml_dfs.compartmentalized_species[
188
282
  updated_sbml_dfs.compartmentalized_species[SBML_DFS.S_ID].isin(
@@ -192,57 +286,59 @@ def update_sbml_df_with_exchange(
192
286
  .reset_index()
193
287
  .drop(SBML_DFS.SC_SOURCE, axis=1)
194
288
  )
195
-
196
289
  exchange_cspecies = cspecies_needing_transport[
197
290
  cspecies_needing_transport[SBML_DFS.C_ID] == exchange_compartment_id
198
291
  ].drop(SBML_DFS.C_ID, axis=1)
199
292
  non_exchange_cspecies = cspecies_needing_transport[
200
293
  cspecies_needing_transport[SBML_DFS.C_ID] != exchange_compartment_id
201
294
  ].drop(SBML_DFS.C_ID, axis=1)
202
-
203
- transport_rxn_edgelist = pd.concat(
204
- [
205
- # exchange compartment -> non-exchange compartment
206
- exchange_cspecies.rename(
207
- {SBML_DFS.SC_ID: "sc_id_from", SBML_DFS.SC_NAME: "sc_name_from"}, axis=1
208
- ).merge(
209
- non_exchange_cspecies.rename(
210
- {SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
211
- )
212
- ),
213
- # non-exchange compartment -> exchange compartment
214
- non_exchange_cspecies.rename(
215
- {SBML_DFS.SC_ID: "sc_id_from", SBML_DFS.SC_NAME: "sc_name_from"}, axis=1
216
- ).merge(
217
- exchange_cspecies.rename(
218
- {SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
219
- )
220
- ),
221
- ]
295
+ transport_rxn_edgelist = exchange_cspecies.rename(
296
+ {SBML_DFS.SC_ID: "sc_id_from", SBML_DFS.SC_NAME: "sc_name_from"}, axis=1
297
+ ).merge(
298
+ non_exchange_cspecies.rename(
299
+ {SBML_DFS.SC_ID: "sc_id_to", SBML_DFS.SC_NAME: "sc_name_to"}, axis=1
300
+ )
222
301
  )
223
302
 
224
- # we should add two reactions for each non-exchange compartment cspecies
225
- # one transporting from the exchange compartment and one transporting into the
226
- # exchange compartment
227
- assert transport_rxn_edgelist.shape[0] == 2 * non_exchange_cspecies.shape[0]
228
-
229
- # the rows in this edgelist correspond to new reactions that we'll add
230
- # to the model
231
303
  transport_rxn_edgelist[SBML_DFS.R_NAME] = [
232
- f"{x} -> {y} gap-filling transport"
304
+ f"{x} <-> {y} gap-filling transport"
233
305
  for x, y in zip(
234
306
  transport_rxn_edgelist["sc_name_from"], transport_rxn_edgelist["sc_name_to"]
235
307
  )
236
308
  ]
237
- transport_rxn_edgelist = transport_rxn_edgelist.reset_index(drop=True)
309
+ transport_rxn_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
310
+ return transport_rxn_edgelist.reset_index(drop=True)
238
311
 
239
- # create new reactions, update index by incrementing existing keys
312
+
313
+ def _create_new_reactions(
314
+ transport_rxn_edgelist: pd.DataFrame,
315
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
316
+ gap_filling_id_obj: identifiers.Identifiers,
317
+ gap_filling_source_obj: source.Source,
318
+ ) -> pd.DataFrame:
319
+ """
320
+ Create new reactions DataFrame for gap-filling transport reactions.
321
+
322
+ Parameters
323
+ ----------
324
+ transport_rxn_edgelist : pd.DataFrame
325
+ Edgelist for new transport reactions.
326
+ sbml_dfs : sbml_dfs_core.SBML_dfs
327
+ The SBML_dfs object
328
+ gap_filling_id_obj : identifiers.Identifiers
329
+ Identifiers object for gap-filling
330
+ gap_filling_source_obj : source.Source
331
+ Source object for gap-filling
332
+
333
+ Returns
334
+ -------
335
+ pd.DataFrame
336
+ DataFrame of new reactions to add.
337
+ """
240
338
 
241
339
  existing_r_ids = sbml_dfs_utils.id_formatter_inv(sbml_dfs.reactions.index.tolist())
242
- # filter np.nan which will be introduced if the key is not the default format
243
340
  existing_r_ids = [x for x in existing_r_ids if x is not np.nan]
244
- current_max_r_id = max(existing_r_ids)
245
-
341
+ current_max_r_id = max(existing_r_ids) if existing_r_ids else 0
246
342
  new_int_ids = [
247
343
  1 + current_max_r_id + x for x in transport_rxn_edgelist.index.tolist()
248
344
  ]
@@ -250,22 +346,36 @@ def update_sbml_df_with_exchange(
250
346
  new_int_ids, id_type=SBML_DFS.R_ID
251
347
  )
252
348
  new_reactions = (
253
- transport_rxn_edgelist[[SBML_DFS.R_ID, SBML_DFS.R_NAME]]
349
+ transport_rxn_edgelist[
350
+ [SBML_DFS.R_ID, SBML_DFS.R_NAME, SBML_DFS.R_ISREVERSIBLE]
351
+ ]
254
352
  .set_index(SBML_DFS.R_ID)
255
353
  .assign(r_Identifiers=gap_filling_id_obj)
256
354
  .assign(r_Source=gap_filling_source_obj)
257
355
  )
356
+ return new_reactions
258
357
 
259
- logger.info(
260
- f"{len(new_reactions)} new reactions must "
261
- f"be added to the {exchange_compartment} to add molecular species transportation reactions"
262
- )
263
358
 
264
- # add new reactions
265
- updated_sbml_dfs.reactions = pd.concat([updated_sbml_dfs.reactions, new_reactions])
359
+ def _create_new_reaction_species(
360
+ transport_rxn_edgelist: pd.DataFrame,
361
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
362
+ ) -> pd.DataFrame:
363
+ """
364
+ Create new reaction species DataFrame for gap-filling transport reactions.
365
+
366
+ Parameters
367
+ ----------
368
+ transport_rxn_edgelist : pd.DataFrame
369
+ Edgelist for new transport reactions.
370
+ sbml_dfs : sbml_dfs_core.SBML_dfs
371
+ The SBML_dfs object
372
+
373
+ Returns
374
+ -------
375
+ pd.DataFrame
376
+ DataFrame of new reaction species to add.
377
+ """
266
378
 
267
- # create new reaction species
268
- # each reaction adds two reaction species - the from and to compartmentalized species
269
379
  new_reaction_species = pd.concat(
270
380
  [
271
381
  transport_rxn_edgelist[["sc_id_from", SBML_DFS.R_ID]]
@@ -284,53 +394,36 @@ def update_sbml_df_with_exchange(
284
394
  existing_rsc_ids = sbml_dfs_utils.id_formatter_inv(
285
395
  sbml_dfs.reaction_species.index.tolist()
286
396
  )
397
+
287
398
  # filter np.nan which will be introduced if the key is not the default format
288
399
  existing_rsc_ids = [x for x in existing_rsc_ids if x is not np.nan]
289
- current_max_rsc_id = max(existing_rsc_ids)
290
-
400
+ current_max_rsc_id = max(existing_rsc_ids) if existing_rsc_ids else 0
291
401
  new_int_ids = [
292
402
  1 + current_max_rsc_id + x for x in new_reaction_species.index.tolist()
293
403
  ]
294
404
  new_reaction_species[SBML_DFS.RSC_ID] = sbml_dfs_utils.id_formatter(
295
405
  new_int_ids, id_type=SBML_DFS.RSC_ID
296
406
  )
297
- new_reaction_species = new_reaction_species.set_index(SBML_DFS.RSC_ID)
298
-
299
- updated_sbml_dfs.reaction_species = pd.concat(
300
- [updated_sbml_dfs.reaction_species, new_reaction_species]
301
- )
302
-
303
- updated_sbml_dfs = sbml_dfs_utils.check_entity_data_index_matching(
304
- updated_sbml_dfs, SBML_DFS.REACTIONS
305
- )
306
-
307
- updated_sbml_dfs.validate()
308
-
309
- return updated_sbml_dfs
407
+ return new_reaction_species.set_index(SBML_DFS.RSC_ID)
310
408
 
311
409
 
312
410
  def _identify_species_needing_transport_reactions(
313
411
  sbml_dfs: sbml_dfs_core.SBML_dfs,
314
412
  ) -> np.ndarray:
315
413
  """
316
- Identify Molecular Species Needing Transport Reactions
317
-
318
- Determine whether each molecular species has sufficient transport reactions
319
- so all of the compartments where it exists are connected.
414
+ Identify molecular species needing transport reactions so all of the compartments where it exists are connected.
320
415
 
321
- Parameters:
322
-
323
- sbml_dfs: sbml_dfs_core.SBML_dfs
416
+ Parameters
417
+ ----------
418
+ sbml_dfs : sbml_dfs_core.SBML_dfs
324
419
  A mechanistic model containing a set of molecular species which exist
325
420
  in multiple compartments and are interconverted by reactions
326
421
 
327
- Returns:
328
-
329
- species_needing_transport_rxns: np.ndarray
422
+ Returns
423
+ -------
424
+ np.ndarray
330
425
  Vector of molecular species (s_ids) with no or insufficient transportation reactions
331
-
332
426
  """
333
-
334
427
  # ensure that all genic reaction species can be produced and transported to each
335
428
  # compartment where they should exist.
336
429
  # we should be able to follow a directed path from a synthesized protein
@@ -420,7 +513,7 @@ def _identify_species_needing_transport_reactions(
420
513
  species_transport_status_df = pd.DataFrame(species_transport_status_dict_list)
421
514
 
422
515
  # optional logging
423
- # logger.info(_log_protein_transport_gapfilling(species_transport_status_df))
516
+ logger.debug(_log_protein_transport_gapfilling(species_transport_status_df))
424
517
 
425
518
  # define proteins whose compartmentalized forms are not connected
426
519
  proteins_needing_transport_rxns = species_transport_status_df[
@@ -443,31 +536,25 @@ def _identify_species_needing_transport_reactions(
443
536
 
444
537
 
445
538
  def _eval_existing_inter_cspecies_paths(
446
- comp_specs: pd.DataFrame, existing_cspecies_paths: pd.DataFrame
539
+ comp_specs: pd.DataFrame,
540
+ existing_cspecies_paths: pd.DataFrame,
447
541
  ) -> dict:
448
542
  """
449
- Evaluate Existing Inter Compartmentalized Species Paths
450
-
451
- Determine whether paths between compartments found in
452
- _find_existing_inter_cspecies_paths()
453
- cover all of the compartments where the protein exists.
454
-
455
- Parameters:
543
+ Evaluate whether paths between compartments found in _find_existing_inter_cspecies_paths cover all of the compartments where the protein exists.
456
544
 
457
- comp_specs: pd.DataFrame
545
+ Parameters
546
+ ----------
547
+ comp_specs : pd.DataFrame
458
548
  Compartmentalized species for a single s_id
459
- existing_cspecies_paths: pd.DataFrame
460
- An edgelist of a from and to compartmentalized species
461
- and a label of the path connecting them.
549
+ existing_cspecies_paths : pd.DataFrame
550
+ An edgelist of a from and to compartmentalized species and a label of the path connecting them.
462
551
 
463
- Returns:
464
-
465
- species_tranpsort_status: dict
552
+ Returns
553
+ -------
554
+ dict
466
555
  type: the status category the species falls in
467
- ?msg: an optional message describing the type
468
-
556
+ msg: an optional message describing the type
469
557
  """
470
-
471
558
  # If the largest connected component includes all compartmentalized species
472
559
  # then we can assume that the transportation reactions which exist are adequate. Note that
473
560
  # because the subgraph is directed its topology may still be kind of funky.
@@ -515,31 +602,23 @@ def _find_existing_inter_cspecies_paths(
515
602
  partial_protein_cspecies: pd.DataFrame,
516
603
  ) -> pd.DataFrame | None:
517
604
  """
518
- Find Existing Inter Compartmentalized Species Paths
605
+ Find which compartments a protein exists in can be reached from one another by traversing a directed graph of reactions and molecular species including the protein.
519
606
 
520
- Determine which compartments a protein exists in can be reached from one another by
521
- traversing a directed graph of reactions and molecular species including the protein
522
- (i.e., paths can involve complexes of the protein of interest).
523
-
524
- Parameters:
525
-
526
- comp_specs: pd.DataFrame
607
+ Parameters
608
+ ----------
609
+ comp_specs : pd.DataFrame
527
610
  Compartmentalized species for a single s_id
528
- uniprot_id: str
611
+ uniprot_id : str
529
612
  The Uniprot ID for the protein of interest
530
- directed_graph: ig.Graph
613
+ directed_graph : ig.Graph
531
614
  An igraph version of the sbml_dfs model
532
- partial_protein_cspecies: pd.DataFrame
533
- A table of proteins included in each species ID (this includes BQB_HAS_PART
534
- qualifiers in addition to the BQB_IS qualifiers which generally define
535
- distinct species
536
-
537
- Returns:
538
-
539
- existing_cspecies_paths: pd.DataFrame or None
540
- An edgelist of a from and to compartmentalized species and a label of the path
541
- connecting them.
615
+ partial_protein_cspecies : pd.DataFrame
616
+ A table of proteins included in each species ID (this includes BQB_HAS_PART qualifiers in addition to the BQB_IS qualifiers which generally define distinct species).
542
617
 
618
+ Returns
619
+ -------
620
+ pd.DataFrame or None
621
+ An edgelist of a from and to compartmentalized species and a label of the path connecting them.
543
622
  """
544
623
 
545
624
  reaction_vertices = np.where(
@@ -593,6 +672,14 @@ def _find_existing_inter_cspecies_paths(
593
672
  def _log_protein_transport_gapfilling(
594
673
  species_transport_status_df: pd.DataFrame,
595
674
  ) -> None:
675
+ """
676
+ Log summary statistics and example messages for protein transport gapfilling.
677
+
678
+ Parameters
679
+ ----------
680
+ species_transport_status_df : pd.DataFrame
681
+ DataFrame summarizing transport status for each species
682
+ """
596
683
  print(
597
684
  utils.style_df(
598
685
  species_transport_status_df.value_counts("type").to_frame().reset_index(),