napistu-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/modify/curation.py
@@ -0,0 +1,628 @@
+ from __future__ import annotations
+
+ import os
+
+ from fs import open_fs
+ import pandas as pd
+
+ from napistu import identifiers
+ from napistu import sbml_dfs_core
+ from napistu import sbml_dfs_utils
+ from napistu import source
+
+ from napistu.constants import BQB
+ from napistu.constants import SBML_DFS
+ from napistu.constants import SBML_DFS_SCHEMA
+ from napistu.constants import IDENTIFIERS
+ from napistu.constants import MINI_SBO_FROM_NAME
+ from napistu.constants import SBOTERM_NAMES
+
+ from napistu.modify.constants import VALID_ANNOTATION_TYPES
+
+
+ def curate_sbml_dfs(
+     curation_dir: str, sbml_dfs: sbml_dfs_core.SBML_dfs, verbose: bool = True
+ ) -> sbml_dfs_core.SBML_dfs:
+     """
+     Curate SBML_dfs
+
+     Update a pathway model using manual annotations.
+
+     The current workflow is to:
+     - annotate pathways in https://docs.google.com/spreadsheets/d/1waVXSVMOthL5QAT0PITgLMDdXGHIS50LZ2P1_F_c-6s/edit#gid=101210748
+     - parse annotations into flat files using parse_manual_annotation.Rmd
+     - call this function to format the flat files and update an existing SBML_dfs pathway model
+
+     Params
+     ------
+     curation_dir: str
+         Directory containing annotations generated using parse_manual_annotation.Rmd
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A pathway model
+     verbose: bool
+         Extra reporting
+
+     Returns
+     -------
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A curated pathway model
+     """
+
+     try:
+         open_fs(curation_dir)
+     except Exception as e:
+         raise FileNotFoundError(f"{curation_dir} does not exist") from e
+
+     if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
+         raise TypeError(
+             f"sbml_dfs was a {type(sbml_dfs)} and must be an sbml_dfs_core.SBML_dfs"
+         )
+     if not isinstance(verbose, bool):
+         raise TypeError(f"verbose was a {type(verbose)} and must be a bool")
+
+     curation_dict = read_pathway_curations(curation_dir)
+
+     # remove existing entities
+     if "remove" in curation_dict.keys():
+         invalid_entities_dict = _find_invalid_entities(
+             sbml_dfs, curation_dict["remove"]
+         )
+         if verbose:
+             print(
+                 "removing "
+                 + ", ".join(
+                     [
+                         str(len(y)) + " " + x + "s"
+                         for x, y in invalid_entities_dict.items()
+                     ]
+                 )
+             )
+         sbml_dfs = _remove_entities(sbml_dfs, invalid_entities_dict)
+
+     # add new entities
+     new_entities = format_curations(curation_dict, sbml_dfs)
+     if verbose:
+         print(
+             "adding "
+             + ", ".join([str(y.shape[0]) + " " + x for x, y in new_entities.items()])
+         )
+     for entity_type in new_entities.keys():
+         entity_df = getattr(sbml_dfs, entity_type)
+         updated_entity_df = pd.concat([entity_df, new_entities[entity_type]])
+         setattr(sbml_dfs, entity_type, updated_entity_df)
+     sbml_dfs.validate()
+
+     return sbml_dfs
+
+
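A minimal usage sketch of this entry point; the file paths below are hypothetical, and the SBML_dfs construction assumes the ingestion flow provided by napistu/ingestion/sbml.py:

    from napistu import sbml_dfs_core
    from napistu.ingestion import sbml
    from napistu.modify import curation

    # load an existing pathway model (hypothetical path)
    sbml_dfs = sbml_dfs_core.SBML_dfs(sbml.SBML("reactome_glycolysis.sbml"))

    # apply removals/additions parsed by parse_manual_annotation.Rmd
    # (hypothetical curation directory)
    sbml_dfs = curation.curate_sbml_dfs("curations/glycolysis/", sbml_dfs, verbose=True)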
+ def read_pathway_curations(curation_dir: str) -> dict[str, pd.DataFrame]:
+     """
+     Read Pathway Curations
+
+     Load curations that were prepared by parse_manual_annotation.Rmd
+
+     Params
+     ------
+     curation_dir: str
+         Directory containing annotations generated using parse_manual_annotation.Rmd
+
+     Returns
+     -------
+     curations: dict
+         Dictionary containing different types of annotations
+     """
+
+     with open_fs(curation_dir) as curation_fs:
+         curation_files = curation_fs.listdir(".")
+
+         annotations_types = set(curation_files).intersection(
+             {x + ".tsv" for x in VALID_ANNOTATION_TYPES}
+         )
+
+         curation_dict = {}
+         for annotation_file in annotations_types:
+             with curation_fs.open(annotation_file) as f:
+                 key = os.path.splitext(annotation_file)[0]
+                 curation_dict[key] = pd.read_csv(f, sep="\t")
+
+     return curation_dict
+
+
+ def format_curations(
+     curation_dict: dict[str, pd.DataFrame], sbml_dfs: sbml_dfs_core.SBML_dfs
+ ) -> dict[str, pd.DataFrame]:
+     """
+     Format Curations
+
+     Format manual curations into a set of tables that can be appended to an sbml_dfs's tables
+
+     Params
+     ------
+     curation_dict:
+         Curations imported using read_pathway_curations
+     sbml_dfs:
+         A pathway model
+
+     Returns
+     -------
+     new_entities: dict
+         Curations formatted as sbml_dfs_core.SBML_dfs tables
+     """
+
+     new_entity_types = set(curation_dict.keys()).difference({"foci", "remove"})
+
+     if SBML_DFS.COMPARTMENTS in new_entity_types:
+         raise NotImplementedError("logic for adding compartments does not exist")
+
+     new_entities = dict()  # type: dict[str, pd.DataFrame]
+
+     # reorganize reaction species' annotations as a dict to allow for
+     # annotations added explicitly in the curations sheet
+     # and implicitly due to newly added reactions
+     reaction_species_dict = dict()  # type: dict[str, pd.DataFrame | None]
+     reaction_species_dict["explicit"] = _format_explicit_reaction_species(curation_dict)
+     reaction_species_dict["implicit"] = None
+
+     # create reaction species based on reaction stoichiometry
+     if SBML_DFS.REACTIONS in new_entity_types:
+         reaction_species_dict["implicit"] = _format_implicit_reaction_species(
+             curation_dict
+         )
+         new_entity_types.add(SBML_DFS.REACTION_SPECIES)
+         curation_dict[SBML_DFS.REACTION_SPECIES] = pd.concat(reaction_species_dict.values())  # type: ignore
+
+     if SBML_DFS.REACTIONS in new_entity_types:
+         # flag a reaction as reversible when its stoichiometry uses "<->"
+         curation_dict[SBML_DFS.REACTIONS][SBML_DFS.R_ISREVERSIBLE] = [
+             len(stoi.split("<->")) == 2
+             for stoi in curation_dict[SBML_DFS.REACTIONS][SBML_DFS.STOICHIOMETRY]
+         ]
+
+     # add entities in dependency order:
+     # compartments > species > reactions > compartmentalized_species > reaction_species
+     for entity_type in SBML_DFS_SCHEMA.SCHEMA.keys():
+         if entity_type not in new_entity_types:
+             continue
+
+         new_entities[entity_type] = format_curated_entities(
+             entity_type, curation_dict[entity_type], new_entities, sbml_dfs  # type: ignore
+         )
+
+     return new_entities
+
+
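The stoichiometry strings above follow a small grammar (the accepted syntax is defined by _format_implicit_reaction_species below): "++" separates species on one side of a reaction, "->" marks an irreversible reaction, and "<->" a reversible one. A self-contained illustration with made-up reactions:

    import pandas as pd

    # hypothetical curated stoichiometries
    stoichiometries = pd.Series(
        [
            "glucose ++ ATP -> glucose 6-phosphate ++ ADP",  # irreversible
            "fructose 6-phosphate <-> glucose 6-phosphate",  # reversible
        ]
    )
    reversible = [len(stoi.split("<->")) == 2 for stoi in stoichiometries]
    # reversible == [False, True]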
+ def _find_invalid_entities(
+     sbml_dfs: sbml_dfs_core.SBML_dfs, invalid_entities: pd.DataFrame
+ ) -> dict[str, set]:
+     """
+     Find Invalid Entities
+
+     Based on a set of entity names or attributes, find each entity's
+     corresponding primary key
+
+     Params
+     ------
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A pathway model
+     invalid_entities: pd.DataFrame
+         A table containing entities to be removed ("remove"),
+         the table where each entity resides ("table"), and the variable used
+         to find the entity ("variable")
+
+     Returns
+     -------
+     invalid_entities_dict: dict
+         A dictionary containing the primary keys of invalid entities
+     """
+
+     # find tables where removal will occur (or at least start)
+     unique_tables = invalid_entities["table"].unique().tolist()
+     invalid_tables = [x for x in unique_tables if x not in sbml_dfs.schema.keys()]
+
+     if len(invalid_tables) > 0:
+         raise ValueError(
+             f"{', '.join(invalid_tables)} are not valid table names; "
+             f"valid tables are {', '.join(sbml_dfs.schema.keys())}"
+         )
+
+     invalid_entities_dict = dict()  # type: dict[str, set]
+     for tab in unique_tables:
+         tab_schema = sbml_dfs.schema[tab]
+         tab_vars = tab_schema["vars"] + [tab_schema["pk"]]
+
+         # pull out the annotations that target the table being evaluated
+         remove_df = invalid_entities[invalid_entities["table"] == tab]
+         assert isinstance(remove_df, pd.DataFrame)
+
+         invalid_remove_vars = (
+             remove_df["variable"][~remove_df["variable"].isin(tab_vars)]
+             .unique()
+             .tolist()
+         )
+         if len(invalid_remove_vars) > 0:
+             raise ValueError(
+                 f"{', '.join(invalid_remove_vars)} are not valid variables"
+                 f" in the {tab} table; valid variables are {', '.join(tab_vars)}"
+             )
+
+         # find the pk corresponding to each removal annotation
+         tab_df = getattr(sbml_dfs, tab)
+
+         invalid_entities_dict[tab_schema["pk"]] = set()
+         for i in range(0, remove_df.shape[0]):
+             remove_series = remove_df.iloc[i]
+
+             if remove_series["variable"] == tab_schema["pk"]:
+                 # check that the pk exists and then add it to the invalid entities
+                 if remove_series["remove"] not in tab_df.index:
+                     raise ValueError(
+                         f"{remove_series['remove']} was not found in the index of {tab}"
+                     )
+                 invalid_entities_dict[tab_schema["pk"]].add(remove_series["remove"])
+             else:
+                 # look up entities by attribute value
+                 matching_entity = tab_df[
+                     tab_df[remove_series["variable"]] == remove_series["remove"]
+                 ]
+
+                 if matching_entity.shape[0] == 0:
+                     raise ValueError(
+                         f"{remove_series['remove']} was not found in the {remove_series['variable']} column of {tab}"
+                     )
+
+                 invalid_entities_dict[tab_schema["pk"]].update(
+                     matching_entity.index.tolist()
+                 )
+
+     # iterate through primary key -> foreign key relationships
+     # to define additional entities which should be removed based on
+     # the initial removal annotations; copy each set as well as the dict
+     # so that growth within _expand_entities_by_fks is detected
+     new_invalid_entities_dict = {k: set(v) for k, v in invalid_entities_dict.items()}
+
+     cont = True
+     while cont:
+         new_invalid_entities_dict = _expand_entities_by_fks(
+             sbml_dfs, new_invalid_entities_dict
+         )
+
+         if new_invalid_entities_dict != invalid_entities_dict:
+             invalid_entities_dict = new_invalid_entities_dict
+             new_invalid_entities_dict = {
+                 k: set(v) for k, v in invalid_entities_dict.items()
+             }
+         else:
+             cont = False
+
+     return invalid_entities_dict
+
+
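The expected shape of the "remove" table, with hypothetical entries: each row names an entity ("remove"), the table it lives in ("table"), and the column used to match it ("variable"), which may be either an attribute or the table's primary key:

    import pandas as pd

    # hypothetical removal annotations; the sc_name/r_id column names follow
    # the SBML_DFS schema conventions used elsewhere in this module
    remove_df = pd.DataFrame(
        {
            "remove": ["glucose [cytosol]", "R00000042"],
            "table": ["compartmentalized_species", "reactions"],
            "variable": ["sc_name", "r_id"],  # match by label or by primary key
        }
    )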
+ def _expand_entities_by_fks(sbml_dfs: sbml_dfs_core.SBML_dfs, pk_dict: dict) -> dict:
+     """
+     Expand Entities By Foreign Keys
+
+     Starting with a dictionary of primary keys slated for removal, add the
+     primary keys of all entities whose foreign keys reference them
+
+     Params
+     ------
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A pathway model
+     pk_dict: dict
+         Dictionary where keys are types of primary keys in sbml_dfs
+
+     Returns
+     -------
+     pk_dict: dict
+         Input where additional primary keys may have been added
+     """
+
+     for tab in sbml_dfs.schema.keys():
+         tab_df = getattr(sbml_dfs, tab)
+         tab_schema = sbml_dfs.schema[tab]
+         pk = tab_schema["pk"]
+
+         if "fk" in tab_schema.keys():
+             # find entities whose foreign keys match flagged primary keys
+             # and add their own primary keys to the pk_dict
+             for fk in tab_schema["fk"]:
+                 if fk in pk_dict.keys():
+                     fks = tab_df[tab_df[fk].isin(pk_dict[fk])]
+                     if pk not in pk_dict.keys():
+                         pk_dict[pk] = set()
+                     pk_dict[pk].update(fks.index.tolist())
+
+     return pk_dict
+
+
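A toy illustration of the cascade the fixed-point loop in _find_invalid_entities produces, using hypothetical identifiers: removing a species propagates to the compartmentalized species that reference it, and then to the reaction species that reference those.

    # hypothetical cascade across the SBML_dfs schema
    pk_dict = {"s_id": {"S001"}}  # a species flagged for removal
    # pass 1: compartmentalized_species rows with s_id == "S001"
    #   -> pk_dict == {"s_id": {"S001"}, "sc_id": {"SC001", "SC002"}}
    # pass 2: reaction_species rows with sc_id in {"SC001", "SC002"}
    #   -> pk_dict gains {"rsc_id": {"RSC007"}}
    # pass 3: no new keys or members -> fixed point reached, loop exits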
+ def _remove_entities(
+     sbml_dfs: sbml_dfs_core.SBML_dfs, pk_dict: dict
+ ) -> sbml_dfs_core.SBML_dfs:
+     """
+     Remove Entities
+
+     Remove entities whose primary keys are in pk_dict
+
+     Params
+     ------
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A pathway model
+     pk_dict: dict
+         Dictionary where keys are types of primary keys in sbml_dfs
+
+     Returns
+     -------
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         Input with some entities removed
+     """
+
+     for tab in sbml_dfs.schema.keys():
+         tab_df = getattr(sbml_dfs, tab)
+         tab_schema = sbml_dfs.schema[tab]
+
+         if tab_schema["pk"] in pk_dict.keys():
+             updated_table = tab_df[~tab_df.index.isin(pk_dict[tab_schema["pk"]])]
+             setattr(sbml_dfs, tab, updated_table)
+
+     return sbml_dfs
+
+
+ def format_curated_entities(
+     entity_type: str,
+     new_curated_entities: pd.DataFrame,
+     new_entities: dict[str, pd.DataFrame],
+     sbml_dfs: sbml_dfs_core.SBML_dfs,
+     curation_id: str = "Calico curations",
+ ) -> pd.DataFrame:
+     """
+     Format Curated Entities
+
+     Convert entities from the curation format to the structure of SBML_dfs tables
+
+     Params
+     ------
+     entity_type: str
+         The type of entity to update (e.g., reactions, species, ...)
+     new_curated_entities: pd.DataFrame
+         A curation table generated using read_pathway_curations
+     new_entities: dict
+         Curations formatted as sbml_dfs_core.SBML_dfs tables
+     sbml_dfs: sbml_dfs_core.SBML_dfs
+         A pathway model
+     curation_id: str
+         Name to use as a pathway id in source.Source objects
+
+     Returns
+     -------
+     new_entity_df: pd.DataFrame
+         Input for entity_type formatted as an SBML_dfs table
+     """
+
+     if not isinstance(entity_type, str):
+         raise TypeError(f"entity_type was a {type(entity_type)} and must be a str")
+     if not isinstance(new_curated_entities, pd.DataFrame):
+         raise TypeError(
+             f"new_curated_entities was a {type(new_curated_entities)} and must be a pd.DataFrame"
+         )
+     if not isinstance(new_entities, dict):
+         raise TypeError(f"new_entities was a {type(new_entities)} and must be a dict")
+     if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
+         raise TypeError(
+             f"sbml_dfs was a {type(sbml_dfs)} and must be an sbml_dfs_core.SBML_dfs"
+         )
+     if not isinstance(curation_id, str):
+         raise TypeError(f"curation_id was a {type(curation_id)} and must be a str")
+
+     type_schema = sbml_dfs.schema[entity_type]
+
+     # name the entity
+     if "label" in type_schema.keys():
+         new_curated_entities[type_schema["label"]] = new_curated_entities[entity_type]
+     else:
+         # add a temporary label to improve error messages
+         new_curated_entities["label"] = [
+             ", ".join(new_curated_entities.select_dtypes(include=["object"]).iloc[i])
+             for i in range(0, new_curated_entities.shape[0])
+         ]
+
+     if "source" in type_schema.keys():
+         new_curated_entities["curator"] = new_curated_entities["curator"].fillna(
+             "unknown"
+         )
+         # convert curator entries to Sources
+         new_curated_entities[type_schema["source"]] = [
+             source.Source(
+                 pd.DataFrame(
+                     {"model": x, "name": "custom - " + x, "pathway_id": curation_id},
+                     index=[0],
+                 )
+             )
+             for x in new_curated_entities["curator"]
+         ]
+
+     # add the primary key, continuing on from the largest existing id
+     max_pk = max(
+         sbml_dfs_utils.id_formatter_inv(getattr(sbml_dfs, entity_type).index.tolist())
+     )
+     if pd.isna(max_pk):
+         max_pk = int(-1)
+
+     new_curated_entities[type_schema["pk"]] = sbml_dfs_utils.id_formatter(
+         range(
+             max_pk + 1,
+             max_pk + new_curated_entities.shape[0] + 1,
+         ),
+         type_schema["pk"],
+     )
+
+     # add foreign keys if they exist
+     if "fk" in type_schema.keys():
+         # find primary keys corresponding to foreign keys, including both existing and newly added entities
+         for fk in type_schema["fk"]:
+             # find the table that the fk belongs to
+             fk_of = [x for x, y in sbml_dfs.schema.items() if y["pk"] == fk][0]
+
+             # pull up the referenced entities table, including newly added entities
+             if fk_of in new_entities.keys():
+                 ref_entities = pd.concat(
+                     [new_entities[fk_of], getattr(sbml_dfs, fk_of)]
+                 )
+             else:
+                 ref_entities = getattr(sbml_dfs, fk_of)
+             key_ref_schema = sbml_dfs.schema[fk_of]
+             # add the primary key by joining on the label
+             new_curated_entities = new_curated_entities.merge(
+                 ref_entities[key_ref_schema["label"]].reset_index(), how="left"
+             )
+
+             # check that all fks were found
+             failed_join_df = new_curated_entities[
+                 new_curated_entities[key_ref_schema["pk"]].isna()
+             ]
+             if failed_join_df.shape[0] != 0:
+                 if "label" in type_schema.keys():
+                     fail_str = "\n".join(failed_join_df[type_schema["label"]])
+                 else:
+                     fail_str = "\n".join(failed_join_df["label"])
+                 raise ValueError(
+                     f"{failed_join_df.shape[0]} merges of {fk_of} "
+                     f"failed when updating the {entity_type} table:\n{fail_str}"
+                 )
+
+     # add ids where applicable
+     if "id" in type_schema.keys():
+         ids = list()
+         for i in range(0, new_curated_entities.shape[0]):
+             new_entity_series = new_curated_entities.iloc[i]
+
+             is_identified = not new_entity_series.isna()["uri"]
+             if is_identified:
+                 entity_id = [
+                     identifiers.format_uri(
+                         new_entity_series["uri"], biological_qualifier_type=BQB.IS
+                     )
+                 ]
+             else:
+                 # stub the id using the entity's pk
+                 entity_id = [
+                     {
+                         IDENTIFIERS.ONTOLOGY: "custom_species",
+                         IDENTIFIERS.IDENTIFIER: new_entity_series[type_schema["pk"]],
+                         IDENTIFIERS.BQB: BQB.IS,
+                     }
+                 ]
+             ids.append(identifiers.Identifiers(entity_id))
+
+         new_curated_entities[type_schema["id"]] = ids
+
+     return new_curated_entities.set_index(type_schema["pk"])[type_schema["vars"]]
+
+
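The primary-key block above extends the model's existing numbering. id_formatter and id_formatter_inv are napistu helpers defined in napistu/sbml_dfs_utils.py; the toy functions below are only a sketch of the convention they appear to implement (the prefix derivation and zero-padding width are assumptions):

    # illustrative only: sequential ids of the form <prefix><zero-padded int>
    def toy_id_formatter(values, id_type, width=8):
        prefix = id_type.split("_")[0].upper()  # e.g., "r_id" -> "R"
        return [f"{prefix}{v:0{width}d}" for v in values]

    def toy_id_formatter_inv(ids, width=8):
        # recover the integer portion so new ids can continue the sequence
        return [int(i[-width:]) for i in ids]

    toy_id_formatter(range(3), "r_id")  # ['R00000000', 'R00000001', 'R00000002']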
+ def _format_implicit_reaction_species(
+     curation_dict: dict[str, pd.DataFrame],
+ ) -> pd.DataFrame:
+     """Construct reaction species which are defined in reactions' stoichiometry."""
+
+     curated_reactions = curation_dict[SBML_DFS.REACTIONS][
+         [SBML_DFS.REACTIONS, SBML_DFS.STOICHIOMETRY]
+     ]
+
+     reaction_species = list()
+     for i in range(0, curated_reactions.shape[0]):
+         reaction_stoi = curated_reactions[SBML_DFS.STOICHIOMETRY].iloc[i]
+         if len(reaction_stoi.split("<->")) == 2:
+             split_stoi = reaction_stoi.split("<->")
+         elif len(reaction_stoi.split("->")) == 2:
+             split_stoi = reaction_stoi.split("->")
+         else:
+             raise ValueError(
+                 f"{reaction_stoi} is not a valid reaction stoichiometry; "
+                 "there must be exactly one '->' or '<->' separating the substrates and products"
+             )
+
+         substrates = [x.strip() for x in split_stoi[0].strip().split("++")]
+         products = [x.strip() for x in split_stoi[1].strip().split("++")]
+
+         a_reactions_species = pd.concat(
+             [
+                 pd.DataFrame(
+                     [
+                         {
+                             SBML_DFS.SC_NAME: x,
+                             SBML_DFS.STOICHIOMETRY: -1,
+                             SBML_DFS.SBO_TERM: MINI_SBO_FROM_NAME[
+                                 SBOTERM_NAMES.REACTANT
+                             ],
+                         }
+                         for x in substrates
+                     ]
+                 ),
+                 pd.DataFrame(
+                     [
+                         {
+                             SBML_DFS.SC_NAME: x,
+                             SBML_DFS.STOICHIOMETRY: 1,
+                             SBML_DFS.SBO_TERM: MINI_SBO_FROM_NAME[
+                                 SBOTERM_NAMES.PRODUCT
+                             ],
+                         }
+                         for x in products
+                     ]
+                 ),
+             ]
+         ).assign(r_name=curated_reactions[SBML_DFS.REACTIONS].iloc[i])
+
+         reaction_species.append(a_reactions_species)
+
+     return pd.concat(reaction_species)
+
+
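For a single hypothetical stoichiometry string, the expansion above yields one row per participant, with substrates at -1 and products at +1:

    # "glucose ++ ATP -> glucose 6-phosphate ++ ADP" expands to (values
    # illustrative; the sbo_term codes come from MINI_SBO_FROM_NAME):
    #
    #   sc_name              stoichiometry  sbo_term    r_name
    #   glucose                         -1  <reactant>  hexokinase reaction
    #   ATP                             -1  <reactant>  hexokinase reaction
    #   glucose 6-phosphate              1  <product>   hexokinase reaction
    #   ADP                              1  <product>   hexokinase reaction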
+ def _format_explicit_reaction_species(
+     curation_dict: dict[str, pd.DataFrame],
+ ) -> pd.DataFrame | None:
+     """Format reaction species which are directly defined among curated species."""
+
+     if SBML_DFS.REACTION_SPECIES not in curation_dict.keys():
+         print("No explicitly curated reaction species")
+         return None
+
+     # convert from sbo_term_names to sbo_terms
+     mini_sbo_terms_df = pd.DataFrame(MINI_SBO_FROM_NAME, index=[SBML_DFS.SBO_TERM]).T
+
+     augmented_reaction_species = (
+         curation_dict[SBML_DFS.REACTION_SPECIES]
+         .rename({SBML_DFS.REACTION_SPECIES: SBML_DFS.SC_NAME}, axis=1)
+         .merge(mini_sbo_terms_df, left_on="sbo_term_name", right_index=True, how="left")
+     )
+
+     # flag invalid terms
+     invalid_terms_df = augmented_reaction_species[
+         augmented_reaction_species[SBML_DFS.SBO_TERM].isna()
+     ]
+     if invalid_terms_df.shape[0] != 0:
+         invalid_terms = invalid_terms_df["sbo_term_name"].unique().tolist()
+         raise ValueError(
+             f'{", ".join(invalid_terms)} are invalid entries for "sbo_term_name"; '
+             f'valid entries are {", ".join(mini_sbo_terms_df.index.tolist())}'
+         )
+
+     # there currently isn't a good way to encode evidence and curator annotations
+     # for reaction_species since they lack a source object;
+     # to date they have shared the source of their reaction
+     augmented_reaction_species = augmented_reaction_species.drop(
+         ["sbo_term_name", "evidence", "curator"], axis=1
+     )
+
+     return augmented_reaction_species
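The transpose above turns the MINI_SBO_FROM_NAME name-to-term dict into a one-column frame keyed by name, so it can be left-merged onto the curation table. A self-contained illustration with a stand-in mapping:

    import pandas as pd

    # stand-in for MINI_SBO_FROM_NAME (SBO:0000010/SBO:0000011 are the SBO
    # terms for reactant and product)
    name_to_term = {"reactant": "SBO:0000010", "product": "SBO:0000011"}
    terms_df = pd.DataFrame(name_to_term, index=["sbo_term"]).T
    #              sbo_term
    # reactant  SBO:0000010
    # product   SBO:0000011

    curated = pd.DataFrame({"sc_name": ["ATP"], "sbo_term_name": ["reactant"]})
    curated.merge(terms_df, left_on="sbo_term_name", right_index=True, how="left")
    #   sc_name sbo_term_name     sbo_term
    # 0     ATP      reactant  SBO:0000010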