pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/sources/__init__.py
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
from class_resolver import ClassResolver
|
|
4
4
|
|
|
5
5
|
from .antibodyregistry import AntibodyRegistryGetter
|
|
6
|
+
from .bigg import BiGGCompartmentGetter, BiGGMetaboliteGetter, BiGGModelGetter, BiGGReactionGetter
|
|
6
7
|
from .ccle import CCLEGetter
|
|
7
8
|
from .cgnc import CGNCGetter
|
|
8
|
-
from .chembl import ChEMBLCompoundGetter
|
|
9
|
+
from .chembl import ChEMBLCompoundGetter, ChEMBLTargetGetter
|
|
9
10
|
from .civic_gene import CIVICGeneGetter
|
|
11
|
+
from .clinicaltrials import ClinicalTrialsGetter
|
|
10
12
|
from .complexportal import ComplexPortalGetter
|
|
11
13
|
from .conso import CONSOGetter
|
|
12
14
|
from .cpt import CPTGetter
|
|
@@ -14,34 +16,38 @@ from .credit import CreditGetter
|
|
|
14
16
|
from .cvx import CVXGetter
|
|
15
17
|
from .depmap import DepMapGetter
|
|
16
18
|
from .dictybase_gene import DictybaseGetter
|
|
17
|
-
from .drugbank import DrugBankGetter
|
|
18
|
-
from .drugbank_salt import DrugBankSaltGetter
|
|
19
|
+
from .drugbank import DrugBankGetter, DrugBankSaltGetter
|
|
19
20
|
from .drugcentral import DrugCentralGetter
|
|
20
21
|
from .expasy import ExpasyGetter
|
|
21
22
|
from .famplex import FamPlexGetter
|
|
22
23
|
from .flybase import FlyBaseGetter
|
|
23
|
-
from .
|
|
24
|
-
from .
|
|
25
|
-
from .
|
|
26
|
-
from .
|
|
27
|
-
from .
|
|
28
|
-
from .
|
|
29
|
-
from .icd11 import ICD11Getter
|
|
24
|
+
from .gard import GARDGetter
|
|
25
|
+
from .geonames import GeonamesFeatureGetter, GeonamesGetter
|
|
26
|
+
from .gtdb import GTDBGetter
|
|
27
|
+
from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
|
|
28
|
+
from .hgnc import HGNCGetter, HGNCGroupGetter
|
|
29
|
+
from .icd import ICD10Getter, ICD11Getter
|
|
30
30
|
from .interpro import InterProGetter
|
|
31
31
|
from .itis import ITISGetter
|
|
32
32
|
from .kegg import KEGGGeneGetter, KEGGGenomeGetter, KEGGPathwayGetter
|
|
33
33
|
from .mesh import MeSHGetter
|
|
34
34
|
from .mgi import MGIGetter
|
|
35
|
-
from .mirbase import MiRBaseGetter
|
|
36
|
-
from .mirbase_family import MiRBaseFamilyGetter
|
|
37
|
-
from .mirbase_mature import MiRBaseMatureGetter
|
|
35
|
+
from .mirbase import MiRBaseFamilyGetter, MiRBaseGetter, MiRBaseMatureGetter
|
|
38
36
|
from .msigdb import MSigDBGetter
|
|
39
|
-
from .
|
|
37
|
+
from .ncbi import NCBIGCGetter, NCBIGeneGetter
|
|
38
|
+
from .nih_reporter import NIHReporterGetter
|
|
39
|
+
from .nlm import NLMCatalogGetter, NLMPublisherGetter
|
|
40
40
|
from .npass import NPASSGetter
|
|
41
41
|
from .omim_ps import OMIMPSGetter
|
|
42
42
|
from .pathbank import PathBankGetter
|
|
43
|
-
from .pfam import PfamGetter
|
|
44
|
-
from .
|
|
43
|
+
from .pfam import PfamClanGetter, PfamGetter
|
|
44
|
+
from .pharmgkb import (
|
|
45
|
+
PharmGKBChemicalGetter,
|
|
46
|
+
PharmGKBDiseaseGetter,
|
|
47
|
+
PharmGKBGeneGetter,
|
|
48
|
+
PharmGKBPathwayGetter,
|
|
49
|
+
PharmGKBVariantGetter,
|
|
50
|
+
)
|
|
45
51
|
from .pid import PIDGetter
|
|
46
52
|
from .pombase import PomBaseGetter
|
|
47
53
|
from .pubchem import PubChemCompoundGetter
|
|
@@ -51,15 +57,21 @@ from .rhea import RheaGetter
|
|
|
51
57
|
from .ror import RORGetter
|
|
52
58
|
from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
|
|
53
59
|
from .sgd import SGDGetter
|
|
60
|
+
from .signor import SignorGetter
|
|
54
61
|
from .slm import SLMGetter
|
|
55
|
-
from .umls import UMLSGetter
|
|
62
|
+
from .umls import UMLSGetter, UMLSSTyGetter
|
|
63
|
+
from .unimod import UnimodGetter
|
|
56
64
|
from .uniprot import UniProtGetter, UniProtPtmGetter
|
|
57
65
|
from .wikipathways import WikiPathwaysGetter
|
|
58
66
|
from .zfin import ZFINGetter
|
|
59
|
-
from ..struct import Obo
|
|
67
|
+
from ..struct.struct import AdHocOntologyBase, Obo
|
|
60
68
|
|
|
61
69
|
__all__ = [
|
|
62
70
|
"AntibodyRegistryGetter",
|
|
71
|
+
"BiGGCompartmentGetter",
|
|
72
|
+
"BiGGMetaboliteGetter",
|
|
73
|
+
"BiGGModelGetter",
|
|
74
|
+
"BiGGReactionGetter",
|
|
63
75
|
"CCLEGetter",
|
|
64
76
|
"CGNCGetter",
|
|
65
77
|
"CIVICGeneGetter",
|
|
@@ -67,6 +79,8 @@ __all__ = [
|
|
|
67
79
|
"CPTGetter",
|
|
68
80
|
"CVXGetter",
|
|
69
81
|
"ChEMBLCompoundGetter",
|
|
82
|
+
"ChEMBLTargetGetter",
|
|
83
|
+
"ClinicalTrialsGetter",
|
|
70
84
|
"ComplexPortalGetter",
|
|
71
85
|
"CreditGetter",
|
|
72
86
|
"DepMapGetter",
|
|
@@ -77,8 +91,11 @@ __all__ = [
|
|
|
77
91
|
"ExpasyGetter",
|
|
78
92
|
"FamPlexGetter",
|
|
79
93
|
"FlyBaseGetter",
|
|
94
|
+
"GARDGetter",
|
|
95
|
+
"GTDBGetter",
|
|
80
96
|
"GWASCentralPhenotypeGetter",
|
|
81
97
|
"GWASCentralStudyGetter",
|
|
98
|
+
"GeonamesFeatureGetter",
|
|
82
99
|
"GeonamesGetter",
|
|
83
100
|
"HGNCGetter",
|
|
84
101
|
"HGNCGroupGetter",
|
|
@@ -95,13 +112,22 @@ __all__ = [
|
|
|
95
112
|
"MiRBaseFamilyGetter",
|
|
96
113
|
"MiRBaseGetter",
|
|
97
114
|
"MiRBaseMatureGetter",
|
|
115
|
+
"NCBIGCGetter",
|
|
98
116
|
"NCBIGeneGetter",
|
|
117
|
+
"NIHReporterGetter",
|
|
118
|
+
"NLMCatalogGetter",
|
|
119
|
+
"NLMPublisherGetter",
|
|
99
120
|
"NPASSGetter",
|
|
100
121
|
"OMIMPSGetter",
|
|
101
122
|
"PIDGetter",
|
|
102
123
|
"PathBankGetter",
|
|
103
124
|
"PfamClanGetter",
|
|
104
125
|
"PfamGetter",
|
|
126
|
+
"PharmGKBChemicalGetter",
|
|
127
|
+
"PharmGKBDiseaseGetter",
|
|
128
|
+
"PharmGKBGeneGetter",
|
|
129
|
+
"PharmGKBPathwayGetter",
|
|
130
|
+
"PharmGKBVariantGetter",
|
|
105
131
|
"PomBaseGetter",
|
|
106
132
|
"PubChemCompoundGetter",
|
|
107
133
|
"RGDGetter",
|
|
@@ -114,24 +140,21 @@ __all__ = [
|
|
|
114
140
|
"SFAMGetter",
|
|
115
141
|
"SGDGetter",
|
|
116
142
|
"SLMGetter",
|
|
143
|
+
"SignorGetter",
|
|
117
144
|
"UMLSGetter",
|
|
145
|
+
"UMLSSTyGetter",
|
|
118
146
|
"UniProtGetter",
|
|
119
147
|
"UniProtPtmGetter",
|
|
148
|
+
"UnimodGetter",
|
|
120
149
|
"WikiPathwaysGetter",
|
|
121
150
|
"ZFINGetter",
|
|
122
151
|
"ontology_resolver",
|
|
123
152
|
]
|
|
124
153
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
_assert_sorted()
|
|
133
|
-
del _assert_sorted
|
|
134
|
-
|
|
135
|
-
ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(base=Obo, suffix="Getter")
|
|
154
|
+
ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(
|
|
155
|
+
base=Obo,
|
|
156
|
+
suffix="Getter",
|
|
157
|
+
skip={AdHocOntologyBase},
|
|
158
|
+
)
|
|
136
159
|
for getter in list(ontology_resolver):
|
|
137
160
|
ontology_resolver.synonyms[getter.ontology] = getter
|
pyobo/sources/agrovoc.py
CHANGED
|
File without changes
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
"""Converter for the Antibody Registry.
|
|
1
|
+
"""Converter for the Antibody Registry.
|
|
2
|
+
|
|
3
|
+
TODO use API https://www.antibodyregistry.org/api/antibodies?page=1&size=100
|
|
4
|
+
"""
|
|
2
5
|
|
|
3
6
|
import logging
|
|
4
7
|
from collections.abc import Iterable, Mapping
|
|
5
|
-
from typing import Optional
|
|
6
8
|
|
|
7
9
|
import pandas as pd
|
|
8
10
|
from bioregistry.utils import removeprefix
|
|
9
11
|
from tqdm.auto import tqdm
|
|
10
12
|
|
|
11
|
-
from pyobo import Obo, Term
|
|
13
|
+
from pyobo import Obo, Reference, Term
|
|
12
14
|
from pyobo.api.utils import get_version
|
|
15
|
+
from pyobo.struct.typedef import has_citation
|
|
13
16
|
from pyobo.utils.path import ensure_df
|
|
14
17
|
|
|
15
18
|
__all__ = [
|
|
@@ -23,7 +26,7 @@ URL = "http://antibodyregistry.org/php/fileHandler.php"
|
|
|
23
26
|
CHUNKSIZE = 20_000
|
|
24
27
|
|
|
25
28
|
|
|
26
|
-
def get_chunks(*, force: bool = False, version:
|
|
29
|
+
def get_chunks(*, force: bool = False, version: str | None = None) -> pd.DataFrame:
|
|
27
30
|
"""Get the BioGRID identifiers mapping dataframe."""
|
|
28
31
|
if version is None:
|
|
29
32
|
version = get_version(PREFIX)
|
|
@@ -44,19 +47,15 @@ class AntibodyRegistryGetter(Obo):
|
|
|
44
47
|
"""An ontology representation of the Antibody Registry."""
|
|
45
48
|
|
|
46
49
|
ontology = bioversions_key = PREFIX
|
|
50
|
+
typedefs = [has_citation]
|
|
47
51
|
|
|
48
52
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
49
53
|
"""Iterate over terms in the ontology."""
|
|
50
54
|
return iter_terms(force=force, version=self._version_or_raise)
|
|
51
55
|
|
|
52
56
|
|
|
53
|
-
def get_obo(*, force: bool = False) -> Obo:
|
|
54
|
-
"""Get the Antibody Registry as OBO."""
|
|
55
|
-
return AntibodyRegistryGetter(force=force)
|
|
56
|
-
|
|
57
|
-
|
|
58
57
|
# TODO there are tonnnnsss of mappings to be curated
|
|
59
|
-
MAPPING: Mapping[str,
|
|
58
|
+
MAPPING: Mapping[str, str | None] = {
|
|
60
59
|
"AMERICAN DIAGNOSTICA": None, # No website
|
|
61
60
|
"Biolegend": "biolegend",
|
|
62
61
|
"Enzo Life Sciences": "enzo",
|
|
@@ -74,7 +73,7 @@ SKIP = {
|
|
|
74
73
|
}
|
|
75
74
|
|
|
76
75
|
|
|
77
|
-
def iter_terms(*, force: bool = False, version:
|
|
76
|
+
def iter_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
|
|
78
77
|
"""Iterate over antibodies."""
|
|
79
78
|
chunks = get_chunks(force=force, version=version)
|
|
80
79
|
needs_curating = set()
|
|
@@ -98,7 +97,7 @@ def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterabl
|
|
|
98
97
|
pubmed_id = pubmed_id.strip()
|
|
99
98
|
if not pubmed_id:
|
|
100
99
|
continue
|
|
101
|
-
term.append_provenance(("pubmed", pubmed_id))
|
|
100
|
+
term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
|
|
102
101
|
yield term
|
|
103
102
|
|
|
104
103
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Converter for resources in BiGG."""
|
|
2
|
+
|
|
3
|
+
from .bigg_compartment import BiGGCompartmentGetter
|
|
4
|
+
from .bigg_metabolite import BiGGMetaboliteGetter
|
|
5
|
+
from .bigg_model import BiGGModelGetter
|
|
6
|
+
from .bigg_reaction import BiGGReactionGetter
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"BiGGCompartmentGetter",
|
|
10
|
+
"BiGGMetaboliteGetter",
|
|
11
|
+
"BiGGModelGetter",
|
|
12
|
+
"BiGGReactionGetter",
|
|
13
|
+
]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Get compartments from BiGG."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
|
|
5
|
+
from bioversions.utils import get_soup
|
|
6
|
+
|
|
7
|
+
from pyobo import Obo, Reference, Term
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"BiGGCompartmentGetter",
|
|
11
|
+
"get_compartments",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
DATA_URL = "http://bigg.ucsd.edu/compartments/"
|
|
15
|
+
PREFIX = "bigg.compartment"
|
|
16
|
+
GO_MAPPING: dict[str, Reference | None] = {
|
|
17
|
+
"c": Reference(prefix="go", identifier="0005829", name="cytosol"),
|
|
18
|
+
"e": Reference(prefix="go", identifier="0005615", name="extracellular space"),
|
|
19
|
+
"p": Reference(prefix="go", identifier="0042597", name="periplasmic space"),
|
|
20
|
+
"m": Reference(prefix="go", identifier="0005739", name="mitochondrion"),
|
|
21
|
+
"r": Reference(prefix="go", identifier="0005783", name="endoplasmic reticulum"),
|
|
22
|
+
"v": Reference(prefix="go", identifier="0005773", name="vacuole"),
|
|
23
|
+
"n": Reference(prefix="go", identifier="0005634", name="nucleus"),
|
|
24
|
+
"g": Reference(prefix="go", identifier="0005794", name="Golgi apparatus"),
|
|
25
|
+
"u": Reference(prefix="go", identifier="0009579", name="thylakoid"),
|
|
26
|
+
"l": Reference(prefix="go", identifier="0005764", name="lysosome"),
|
|
27
|
+
"h": Reference(prefix="go", identifier="0009507", name="chloroplast"),
|
|
28
|
+
"f": Reference(prefix="go", identifier="0005929", name="cilium"),
|
|
29
|
+
"s": Reference(prefix="go", identifier="1990413", name="eyespot apparatus"),
|
|
30
|
+
"um": Reference(prefix="go", identifier="0042651", name="thylakoid membrane"),
|
|
31
|
+
"y": Reference(prefix="go", identifier="0070069", name="cytochrome complex"),
|
|
32
|
+
# note that glyoxysome is a child class of peroxisome in GO
|
|
33
|
+
"x": Reference(prefix="go", identifier="0005777", name="peroxisome"),
|
|
34
|
+
"mm": Reference(prefix="go", identifier="0005743", name="mitochondrial inner membrane"),
|
|
35
|
+
"im": Reference(prefix="go", identifier="0005758", name="mitochondrial intermembrane space"),
|
|
36
|
+
"cx": None, # missing for carboxyzome
|
|
37
|
+
"cm": None, # missing for cytosolic membrane
|
|
38
|
+
"i": None, # missing for inner mitochondrial compartment
|
|
39
|
+
"w": None, # missing for wildtype staph aureus
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class BiGGCompartmentGetter(Obo):
    """Expose the BiGG compartment vocabulary as a PyOBO ontology."""

    # The ontology uses its own prefix, while the version is looked up under the "bigg" key.
    bioversions_key = "bigg"
    ontology = PREFIX

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Produce the terms for the BiGG compartments.

        :param force: forwarded unchanged to :func:`iterate_terms`
        :returns: whatever :func:`iterate_terms` produces for this ontology's version
        """
        resolved_version = self._version_or_raise
        return iterate_terms(force=force, version=resolved_version)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_compartments(*, force: bool = False, version: str | None = None) -> dict[str, str]:
    """Get a dictionary of BiGG compartments.

    The page at ``DATA_URL`` is loaded with :func:`get_soup` and the element
    with the class ``myTable`` is read row by row.

    :param force: currently unused by this function
    :param version: currently unused by this function
    :returns: a mapping from the text of each data row's first cell to the
        text of its second cell
    :raises ValueError: if no element with class ``myTable`` is found, or if a
        row containing ``<td>`` cells does not have exactly two of them
    """
    soup = get_soup(DATA_URL)
    table = soup.find(**{"class": "myTable"})  # type:ignore[arg-type]
    if table is None:
        raise ValueError(f"could not find an element with class 'myTable' at {DATA_URL}")

    rv: dict[str, str] = {}
    for row in table.find_all("tr"):  # type:ignore[attr-defined]
        cells = list(row.find_all("td"))
        if not cells:
            # rows without any <td> (presumably the header row) carry no data
            continue
        if len(cells) != 2:
            # same exception type the bare tuple-unpacking raised, but with context
            raise ValueError(
                f"expected 2 cells per compartment row at {DATA_URL}, got {len(cells)}"
            )
        identifier_cell, name_cell = cells
        rv[identifier_cell.text] = name_cell.text
    return rv
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def iterate_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
    """Yield one term for every compartment reported by :func:`get_compartments`.

    :param force: forwarded to :func:`get_compartments`
    :param version: forwarded to :func:`get_compartments`
    :returns: terms in the ``PREFIX`` namespace; a term gets an exact match
        when ``GO_MAPPING`` holds a (non-``None``) reference for its identifier
    """
    for compartment_id, label in get_compartments(force=force, version=version).items():
        compartment = Term.from_triple(PREFIX, compartment_id, label)
        go_reference = GO_MAPPING.get(compartment_id)
        # identifiers that are absent from the mapping, or mapped to None, get no match
        if go_reference:
            compartment.append_exact_match(go_reference)
        yield compartment
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":
|
|
81
|
+
BiGGCompartmentGetter.cli()
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Converter for metabolites in BiGG."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
|
|
7
|
+
import bioregistry
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from pydantic import ValidationError
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from pyobo.sources.bigg.bigg_compartment import GO_MAPPING
|
|
13
|
+
from pyobo.struct import Obo, Reference, Term
|
|
14
|
+
from pyobo.struct.typedef import located_in, participates_in
|
|
15
|
+
from pyobo.utils.path import ensure_df
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"BiGGMetaboliteGetter",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
PREFIX = "bigg.metabolite"
|
|
24
|
+
URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt"
|
|
25
|
+
PATTERN = re.compile("^[a-z_A-Z0-9]+$")
|
|
26
|
+
|
|
27
|
+
MOLECULE = Term.from_triple("cob", "0000013", "molecule")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BiGGMetaboliteGetter(Obo):
    """Expose BiGG metabolites as a PyOBO ontology."""

    # The ontology uses its own prefix, while the version is looked up under the "bigg" key.
    bioversions_key = "bigg"
    ontology = PREFIX
    # the reference of the module-level MOLECULE term is declared as the only root
    root_terms = [MOLECULE.reference]
    typedefs = [participates_in, located_in]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Produce the terms for the BiGG metabolites.

        :param force: forwarded unchanged to :func:`iterate_terms`
        :returns: whatever :func:`iterate_terms` produces for this ontology's version
        """
        resolved_version = self._version_or_raise
        return iterate_terms(force=force, version=resolved_version)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
KEY_TO_PREFIX = {
|
|
44
|
+
"CHEBI": "chebi",
|
|
45
|
+
"Human Metabolome Database": "hmdb",
|
|
46
|
+
"LipidMaps": "lipidmaps",
|
|
47
|
+
"BioCyc": "biocyc",
|
|
48
|
+
"KEGG Compound": "kegg.compound",
|
|
49
|
+
"MetaNetX (MNX) Chemical": "metanetx.chemical",
|
|
50
|
+
"InChI Key": "inchikey",
|
|
51
|
+
"SEED Compound": "seed.compound",
|
|
52
|
+
"Reactome Compound": "reactome",
|
|
53
|
+
"KEGG Drug": "kegg.drug",
|
|
54
|
+
"KEGG Glycan": "kegg.glycan",
|
|
55
|
+
"MetaNetX (MNX) Equation": "metanetx.reaction",
|
|
56
|
+
"RHEA": "rhea",
|
|
57
|
+
"EC Number": "ec",
|
|
58
|
+
"SEED Reaction": "seed.reaction",
|
|
59
|
+
"Reactome Reaction": "reactome",
|
|
60
|
+
"KEGG Reaction": "kegg.reaction",
|
|
61
|
+
}
|
|
62
|
+
EXACTS = {"inchikey"}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _split(x) -> list[str]:
|
|
66
|
+
if pd.notna(x):
|
|
67
|
+
return [y.strip() for y in x.split(";")]
|
|
68
|
+
return []
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
    """Iterate terms for BiGG Metabolite.

    Yields, in order: the ``MOLECULE`` term, one term per row whose
    compartment-specific identifier matches ``PATTERN``, one term per distinct
    universal metabolite (as a child of ``MOLECULE``), and one bare term per
    distinct compartment reference that was used.

    :param force: passed to :func:`ensure_df`
    :param version: passed to :func:`ensure_df`
    :raises ValueError: if a value of ``KEY_TO_PREFIX`` differs from what
        :func:`bioregistry.normalize_prefix` returns for it
    :raises KeyError: if the suffix after the last underscore of a
        compartment-specific identifier is not a key of ``GO_MAPPING``
    """
    bigg_df = ensure_df(
        prefix=PREFIX,
        url=URL,
        force=force,
        version=version,
    )

    # check the hard-coded prefixes before processing any rows
    for v in KEY_TO_PREFIX.values():
        nmp = bioregistry.normalize_prefix(v)
        if v != nmp:
            raise ValueError(f"Normalize {v} to {nmp}")

    universal_references: set[Reference] = set()
    compartment_references: set[Reference] = set()

    yield MOLECULE

    # TODO there are duplicates on universal ID - this might be
    #  because the compartment ID is unique
    for (
        bigg_compartmental_id,
        universal_bigg_id,
        name,
        model_list,
        database_links,
        old_bigg_ids,
    ) in tqdm(
        bigg_df.values,
        unit_scale=True,
        unit="metabolite",
        desc=f"[{PREFIX}] processing",
    ):
        if not PATTERN.match(bigg_compartmental_id):
            tqdm.write(f"[{PREFIX}] invalid BIGG ID: {bigg_compartmental_id}")
            continue

        universal_name = name.strip() if pd.notna(name) else None

        # the compartment is taken from the part after the last underscore
        _, _, compartment_letter = bigg_compartmental_id.rpartition("_")
        compartment_reference = GO_MAPPING[compartment_letter] or Reference(
            prefix="bigg.compartment", identifier=compartment_letter
        )
        compartment_references.add(compartment_reference)
        # The fallback reference above is built without a name; use the compartment
        # letter in that case rather than rendering the literal text "None".
        compartment_label = compartment_reference.name or compartment_letter
        compartment_name = (
            f"{universal_name} (in {compartment_label})" if universal_name else None
        )

        term = Term(
            reference=Reference(
                prefix=PREFIX,
                identifier=bigg_compartmental_id,
                name=compartment_name,
            ),
        )
        term.append_relationship(located_in, compartment_reference)

        # a missing universal ID arrives as a non-string (NaN), which the regex cannot match
        if isinstance(universal_bigg_id, str) and PATTERN.match(universal_bigg_id):
            universal_reference = Reference(
                prefix=PREFIX, identifier=universal_bigg_id, name=universal_name
            )
            term.append_parent(universal_reference)
            universal_references.add(universal_reference)
        else:
            # report the offending universal ID, keyed by the row it came from
            tqdm.write(
                f"[{PREFIX}:{bigg_compartmental_id}] invalid universal BIGG ID: {universal_bigg_id}"
            )

        for old_bigg_id in _split(old_bigg_ids):
            if old_bigg_id in {bigg_compartmental_id, universal_bigg_id}:
                continue
            if not PATTERN.match(old_bigg_id):
                if not old_bigg_id.endswith("]"):
                    # if it ends with ']' then it's a compartment identifier
                    logger.debug(f"[{PREFIX}:{universal_bigg_id}] invalid alt ID: {old_bigg_id}")
                continue
            term.append_alt(Reference(prefix=PREFIX, identifier=old_bigg_id))
        _parse_model_links(term, model_list)
        _parse_dblinks(term, database_links)

        yield term

    for universal_reference in universal_references:
        yield Term(reference=universal_reference).append_parent(MOLECULE)

    for compartment in compartment_references:
        yield Term(reference=compartment)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _parse_model_links(term: Term, model_list: str) -> None:
    """Annotate a term with the BiGG models named in a semicolon-delimited cell.

    :param term: the term that receives one ``participates_in`` annotation per
        model identifier that validates as a ``bigg.model`` reference
    :param model_list: the raw cell, split with :func:`_split`
    """
    for model_id in _split(model_list):
        try:
            model_reference = Reference(prefix="bigg.model", identifier=model_id)
        except ValidationError:
            # report the bad identifier and move on to the next one
            tqdm.write(f"[{term.curie}] invalid model reference: {model_id}")
            continue
        term.annotate_object(participates_in, model_reference)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _parse_dblinks(term: Term, database_links: str, property_map=None) -> None:
    """Attach the cross-references encoded in a database links cell to a term.

    Each semicolon-delimited entry is read as ``<key>:<IRI>``. The IRI is
    resolved to a prefix and identifier, checked against ``KEY_TO_PREFIX``,
    validated as a :class:`Reference`, de-duplicated, and then added to the term.

    :param term: the term to modify in place
    :param database_links: the raw cell, split with :func:`_split`
    :param property_map: optional mapping from prefix to the property used for
        annotating references with that prefix; prefixes not in it become exact
        matches (if in ``EXACTS``) or plain xrefs
    """
    property_map = property_map or {}

    # IRIs below these namespaces are split literally instead of going through bioregistry
    literal_namespaces = {
        "http://identifiers.org/kegg.glycan/": "kegg.glycan",
        "http://identifiers.org/kegg.drug/": "kegg.drug",
        "http://identifiers.org/kegg.reaction/": "kegg.reaction",
    }

    # there are duplicate xrefs, keep track
    seen = set()

    for dblink in _split(database_links):
        key, _, remainder = dblink.strip().partition(":")
        identifier_url = remainder.strip()
        if not identifier_url:
            continue

        for namespace, literal_prefix in literal_namespaces.items():
            if identifier_url.startswith(namespace):
                prefix = literal_prefix
                identifier = identifier_url.removeprefix(namespace)
                break
        else:
            prefix_, identifier_ = bioregistry.parse_iri(identifier_url)
            if not (prefix_ and identifier_):
                tqdm.write(f"[{PREFIX}] failed to parse xref IRI: {identifier_url}")
                continue
            # a bare "kegg" result is treated as a KEGG compound
            prefix = "kegg.compound" if prefix_ == "kegg" else prefix_
            identifier = identifier_

        if KEY_TO_PREFIX.get(key) != prefix:
            # NOTE: the names `prefix` and `key` appear verbatim in this message
            tqdm.write(f"[{PREFIX}] mismatch between {prefix=} and {key=} - {identifier_url}")
            continue

        if prefix == "rhea":
            # keep only the part before any '#'
            identifier = identifier.partition("#")[0]

        try:
            reference = Reference(prefix=prefix, identifier=identifier)
        except ValidationError:
            tqdm.write(f"[{term.curie}] could not validate xref - {prefix}:{identifier}")
            continue

        # don't add self-reference
        if reference.pair == term.pair:
            continue

        if reference in seen:
            tqdm.write(f"[{term.curie}] got duplicate xref {reference}")
            continue
        seen.add(reference)

        if prefix in property_map:
            term.annotate_object(property_map[prefix], reference)
        elif prefix in EXACTS:
            term.append_exact_match(reference)
        else:
            term.append_xref(reference)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
if __name__ == "__main__":
|
|
229
|
+
BiGGMetaboliteGetter.cli()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Converter for models in BiGG."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
|
|
7
|
+
from pyobo.resources.ncbitaxon import get_ncbitaxon_id
|
|
8
|
+
from pyobo.struct import Obo, Term
|
|
9
|
+
from pyobo.utils.path import ensure_path
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"BiGGModelGetter",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
16
|
+
# Address of the JSON document that iterate_terms() downloads and parses.
URL = "http://bigg.ucsd.edu/api/v2/models"
|
|
17
|
+
# Prefix used for the ontology and for every term created in this module.
PREFIX = "bigg.model"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BiGGModelGetter(Obo):
    """An ontology representation of BiGG Models."""

    ontology = PREFIX
    bioversions_key = "bigg"

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms in the ontology.

        :param force: Forwarded to :func:`iterate_terms` so that a forced
            rebuild also applies to the underlying download.
        :returns: The iterable of terms produced by :func:`iterate_terms`.
        """
        return iterate_terms(version=self._version_or_raise, force=force)


def iterate_terms(version: str, force: bool = False) -> Iterable[Term]:
    """Iterate over BiGG Models.

    :param version: Version passed to :func:`ensure_path` when locating the
        downloaded file.
    :param force: Passed to :func:`ensure_path` as ``force``. Defaults to
        ``False``, which matches the behavior before this parameter existed.
    :yields: One term per entry of the ``"results"`` list in the downloaded
        JSON document. The species is set when the record's organism name can
        be resolved to an NCBI Taxonomy identifier.
    """
    path = ensure_path(PREFIX, url=URL, version=version, force=force)
    records = json.loads(path.read_text())["results"]
    for record in records:
        ncbitaxon_id = get_ncbitaxon_id(record["organism"])
        term = Term.from_triple(PREFIX, record["bigg_id"])
        if ncbitaxon_id:
            term.set_species(ncbitaxon_id)
        else:
            # Keep the term even without a species; just record the miss.
            logger.info("[%s] could not ground organism name: %s", term.curie, record["organism"])
        yield term
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Invoke the getter's command-line interface when this module is run as a script.
if __name__ == "__main__":
|
|
46
|
+
BiGGModelGetter.cli()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Converter for BiGG."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pydantic import ValidationError
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
9
|
+
from pyobo.sources.bigg.bigg_metabolite import _parse_dblinks, _parse_model_links, _split
|
|
10
|
+
from pyobo.struct import Obo, Reference, Term
|
|
11
|
+
from pyobo.struct.typedef import enabled_by, participates_in
|
|
12
|
+
from pyobo.utils.path import ensure_df
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"BiGGReactionGetter",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
# Prefix used for the ontology and for every term and alternate identifier created here.
PREFIX = "bigg.reaction"
|
|
19
|
+
# Address of the reactions table that iterate_terms() loads via ensure_df().
URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt"
|
|
20
|
+
# Maps the "ec" prefix to the enabled_by typedef.
# NOTE(review): this constant is not referenced anywhere else in this module;
# presumably it was meant to be handed to _parse_dblinks() - confirm.
PROPERTY_MAP = {"ec": enabled_by}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BiGGReactionGetter(Obo):
    """Expose BiGG Reactions through the :class:`Obo` interface."""

    ontology = PREFIX
    bioversions_key = "bigg"
    # Typedefs attached to this resource.
    typedefs = [participates_in, enabled_by]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Produce the terms of this resource.

        :param force: Forwarded unchanged to :func:`iterate_terms`.
        :returns: The iterable built by :func:`iterate_terms` for the version
            given by ``self._version_or_raise``.
        """
        resolved_version = self._version_or_raise
        return iterate_terms(version=resolved_version, force=force)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
    """Iterate terms for BiGG Reaction.

    :param force: Forwarded to :func:`ensure_df` as ``force``.
    :param version: Forwarded to :func:`ensure_df` as ``version``.
    :yields: One term per row of the loaded table. Rows whose identifier
        contains an opening parenthesis are reported and skipped. Each row is
        unpacked as ``(bigg_id, name, reaction_string, model_list,
        database_links, old_bigg_ids)``.
    """
    bigg_reaction_df = ensure_df(
        prefix=PREFIX,
        url=URL,
        force=force,
        version=version,
    )

    for bigg_id, name, reaction_string, model_list, database_links, old_bigg_ids in tqdm(
        bigg_reaction_df.values, unit_scale=True, unit="reaction", desc=f"[{PREFIX}] processing"
    ):
        if "(" in bigg_id:
            tqdm.write(f"[{PREFIX}] identifier has open paren. can't encode in OWL: {bigg_id}")
            continue

        term = Term(
            reference=Reference(
                prefix=PREFIX, identifier=bigg_id, name=name if pd.notna(name) else None
            ),
            # An empty cell is read as NaN; treat it like a missing name so a
            # float never ends up as the textual definition.
            definition=reaction_string if pd.notna(reaction_string) else None,
        )
        for old_bigg_id in _split(old_bigg_ids):
            # An old identifier equal to the current one is not an alternate.
            if old_bigg_id == bigg_id:
                continue
            # Same restriction as for the primary identifier above.
            if "(" in old_bigg_id:
                continue
            try:
                alt_reference = Reference(prefix=PREFIX, identifier=old_bigg_id)
            except ValidationError:
                tqdm.write(f"[{term.curie}] had problematic alt reference: {old_bigg_id}")
            else:
                term.append_alt(alt_reference)
        _parse_model_links(term, model_list)

        # TODO make sure exact match goes to the bidirectional rhea reaction but not others
        # NOTE(review): the module-level PROPERTY_MAP is not passed here;
        # confirm whether _parse_dblinks() is expected to receive it.
        _parse_dblinks(term, database_links)

        yield term
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Invoke the getter's command-line interface when this module is run as a script.
if __name__ == "__main__":
|
|
77
|
+
BiGGReactionGetter.cli()
|