pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/__init__.py CHANGED
@@ -3,10 +3,12 @@
3
3
  from class_resolver import ClassResolver
4
4
 
5
5
  from .antibodyregistry import AntibodyRegistryGetter
6
+ from .bigg import BiGGCompartmentGetter, BiGGMetaboliteGetter, BiGGModelGetter, BiGGReactionGetter
6
7
  from .ccle import CCLEGetter
7
8
  from .cgnc import CGNCGetter
8
- from .chembl import ChEMBLCompoundGetter
9
+ from .chembl import ChEMBLCompoundGetter, ChEMBLTargetGetter
9
10
  from .civic_gene import CIVICGeneGetter
11
+ from .clinicaltrials import ClinicalTrialsGetter
10
12
  from .complexportal import ComplexPortalGetter
11
13
  from .conso import CONSOGetter
12
14
  from .cpt import CPTGetter
@@ -14,34 +16,38 @@ from .credit import CreditGetter
14
16
  from .cvx import CVXGetter
15
17
  from .depmap import DepMapGetter
16
18
  from .dictybase_gene import DictybaseGetter
17
- from .drugbank import DrugBankGetter
18
- from .drugbank_salt import DrugBankSaltGetter
19
+ from .drugbank import DrugBankGetter, DrugBankSaltGetter
19
20
  from .drugcentral import DrugCentralGetter
20
21
  from .expasy import ExpasyGetter
21
22
  from .famplex import FamPlexGetter
22
23
  from .flybase import FlyBaseGetter
23
- from .geonames import GeonamesGetter
24
- from .gwascentral_phenotype import GWASCentralPhenotypeGetter
25
- from .gwascentral_study import GWASCentralStudyGetter
26
- from .hgnc import HGNCGetter
27
- from .hgncgenefamily import HGNCGroupGetter
28
- from .icd10 import ICD10Getter
29
- from .icd11 import ICD11Getter
24
+ from .gard import GARDGetter
25
+ from .geonames import GeonamesFeatureGetter, GeonamesGetter
26
+ from .gtdb import GTDBGetter
27
+ from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
28
+ from .hgnc import HGNCGetter, HGNCGroupGetter
29
+ from .icd import ICD10Getter, ICD11Getter
30
30
  from .interpro import InterProGetter
31
31
  from .itis import ITISGetter
32
32
  from .kegg import KEGGGeneGetter, KEGGGenomeGetter, KEGGPathwayGetter
33
33
  from .mesh import MeSHGetter
34
34
  from .mgi import MGIGetter
35
- from .mirbase import MiRBaseGetter
36
- from .mirbase_family import MiRBaseFamilyGetter
37
- from .mirbase_mature import MiRBaseMatureGetter
35
+ from .mirbase import MiRBaseFamilyGetter, MiRBaseGetter, MiRBaseMatureGetter
38
36
  from .msigdb import MSigDBGetter
39
- from .ncbigene import NCBIGeneGetter
37
+ from .ncbi import NCBIGCGetter, NCBIGeneGetter
38
+ from .nih_reporter import NIHReporterGetter
39
+ from .nlm import NLMCatalogGetter, NLMPublisherGetter
40
40
  from .npass import NPASSGetter
41
41
  from .omim_ps import OMIMPSGetter
42
42
  from .pathbank import PathBankGetter
43
- from .pfam import PfamGetter
44
- from .pfam_clan import PfamClanGetter
43
+ from .pfam import PfamClanGetter, PfamGetter
44
+ from .pharmgkb import (
45
+ PharmGKBChemicalGetter,
46
+ PharmGKBDiseaseGetter,
47
+ PharmGKBGeneGetter,
48
+ PharmGKBPathwayGetter,
49
+ PharmGKBVariantGetter,
50
+ )
45
51
  from .pid import PIDGetter
46
52
  from .pombase import PomBaseGetter
47
53
  from .pubchem import PubChemCompoundGetter
@@ -51,15 +57,21 @@ from .rhea import RheaGetter
51
57
  from .ror import RORGetter
52
58
  from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
53
59
  from .sgd import SGDGetter
60
+ from .signor import SignorGetter
54
61
  from .slm import SLMGetter
55
- from .umls import UMLSGetter
62
+ from .umls import UMLSGetter, UMLSSTyGetter
63
+ from .unimod import UnimodGetter
56
64
  from .uniprot import UniProtGetter, UniProtPtmGetter
57
65
  from .wikipathways import WikiPathwaysGetter
58
66
  from .zfin import ZFINGetter
59
- from ..struct import Obo
67
+ from ..struct.struct import AdHocOntologyBase, Obo
60
68
 
61
69
  __all__ = [
62
70
  "AntibodyRegistryGetter",
71
+ "BiGGCompartmentGetter",
72
+ "BiGGMetaboliteGetter",
73
+ "BiGGModelGetter",
74
+ "BiGGReactionGetter",
63
75
  "CCLEGetter",
64
76
  "CGNCGetter",
65
77
  "CIVICGeneGetter",
@@ -67,6 +79,8 @@ __all__ = [
67
79
  "CPTGetter",
68
80
  "CVXGetter",
69
81
  "ChEMBLCompoundGetter",
82
+ "ChEMBLTargetGetter",
83
+ "ClinicalTrialsGetter",
70
84
  "ComplexPortalGetter",
71
85
  "CreditGetter",
72
86
  "DepMapGetter",
@@ -77,8 +91,11 @@ __all__ = [
77
91
  "ExpasyGetter",
78
92
  "FamPlexGetter",
79
93
  "FlyBaseGetter",
94
+ "GARDGetter",
95
+ "GTDBGetter",
80
96
  "GWASCentralPhenotypeGetter",
81
97
  "GWASCentralStudyGetter",
98
+ "GeonamesFeatureGetter",
82
99
  "GeonamesGetter",
83
100
  "HGNCGetter",
84
101
  "HGNCGroupGetter",
@@ -95,13 +112,22 @@ __all__ = [
95
112
  "MiRBaseFamilyGetter",
96
113
  "MiRBaseGetter",
97
114
  "MiRBaseMatureGetter",
115
+ "NCBIGCGetter",
98
116
  "NCBIGeneGetter",
117
+ "NIHReporterGetter",
118
+ "NLMCatalogGetter",
119
+ "NLMPublisherGetter",
99
120
  "NPASSGetter",
100
121
  "OMIMPSGetter",
101
122
  "PIDGetter",
102
123
  "PathBankGetter",
103
124
  "PfamClanGetter",
104
125
  "PfamGetter",
126
+ "PharmGKBChemicalGetter",
127
+ "PharmGKBDiseaseGetter",
128
+ "PharmGKBGeneGetter",
129
+ "PharmGKBPathwayGetter",
130
+ "PharmGKBVariantGetter",
105
131
  "PomBaseGetter",
106
132
  "PubChemCompoundGetter",
107
133
  "RGDGetter",
@@ -114,24 +140,21 @@ __all__ = [
114
140
  "SFAMGetter",
115
141
  "SGDGetter",
116
142
  "SLMGetter",
143
+ "SignorGetter",
117
144
  "UMLSGetter",
145
+ "UMLSSTyGetter",
118
146
  "UniProtGetter",
119
147
  "UniProtPtmGetter",
148
+ "UnimodGetter",
120
149
  "WikiPathwaysGetter",
121
150
  "ZFINGetter",
122
151
  "ontology_resolver",
123
152
  ]
124
153
 
125
-
126
- def _assert_sorted():
127
- _sorted = sorted(__all__)
128
- if _sorted != __all__:
129
- raise ValueError(f"unsorted. should be:\n{_sorted}")
130
-
131
-
132
- _assert_sorted()
133
- del _assert_sorted
134
-
135
- ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(base=Obo, suffix="Getter")
154
+ ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(
155
+ base=Obo,
156
+ suffix="Getter",
157
+ skip={AdHocOntologyBase},
158
+ )
136
159
  for getter in list(ontology_resolver):
137
160
  ontology_resolver.synonyms[getter.ontology] = getter
pyobo/sources/agrovoc.py CHANGED
File without changes
pyobo/sources/antibodyregistry.py CHANGED
@@ -1,15 +1,18 @@
1
- """Converter for the Antibody Registry."""
1
+ """Converter for the Antibody Registry.
2
+
3
+ TODO use API https://www.antibodyregistry.org/api/antibodies?page=1&size=100
4
+ """
2
5
 
3
6
  import logging
4
7
  from collections.abc import Iterable, Mapping
5
- from typing import Optional
6
8
 
7
9
  import pandas as pd
8
10
  from bioregistry.utils import removeprefix
9
11
  from tqdm.auto import tqdm
10
12
 
11
- from pyobo import Obo, Term
13
+ from pyobo import Obo, Reference, Term
12
14
  from pyobo.api.utils import get_version
15
+ from pyobo.struct.typedef import has_citation
13
16
  from pyobo.utils.path import ensure_df
14
17
 
15
18
  __all__ = [
@@ -23,7 +26,7 @@ URL = "http://antibodyregistry.org/php/fileHandler.php"
23
26
  CHUNKSIZE = 20_000
24
27
 
25
28
 
26
- def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
29
+ def get_chunks(*, force: bool = False, version: str | None = None) -> pd.DataFrame:
27
30
  """Get the BioGRID identifiers mapping dataframe."""
28
31
  if version is None:
29
32
  version = get_version(PREFIX)
@@ -44,19 +47,15 @@ class AntibodyRegistryGetter(Obo):
44
47
  """An ontology representation of the Antibody Registry."""
45
48
 
46
49
  ontology = bioversions_key = PREFIX
50
+ typedefs = [has_citation]
47
51
 
48
52
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
49
53
  """Iterate over terms in the ontology."""
50
54
  return iter_terms(force=force, version=self._version_or_raise)
51
55
 
52
56
 
53
- def get_obo(*, force: bool = False) -> Obo:
54
- """Get the Antibody Registry as OBO."""
55
- return AntibodyRegistryGetter(force=force)
56
-
57
-
58
57
  # TODO there are tonnnnsss of mappings to be curated
59
- MAPPING: Mapping[str, Optional[str]] = {
58
+ MAPPING: Mapping[str, str | None] = {
60
59
  "AMERICAN DIAGNOSTICA": None, # No website
61
60
  "Biolegend": "biolegend",
62
61
  "Enzo Life Sciences": "enzo",
@@ -74,7 +73,7 @@ SKIP = {
74
73
  }
75
74
 
76
75
 
77
- def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
76
+ def iter_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
78
77
  """Iterate over antibodies."""
79
78
  chunks = get_chunks(force=force, version=version)
80
79
  needs_curating = set()
@@ -98,7 +97,7 @@ def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterabl
98
97
  pubmed_id = pubmed_id.strip()
99
98
  if not pubmed_id:
100
99
  continue
101
- term.append_provenance(("pubmed", pubmed_id))
100
+ term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
102
101
  yield term
103
102
 
104
103
 
pyobo/sources/bigg/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """Converter for resources in BiGG."""
2
+
3
+ from .bigg_compartment import BiGGCompartmentGetter
4
+ from .bigg_metabolite import BiGGMetaboliteGetter
5
+ from .bigg_model import BiGGModelGetter
6
+ from .bigg_reaction import BiGGReactionGetter
7
+
8
+ __all__ = [
9
+ "BiGGCompartmentGetter",
10
+ "BiGGMetaboliteGetter",
11
+ "BiGGModelGetter",
12
+ "BiGGReactionGetter",
13
+ ]
pyobo/sources/bigg/bigg_compartment.py ADDED
@@ -0,0 +1,81 @@
1
+ """Get compartments from BiGG."""
2
+
3
+ from collections.abc import Iterable
4
+
5
+ from bioversions.utils import get_soup
6
+
7
+ from pyobo import Obo, Reference, Term
8
+
9
+ __all__ = [
10
+ "BiGGCompartmentGetter",
11
+ "get_compartments",
12
+ ]
13
+
14
+ DATA_URL = "http://bigg.ucsd.edu/compartments/"
15
+ PREFIX = "bigg.compartment"
16
+ GO_MAPPING: dict[str, Reference | None] = {
17
+ "c": Reference(prefix="go", identifier="0005829", name="cytosol"),
18
+ "e": Reference(prefix="go", identifier="0005615", name="extracellular space"),
19
+ "p": Reference(prefix="go", identifier="0042597", name="periplasmic space"),
20
+ "m": Reference(prefix="go", identifier="0005739", name="mitochondrion"),
21
+ "r": Reference(prefix="go", identifier="0005783", name="endoplasmic reticulum"),
22
+ "v": Reference(prefix="go", identifier="0005773", name="vacuole"),
23
+ "n": Reference(prefix="go", identifier="0005634", name="nucleus"),
24
+ "g": Reference(prefix="go", identifier="0005794", name="Golgi apparatus"),
25
+ "u": Reference(prefix="go", identifier="0009579", name="thylakoid"),
26
+ "l": Reference(prefix="go", identifier="0005764", name="lysosome"),
27
+ "h": Reference(prefix="go", identifier="0009507", name="chloroplast"),
28
+ "f": Reference(prefix="go", identifier="0005929", name="cilium"),
29
+ "s": Reference(prefix="go", identifier="1990413", name="eyespot apparatus"),
30
+ "um": Reference(prefix="go", identifier="0042651", name="thylakoid membrane"),
31
+ "y": Reference(prefix="go", identifier="0070069", name="cytochrome complex"),
32
+ # note that glyoxysome is a child class of peroxisome in GO
33
+ "x": Reference(prefix="go", identifier="0005777", name="peroxisome"),
34
+ "mm": Reference(prefix="go", identifier="0005743", name="mitochondrial inner membrane"),
35
+ "im": Reference(prefix="go", identifier="0005758", name="mitochondrial intermembrane space"),
36
+ "cx": None, # missing for carboxyzome
37
+ "cm": None, # missing for cytosolic membrane
38
+ "i": None, # missing for inner mitochondrial compartment
39
+ "w": None, # missing for wildtype staph aureus
40
+ }
41
+
42
+
43
+ class BiGGCompartmentGetter(Obo):
44
+ """An ontology representation of BiGG compartments."""
45
+
46
+ ontology = PREFIX
47
+ bioversions_key = "bigg"
48
+
49
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
50
+ """Iterate over terms in the ontology."""
51
+ return iterate_terms(force=force, version=self._version_or_raise)
52
+
53
+
54
+ def get_compartments(*, force: bool = False, version: str | None = None) -> dict[str, str]:
55
+ """Get a dictionary of BiGG compartments."""
56
+ rv = {}
57
+ soup = get_soup(DATA_URL)
58
+ table = soup.find(**{"class": "myTable"}) # type:ignore[arg-type]
59
+ if table is None:
60
+ raise ValueError
61
+ for row in table.find_all("tr"): # type:ignore[attr-defined]
62
+ cells = list(row.find_all("td"))
63
+ if not cells:
64
+ continue
65
+ identifier_cell, name_cell = cells
66
+ rv[identifier_cell.text] = name_cell.text
67
+ return rv
68
+
69
+
70
+ def iterate_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
71
+ """Iterate over BiGG compartments."""
72
+ compartments = get_compartments(force=force, version=version)
73
+ for identifier, name in compartments.items():
74
+ term = Term.from_triple(PREFIX, identifier, name)
75
+ if go_component_ref := GO_MAPPING.get(identifier):
76
+ term.append_exact_match(go_component_ref)
77
+ yield term
78
+
79
+
80
+ if __name__ == "__main__":
81
+ BiGGCompartmentGetter.cli()
pyobo/sources/bigg/bigg_metabolite.py ADDED
@@ -0,0 +1,229 @@
1
+ """Converter for metabolites in BiGG."""
2
+
3
+ import logging
4
+ import re
5
+ from collections.abc import Iterable
6
+
7
+ import bioregistry
8
+ import pandas as pd
9
+ from pydantic import ValidationError
10
+ from tqdm import tqdm
11
+
12
+ from pyobo.sources.bigg.bigg_compartment import GO_MAPPING
13
+ from pyobo.struct import Obo, Reference, Term
14
+ from pyobo.struct.typedef import located_in, participates_in
15
+ from pyobo.utils.path import ensure_df
16
+
17
+ __all__ = [
18
+ "BiGGMetaboliteGetter",
19
+ ]
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ PREFIX = "bigg.metabolite"
24
+ URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt"
25
+ PATTERN = re.compile("^[a-z_A-Z0-9]+$")
26
+
27
+ MOLECULE = Term.from_triple("cob", "0000013", "molecule")
28
+
29
+
30
+ class BiGGMetaboliteGetter(Obo):
31
+ """An ontology representation of BiGG Metabolites."""
32
+
33
+ ontology = PREFIX
34
+ bioversions_key = "bigg"
35
+ typedefs = [participates_in, located_in]
36
+ root_terms = [MOLECULE.reference]
37
+
38
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
39
+ """Iterate over terms in the ontology."""
40
+ return iterate_terms(force=force, version=self._version_or_raise)
41
+
42
+
43
+ KEY_TO_PREFIX = {
44
+ "CHEBI": "chebi",
45
+ "Human Metabolome Database": "hmdb",
46
+ "LipidMaps": "lipidmaps",
47
+ "BioCyc": "biocyc",
48
+ "KEGG Compound": "kegg.compound",
49
+ "MetaNetX (MNX) Chemical": "metanetx.chemical",
50
+ "InChI Key": "inchikey",
51
+ "SEED Compound": "seed.compound",
52
+ "Reactome Compound": "reactome",
53
+ "KEGG Drug": "kegg.drug",
54
+ "KEGG Glycan": "kegg.glycan",
55
+ "MetaNetX (MNX) Equation": "metanetx.reaction",
56
+ "RHEA": "rhea",
57
+ "EC Number": "ec",
58
+ "SEED Reaction": "seed.reaction",
59
+ "Reactome Reaction": "reactome",
60
+ "KEGG Reaction": "kegg.reaction",
61
+ }
62
+ EXACTS = {"inchikey"}
63
+
64
+
65
+ def _split(x) -> list[str]:
66
+ if pd.notna(x):
67
+ return [y.strip() for y in x.split(";")]
68
+ return []
69
+
70
+
71
+ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
72
+ """Iterate terms for BiGG Metabolite."""
73
+ bigg_df = ensure_df(
74
+ prefix=PREFIX,
75
+ url=URL,
76
+ force=force,
77
+ version=version,
78
+ )
79
+
80
+ for v in KEY_TO_PREFIX.values():
81
+ nmp = bioregistry.normalize_prefix(v)
82
+ if v != nmp:
83
+ raise ValueError(f"Normalize {v} to {nmp}")
84
+
85
+ universal_references: set[Reference] = set()
86
+ compartment_references: set[Reference] = set()
87
+
88
+ yield MOLECULE
89
+
90
+ # TODO there are duplicates on universal ID - this might be
91
+ # because the compartment ID is unique
92
+ for (
93
+ bigg_compartmental_id,
94
+ universal_bigg_id,
95
+ name,
96
+ model_list,
97
+ database_links,
98
+ old_bigg_ids,
99
+ ) in tqdm(
100
+ bigg_df.values,
101
+ unit_scale=True,
102
+ unit="metabolite",
103
+ desc=f"[{PREFIX}] processing",
104
+ ):
105
+ if not PATTERN.match(bigg_compartmental_id):
106
+ tqdm.write(f"[{PREFIX}] invalid BIGG ID: {bigg_compartmental_id}")
107
+ continue
108
+
109
+ universal_name = name.strip() if pd.notna(name) else None
110
+
111
+ _, _, compartment_letter = bigg_compartmental_id.rpartition("_")
112
+ compartment_reference = GO_MAPPING[compartment_letter] or Reference(
113
+ prefix="bigg.compartment", identifier=compartment_letter
114
+ )
115
+ compartment_references.add(compartment_reference)
116
+ compartment_name = (
117
+ f"{universal_name} (in {compartment_reference.name})" if universal_name else None
118
+ )
119
+
120
+ term = Term(
121
+ reference=Reference(
122
+ prefix=PREFIX,
123
+ identifier=bigg_compartmental_id,
124
+ name=compartment_name,
125
+ ),
126
+ )
127
+ term.append_relationship(located_in, compartment_reference)
128
+
129
+ if PATTERN.match(universal_bigg_id):
130
+ universal_reference = Reference(
131
+ prefix=PREFIX, identifier=universal_bigg_id, name=universal_name
132
+ )
133
+ term.append_parent(universal_reference)
134
+ universal_references.add(universal_reference)
135
+ else:
136
+ tqdm.write(f"[{PREFIX}] invalid universal BIGG ID: {bigg_compartmental_id}")
137
+
138
+ for old_bigg_id in _split(old_bigg_ids):
139
+ if old_bigg_id in {bigg_compartmental_id, universal_bigg_id}:
140
+ continue
141
+ if not PATTERN.match(old_bigg_id):
142
+ if not old_bigg_id.endswith("]"):
143
+ # if it ends with ']' then it's a compartment identifier
144
+ logger.debug(f"[{PREFIX}:{universal_bigg_id}] invalid alt ID: {old_bigg_id}")
145
+ continue
146
+ term.append_alt(Reference(prefix=PREFIX, identifier=old_bigg_id))
147
+ _parse_model_links(term, model_list)
148
+ _parse_dblinks(term, database_links)
149
+
150
+ yield term
151
+
152
+ for universal_reference in universal_references:
153
+ yield Term(reference=universal_reference).append_parent(MOLECULE)
154
+
155
+ for compartment in compartment_references:
156
+ yield Term(reference=compartment)
157
+
158
+
159
+ def _parse_model_links(term: Term, model_list: str) -> None:
160
+ for model_id in _split(model_list):
161
+ try:
162
+ reference = Reference(prefix="bigg.model", identifier=model_id)
163
+ except ValidationError:
164
+ tqdm.write(f"[{term.curie}] invalid model reference: {model_id}")
165
+ else:
166
+ term.annotate_object(participates_in, reference)
167
+
168
+
169
+ def _parse_dblinks(term: Term, database_links: str, property_map=None) -> None:
170
+ if not property_map:
171
+ property_map = {}
172
+
173
+ # there are duplicate xrefs, keep track
174
+ seen = set()
175
+
176
+ for dblink in _split(database_links):
177
+ key, _, identifier_url = dblink.strip().partition(":")
178
+ identifier_url = identifier_url.strip()
179
+ if not identifier_url:
180
+ continue
181
+
182
+ if identifier_url.startswith("http://identifiers.org/kegg.glycan/"):
183
+ prefix = "kegg.glycan"
184
+ identifier = identifier_url.removeprefix("http://identifiers.org/kegg.glycan/")
185
+ elif identifier_url.startswith("http://identifiers.org/kegg.drug/"):
186
+ prefix = "kegg.drug"
187
+ identifier = identifier_url.removeprefix("http://identifiers.org/kegg.drug/")
188
+ elif identifier_url.startswith("http://identifiers.org/kegg.reaction/"):
189
+ prefix = "kegg.reaction"
190
+ identifier = identifier_url.removeprefix("http://identifiers.org/kegg.reaction/")
191
+ else:
192
+ prefix_, identifier_ = bioregistry.parse_iri(identifier_url)
193
+ if not prefix_ or not identifier_:
194
+ tqdm.write(f"[{PREFIX}] failed to parse xref IRI: {identifier_url}")
195
+ continue
196
+ prefix, identifier = prefix_, identifier_
197
+ if prefix == "kegg":
198
+ prefix = "kegg.compound"
199
+ if prefix != KEY_TO_PREFIX.get(key):
200
+ tqdm.write(f"[{PREFIX}] mismatch between {prefix=} and {key=} - {identifier_url}")
201
+ continue
202
+ if prefix == "rhea" and "#" in identifier:
203
+ identifier = identifier.split("#")[0]
204
+
205
+ try:
206
+ reference = Reference(prefix=prefix, identifier=identifier)
207
+ except ValidationError:
208
+ tqdm.write(f"[{term.curie}] could not validate xref - {prefix}:{identifier}")
209
+ continue
210
+ # don't add self-reference
211
+ if reference.pair == term.pair:
212
+ continue
213
+
214
+ if reference in seen:
215
+ tqdm.write(f"[{term.curie}] got duplicate xref {reference}")
216
+ continue
217
+
218
+ seen.add(reference)
219
+
220
+ if prefix in property_map:
221
+ term.annotate_object(property_map[prefix], reference)
222
+ elif prefix in EXACTS:
223
+ term.append_exact_match(reference)
224
+ else:
225
+ term.append_xref(reference)
226
+
227
+
228
+ if __name__ == "__main__":
229
+ BiGGMetaboliteGetter.cli()
pyobo/sources/bigg/bigg_model.py ADDED
@@ -0,0 +1,46 @@
1
+ """Converter for models in BiGG."""
2
+
3
+ import json
4
+ import logging
5
+ from collections.abc import Iterable
6
+
7
+ from pyobo.resources.ncbitaxon import get_ncbitaxon_id
8
+ from pyobo.struct import Obo, Term
9
+ from pyobo.utils.path import ensure_path
10
+
11
+ __all__ = [
12
+ "BiGGModelGetter",
13
+ ]
14
+
15
+ logger = logging.getLogger(__name__)
16
+ URL = "http://bigg.ucsd.edu/api/v2/models"
17
+ PREFIX = "bigg.model"
18
+
19
+
20
+ class BiGGModelGetter(Obo):
21
+ """An ontology representation of BiGG Models."""
22
+
23
+ ontology = PREFIX
24
+ bioversions_key = "bigg"
25
+
26
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
27
+ """Iterate over terms in the ontology."""
28
+ return iterate_terms(version=self._version_or_raise)
29
+
30
+
31
+ def iterate_terms(version: str) -> Iterable[Term]:
32
+ """Iterate over BiGG Models."""
33
+ path = ensure_path(PREFIX, url=URL, version=version)
34
+ records = json.loads(path.read_text())["results"]
35
+ for record in records:
36
+ ncbitaxon_id = get_ncbitaxon_id(record["organism"])
37
+ term = Term.from_triple(PREFIX, record["bigg_id"])
38
+ if ncbitaxon_id:
39
+ term.set_species(ncbitaxon_id)
40
+ else:
41
+ logger.info("[%s] could not ground organism name: %s", term.curie, record["organism"])
42
+ yield term
43
+
44
+
45
+ if __name__ == "__main__":
46
+ BiGGModelGetter.cli()
pyobo/sources/bigg/bigg_reaction.py ADDED
@@ -0,0 +1,77 @@
1
+ """Converter for BiGG."""
2
+
3
+ from collections.abc import Iterable
4
+
5
+ import pandas as pd
6
+ from pydantic import ValidationError
7
+ from tqdm import tqdm
8
+
9
+ from pyobo.sources.bigg.bigg_metabolite import _parse_dblinks, _parse_model_links, _split
10
+ from pyobo.struct import Obo, Reference, Term
11
+ from pyobo.struct.typedef import enabled_by, participates_in
12
+ from pyobo.utils.path import ensure_df
13
+
14
+ __all__ = [
15
+ "BiGGReactionGetter",
16
+ ]
17
+
18
+ PREFIX = "bigg.reaction"
19
+ URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt"
20
+ PROPERTY_MAP = {"ec": enabled_by}
21
+
22
+
23
+ class BiGGReactionGetter(Obo):
24
+ """An ontology representation of BiGG Reactions."""
25
+
26
+ ontology = PREFIX
27
+ bioversions_key = "bigg"
28
+ typedefs = [participates_in, enabled_by]
29
+
30
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
+ """Iterate over terms in the ontology."""
32
+ return iterate_terms(force=force, version=self._version_or_raise)
33
+
34
+
35
+ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
36
+ """Iterate terms for BiGG Reaction."""
37
+ bigg_reaction_df = ensure_df(
38
+ prefix=PREFIX,
39
+ url=URL,
40
+ force=force,
41
+ version=version,
42
+ )
43
+
44
+ for bigg_id, name, reaction_string, model_list, database_links, old_bigg_ids in tqdm(
45
+ bigg_reaction_df.values, unit_scale=True, unit="reaction", desc=f"[{PREFIX}] processing"
46
+ ):
47
+ if "(" in bigg_id:
48
+ tqdm.write(f"[{PREFIX}] identifier has open paren. can't encode in OWL: {bigg_id}")
49
+ continue
50
+
51
+ term = Term(
52
+ reference=Reference(
53
+ prefix=PREFIX, identifier=bigg_id, name=name if pd.notna(name) else None
54
+ ),
55
+ definition=reaction_string,
56
+ )
57
+ for old_bigg_id in _split(old_bigg_ids):
58
+ if old_bigg_id == bigg_id:
59
+ continue
60
+ if "(" in old_bigg_id:
61
+ continue
62
+ try:
63
+ alt_reference = Reference(prefix=PREFIX, identifier=old_bigg_id)
64
+ except ValidationError:
65
+ tqdm.write(f"[{term.curie}] had problematic alt reference: {old_bigg_id}")
66
+ else:
67
+ term.append_alt(alt_reference)
68
+ _parse_model_links(term, model_list)
69
+
70
+ # TODO make sure exact match goes to the bidirectional rhea reaction but not others
71
+ _parse_dblinks(term, database_links)
72
+
73
+ yield term
74
+
75
+
76
+ if __name__ == "__main__":
77
+ BiGGReactionGetter.cli()