pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/reactome.py CHANGED
@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
11
11
  from ..api import get_id_multirelations_mapping
12
12
  from ..constants import SPECIES_REMAPPING
13
13
  from ..resources.ncbitaxon import get_ncbitaxon_id
14
- from ..struct import Obo, Reference, Term, from_species, has_participant
14
+ from ..struct import Obo, Reference, Term, from_species, has_citation, has_participant
15
15
  from ..utils.io import multidict
16
16
  from ..utils.path import ensure_df
17
17
 
@@ -31,18 +31,13 @@ class ReactomeGetter(Obo):
31
31
  """An ontology representation of the Reactome pathway database."""
32
32
 
33
33
  ontology = bioversions_key = PREFIX
34
- typedefs = [from_species, has_participant]
34
+ typedefs = [from_species, has_participant, has_citation]
35
35
 
36
36
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
37
37
  """Iterate over terms in the ontology."""
38
38
  return iter_terms(version=self._version_or_raise, force=force)
39
39
 
40
40
 
41
- def get_obo(force: bool = False) -> Obo:
42
- """Get Reactome OBO."""
43
- return ReactomeGetter(force=force)
44
-
45
-
46
41
  def ensure_participant_df(version: str, force: bool = False) -> pd.DataFrame:
47
42
  """Get the pathway uniprot participant dataframe."""
48
43
  uniprot_pathway_url = f"https://reactome.org/download/{version}/UniProt2Reactome_All_Levels.txt"
@@ -76,11 +71,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
76
71
  for reactome_id, name, species_name, taxonomy_id in it:
77
72
  terms[reactome_id] = term = Term(
78
73
  reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
79
- provenance=[
80
- Reference(prefix="pubmed", identifier=pubmed_id)
81
- for pubmed_id in provenance_d.get(reactome_id, [])
82
- ],
83
74
  )
75
+ for pubmed_id in provenance_d.get(reactome_id, []):
76
+ term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
77
+
84
78
  if not taxonomy_id or pd.isna(taxonomy_id):
85
79
  raise ValueError(f"unmapped species: {species_name}")
86
80
 
pyobo/sources/rgd.py CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  import logging
4
4
  from collections.abc import Iterable
5
- from typing import Optional
6
5
 
7
6
  import pandas as pd
8
7
  from tqdm.auto import tqdm
@@ -10,10 +9,11 @@ from tqdm.auto import tqdm
10
9
  from pyobo.struct import (
11
10
  Obo,
12
11
  Reference,
13
- Synonym,
14
12
  SynonymTypeDef,
15
13
  Term,
14
+ default_reference,
16
15
  from_species,
16
+ has_citation,
17
17
  has_gene_product,
18
18
  transcribes_to,
19
19
  )
@@ -22,8 +22,8 @@ from pyobo.utils.path import ensure_df
22
22
  logger = logging.getLogger(__name__)
23
23
  PREFIX = "rgd"
24
24
 
25
- old_symbol_type = SynonymTypeDef.from_text("old_symbol")
26
- old_name_type = SynonymTypeDef.from_text("old_name")
25
+ old_symbol_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_symbol"))
26
+ old_name_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_name"))
27
27
 
28
28
  # NOTE unigene id was discontinue in January 18th, 2021 dump
29
29
 
@@ -73,7 +73,7 @@ class RGDGetter(Obo):
73
73
  """An ontology representation of RGD's rat gene nomenclature."""
74
74
 
75
75
  bioversions_key = ontology = PREFIX
76
- typedefs = [from_species, transcribes_to, has_gene_product]
76
+ typedefs = [from_species, transcribes_to, has_gene_product, has_citation]
77
77
  synonym_typedefs = [old_name_type, old_symbol_type]
78
78
 
79
79
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -81,11 +81,6 @@ class RGDGetter(Obo):
81
81
  return get_terms(force=force, version=self._version_or_raise)
82
82
 
83
83
 
84
- def get_obo(force: bool = False) -> Obo:
85
- """Get RGD as OBO."""
86
- return RGDGetter(force=force)
87
-
88
-
89
84
  namespace_to_column = [
90
85
  ("ensembl", "ENSEMBL_ID"),
91
86
  ("uniprot", "UNIPROT_ID"),
@@ -93,7 +88,7 @@ namespace_to_column = [
93
88
  ]
94
89
 
95
90
 
96
- def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
91
+ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
97
92
  """Get RGD terms."""
98
93
  df = ensure_df(
99
94
  PREFIX,
@@ -124,11 +119,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
124
119
  old_names = row["OLD_NAME"]
125
120
  if old_names and pd.notna(old_names):
126
121
  for old_name in old_names.split(";"):
127
- term.append_synonym(Synonym(name=old_name, type=old_name_type))
122
+ term.append_synonym(old_name, type=old_name_type)
128
123
  old_symbols = row["OLD_SYMBOL"]
129
124
  if old_symbols and pd.notna(old_symbols):
130
125
  for old_symbol in old_symbols.split(";"):
131
- term.append_synonym(Synonym(name=old_symbol, type=old_symbol_type))
126
+ term.append_synonym(old_symbol, type=old_symbol_type)
132
127
  for prefix, key in namespace_to_column:
133
128
  xref_ids = str(row[key])
134
129
  if xref_ids and pd.notna(xref_ids):
@@ -136,7 +131,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
136
131
  if xref_id == "nan":
137
132
  continue
138
133
  if prefix == "uniprot":
139
- term.append_relationship(
134
+ term.annotate_object(
140
135
  has_gene_product, Reference(prefix=prefix, identifier=xref_id)
141
136
  )
142
137
  elif prefix == "ensembl":
@@ -144,11 +139,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
144
139
  # second one is reverse strand
145
140
  term.append_xref(Reference(prefix=prefix, identifier=xref_id))
146
141
  elif xref_id.startswith("ENSMUST"):
147
- term.append_relationship(
142
+ term.annotate_object(
148
143
  transcribes_to, Reference(prefix=prefix, identifier=xref_id)
149
144
  )
150
145
  elif xref_id.startswith("ENSMUSP"):
151
- term.append_relationship(
146
+ term.annotate_object(
152
147
  has_gene_product, Reference(prefix=prefix, identifier=xref_id)
153
148
  )
154
149
  else:
pyobo/sources/rhea.py CHANGED
@@ -2,23 +2,13 @@
2
2
 
3
3
  import logging
4
4
  from collections.abc import Iterable
5
- from typing import TYPE_CHECKING, Optional
5
+ from typing import TYPE_CHECKING, Any, cast
6
6
 
7
7
  import pystow
8
8
 
9
9
  from pyobo.api.utils import get_version
10
- from pyobo.struct import Obo, Reference, Term
11
- from pyobo.struct.typedef import (
12
- TypeDef,
13
- enabled_by,
14
- has_bidirectional_reaction,
15
- has_input,
16
- has_left_to_right_reaction,
17
- has_output,
18
- has_participant,
19
- has_right_to_left_reaction,
20
- reaction_enabled_by_molecular_function,
21
- )
10
+ from pyobo.struct import Obo, Reference, Term, TypeDef
11
+ from pyobo.struct import typedef as v
22
12
  from pyobo.utils.path import ensure_df
23
13
 
24
14
  if TYPE_CHECKING:
@@ -32,6 +22,16 @@ logger = logging.getLogger(__name__)
32
22
  PREFIX = "rhea"
33
23
  RHEA_RDF_GZ_URL = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"
34
24
 
25
+ has_left_to_right_reaction = TypeDef.default(
26
+ PREFIX, "hasLeftToRightReaction", name="has left to right reaction", is_metadata_tag=True
27
+ ).append_xref(v.has_left_to_right_reaction)
28
+ has_right_to_left_reaction = TypeDef.default(
29
+ PREFIX, "hasRightToLeftReaction", name="has right to left reaction", is_metadata_tag=True
30
+ ).append_xref(v.has_right_to_left_reaction)
31
+ has_bidirectional_reaction = TypeDef.default(
32
+ PREFIX, "hasBidirectionalReaction", name="has bidirectional reaction", is_metadata_tag=True
33
+ ).append_xref(v.has_bidirectional_reaction)
34
+
35
35
 
36
36
  class RheaGetter(Obo):
37
37
  """An ontology representation of Rhea's chemical reaction database."""
@@ -41,11 +41,11 @@ class RheaGetter(Obo):
41
41
  has_left_to_right_reaction,
42
42
  has_bidirectional_reaction,
43
43
  has_right_to_left_reaction,
44
- enabled_by,
45
- has_input,
46
- has_output,
47
- has_participant,
48
- reaction_enabled_by_molecular_function,
44
+ v.enabled_by,
45
+ v.has_input,
46
+ v.has_output,
47
+ v.has_participant,
48
+ v.reaction_enabled_by_molecular_function,
49
49
  ]
50
50
 
51
51
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -53,16 +53,13 @@ class RheaGetter(Obo):
53
53
  return iter_terms(version=self._version_or_raise, force=force)
54
54
 
55
55
 
56
- def get_obo(force: bool = False) -> Obo:
57
- """Get Rhea as OBO."""
58
- return RheaGetter(force=force)
59
-
60
-
61
- def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdflib.Graph":
56
+ def ensure_rhea_rdf(version: str | None = None, force: bool = False) -> "rdflib.Graph":
62
57
  """Get the Rhea RDF graph."""
63
58
  # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
64
59
  if version is None:
65
60
  version = get_version(PREFIX)
61
+ if version is None:
62
+ raise ValueError
66
63
  return pystow.ensure_rdf(
67
64
  "pyobo",
68
65
  "raw",
@@ -100,7 +97,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
100
97
  }
101
98
  """
102
99
  )
103
- names = {str(identifier): str(name) for _, identifier, name in result}
100
+ names = {
101
+ str(identifier): str(name)
102
+ for _, identifier, name in cast(Iterable[tuple[Any, str, str]], result)
103
+ }
104
104
 
105
105
  terms: dict[str, Term] = {}
106
106
  master_to_left: dict[str, str] = {}
@@ -145,8 +145,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
145
145
  ?compound rh:chebi|rh:underlyingChebi|(rh:reactivePart/rh:chebi) ?chebi .
146
146
  }
147
147
  """
148
- for master_rhea_id, side_uri, chebi_uri in graph.query(sparql):
149
- master_rhea_id = str(master_rhea_id)
148
+ results = cast(Iterable[tuple[int, str, str]], graph.query(sparql))
149
+ for master_rhea_id_int, side_uri, chebi_uri in results:
150
+ master_rhea_id = str(master_rhea_id_int)
150
151
  chebi_reference = Reference(
151
152
  prefix="chebi", identifier=chebi_uri[len("http://purl.obolibrary.org/obo/CHEBI_") :]
152
153
  )
@@ -159,10 +160,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
159
160
  right_rhea_id = master_to_left[master_rhea_id]
160
161
  else:
161
162
  raise ValueError(f"Invalid side: {side_uri}")
162
- terms[master_rhea_id].append_relationship(has_participant, chebi_reference)
163
- terms[master_to_bi[master_rhea_id]].append_relationship(has_participant, chebi_reference)
164
- terms[left_rhea_id].append_relationship(has_input, chebi_reference)
165
- terms[right_rhea_id].append_relationship(has_output, chebi_reference)
163
+ terms[master_rhea_id].annotate_object(v.has_participant, chebi_reference)
164
+ terms[master_to_bi[master_rhea_id]].annotate_object(v.has_participant, chebi_reference)
165
+ terms[left_rhea_id].append_relationship(v.has_input, chebi_reference)
166
+ terms[right_rhea_id].append_relationship(v.has_output, chebi_reference)
166
167
 
167
168
  hierarchy = ensure_df(
168
169
  PREFIX,
@@ -181,8 +182,8 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
181
182
  ("reactome", "rhea2reactome", None),
182
183
  ("macie", "rhea2macie", None),
183
184
  ("metacyc", "rhea2metacyc", None),
184
- ("go", "rhea2go", reaction_enabled_by_molecular_function),
185
- ("uniprot", "rhea2uniprot", enabled_by),
185
+ ("go", "rhea2go", v.reaction_enabled_by_molecular_function),
186
+ ("uniprot", "rhea2uniprot", v.enabled_by),
186
187
  ]:
187
188
  xref_df = ensure_df(
188
189
  PREFIX,
@@ -202,7 +203,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
202
203
  )
203
204
  continue
204
205
  target_reference = Reference(prefix=xref_prefix, identifier=xref_id)
205
- if isinstance(relation, TypeDef):
206
+ if relation is not None:
206
207
  terms[directional_rhea_id].append_relationship(relation, target_reference)
207
208
  else:
208
209
  terms[directional_rhea_id].append_xref(target_reference)
@@ -223,11 +224,11 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
223
224
  _iubmb,
224
225
  ) in ec_df.values:
225
226
  terms[directional_rhea_id].append_relationship(
226
- enabled_by, Reference(prefix="eccode", identifier=ec)
227
+ v.enabled_by, Reference(prefix="ec", identifier=ec)
227
228
  )
228
229
 
229
230
  yield from terms.values()
230
231
 
231
232
 
232
233
  if __name__ == "__main__":
233
- RheaGetter().write_default(write_obo=True, force=True)
234
+ RheaGetter.cli(["--owl"])
pyobo/sources/ror.py CHANGED
@@ -3,16 +3,18 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import json
6
+ import logging
6
7
  import zipfile
7
8
  from collections.abc import Iterable
8
9
  from typing import Any
9
10
 
10
11
  import bioregistry
11
12
  import zenodo_client
13
+ from pydantic import ValidationError
12
14
  from tqdm.auto import tqdm
13
15
 
14
16
  from pyobo.struct import Obo, Reference, Term
15
- from pyobo.struct.struct import acronym
17
+ from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, acronym
16
18
  from pyobo.struct.typedef import (
17
19
  has_homepage,
18
20
  has_part,
@@ -23,11 +25,13 @@ from pyobo.struct.typedef import (
23
25
  see_also,
24
26
  )
25
27
 
28
+ logger = logging.getLogger(__name__)
26
29
  PREFIX = "ror"
27
30
  ROR_ZENODO_RECORD_ID = "10086202"
28
31
 
29
32
  # Constants
30
- ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
33
+ ORG_CLASS = Reference(prefix="OBI", identifier="0000245", name="organization")
34
+ CITY_CLASS = Reference(prefix="ENVO", identifier="00000856", name="city")
31
35
 
32
36
  RMAP = {
33
37
  "Related": see_also,
@@ -52,16 +56,7 @@ class RORGetter(Obo):
52
56
  ontology = bioregistry_key = PREFIX
53
57
  typedefs = [has_homepage, *RMAP.values()]
54
58
  synonym_typedefs = [acronym]
55
- idspaces = {
56
- "ror": "https://ror.org/",
57
- "geonames": "https://www.geonames.org/",
58
- "ENVO": "http://purl.obolibrary.org/obo/ENVO_",
59
- "BFO": "http://purl.obolibrary.org/obo/BFO_",
60
- "RO": "http://purl.obolibrary.org/obo/RO_",
61
- "OBI": "http://purl.obolibrary.org/obo/OBI_",
62
- "OMO": "http://purl.obolibrary.org/obo/OMO_",
63
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
64
- }
59
+ root_terms = [CITY_CLASS, ORG_CLASS]
65
60
 
66
61
  def __post_init__(self):
67
62
  self.data_version, _url, _path = _get_info()
@@ -69,26 +64,40 @@ class RORGetter(Obo):
69
64
 
70
65
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
71
66
  """Iterate over terms in the ontology."""
72
- return iterate_ror_terms(force=force)
73
-
74
-
75
- ROR_ORGANIZATION_TYPE_TO_OBI = {
76
- "Education": ...,
77
- "Facility": ...,
78
- "Company": ...,
79
- "Government": ...,
80
- "Healthcare": ...,
81
- "Other": ...,
82
- "Archive": ...,
67
+ yield CHARLIE_TERM
68
+ yield HUMAN_TERM
69
+ yield Term(reference=ORG_CLASS)
70
+ yield Term(reference=CITY_CLASS)
71
+ yield from ROR_ORGANIZATION_TYPE_TO_OBI.values()
72
+ yield from iterate_ror_terms(force=force)
73
+
74
+
75
+ ROR_ORGANIZATION_TYPE_TO_OBI: dict[str, Term] = {
76
+ "Education": Term.default(PREFIX, "education", "educational organization"),
77
+ "Facility": Term.default(PREFIX, "facility", "facility"),
78
+ "Company": Term.default(PREFIX, "company", "company"),
79
+ "Government": Term.default(PREFIX, "government", "government organization"),
80
+ "Healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
81
+ "Archive": Term.default(PREFIX, "archive", "archival organization"),
82
+ "Nonprofit": Term.default(PREFIX, "healthcare", "nonprofit organization")
83
+ .append_xref(Reference(prefix="ICO", identifier="0000048"))
84
+ .append_xref(Reference(prefix="GSSO", identifier="004615")),
83
85
  }
86
+ for _k, v in ROR_ORGANIZATION_TYPE_TO_OBI.items():
87
+ v.append_parent(ORG_CLASS)
88
+ v.append_contributor(CHARLIE_TERM)
89
+ v.append_comment(PYOBO_INJECTED)
90
+
84
91
  _MISSED_ORG_TYPES: set[str] = set()
85
92
 
86
93
 
87
94
  def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
88
95
  """Iterate over terms in ROR."""
89
- version, source_uri, records = get_latest(force=force)
90
- unhandled_xref_prefixes = set()
91
- for record in tqdm(records, unit_scale=True, unit="record", desc=PREFIX):
96
+ _version, _source_uri, records = get_latest(force=force)
97
+ unhandled_xref_prefixes: set[str] = set()
98
+
99
+ seen_geonames_references = set()
100
+ for record in tqdm(records, unit_scale=True, unit="record", desc=f"{PREFIX} v{_version}"):
92
101
  identifier = record["id"].removeprefix("https://ror.org/")
93
102
  name = record["name"]
94
103
  name = NAME_REMAPPING.get(name, name)
@@ -103,13 +112,14 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
103
112
  type="Instance",
104
113
  definition=description,
105
114
  )
106
- term.append_parent(ORG_CLASS)
107
- # TODO replace term.append_parent(ORG_CLASS) with:
108
- # for organization_type in organization_types:
109
- # term.append_parent(ORG_PARENTS[organization_type])
115
+ for organization_type in organization_types:
116
+ if organization_type == "Other":
117
+ term.append_parent(ORG_CLASS)
118
+ else:
119
+ term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])
110
120
 
111
121
  for link in record.get("links", []):
112
- term.append_property(has_homepage, link)
122
+ term.annotate_uri(has_homepage, link)
113
123
 
114
124
  if name.startswith("The "):
115
125
  term.append_synonym(name.removeprefix("The "))
@@ -120,23 +130,29 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
120
130
  RMAP[relationship["type"]], Reference(prefix=PREFIX, identifier=target_id)
121
131
  )
122
132
 
123
- term.is_obsolete = record.get("status") != "active"
133
+ if record.get("status") != "active":
134
+ term.is_obsolete = True
124
135
 
125
136
  for address in record.get("addresses", []):
126
137
  city = address.get("geonames_city")
127
138
  if not city:
128
139
  continue
129
- term.append_relationship(
130
- RMAP["Located in"], Reference(prefix="geonames", identifier=str(city["id"]))
140
+ geonames_reference = Reference(
141
+ prefix="geonames", identifier=str(city["id"]), name=city["city"]
131
142
  )
132
-
133
- for label in record.get("labels", []):
134
- label = label["label"] # there's a language availabel in this dict too
135
- term.append_synonym(label)
143
+ seen_geonames_references.add(geonames_reference)
144
+ term.append_relationship(RMAP["Located in"], geonames_reference)
145
+
146
+ for label_dict in record.get("labels", []):
147
+ label = label_dict["label"]
148
+ label = label.strip().replace("\n", " ")
149
+ language = label_dict["iso639"]
150
+ term.append_synonym(label, language=language)
136
151
  if label.startswith("The "):
137
- term.append_synonym(label.removeprefix("The "))
152
+ term.append_synonym(label.removeprefix("The "), language=language)
138
153
 
139
154
  for synonym in record.get("aliases", []):
155
+ synonym = synonym.strip().replace("\n", " ")
140
156
  term.append_synonym(synonym)
141
157
  if synonym.startswith("The "):
142
158
  term.append_synonym(synonym.removeprefix("The "))
@@ -162,10 +178,21 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
162
178
  if isinstance(identifiers, str):
163
179
  identifiers = [identifiers]
164
180
  for xref_id in identifiers:
165
- term.append_xref(Reference(prefix=norm_prefix, identifier=xref_id.replace(" ", "")))
181
+ xref_id = xref_id.replace(" ", "")
182
+ try:
183
+ xref = Reference(prefix=norm_prefix, identifier=xref_id)
184
+ except ValidationError:
185
+ tqdm.write(f"[{term.curie}] invalid xref: {norm_prefix}:{xref_id}")
186
+ else:
187
+ term.append_xref(xref)
166
188
 
167
189
  yield term
168
190
 
191
+ for geonames_ref in sorted(seen_geonames_references):
192
+ geonames_term = Term(reference=geonames_ref, type="Instance")
193
+ geonames_term.append_parent(CITY_CLASS)
194
+ yield geonames_term
195
+
169
196
 
170
197
  def _get_info(*, force: bool = False):
171
198
  client = zenodo_client.Zenodo()
@@ -193,7 +220,7 @@ def get_latest(*, force: bool = False):
193
220
 
194
221
  def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
195
222
  """Get a mapping of ROR ids to GeoNames IDs for countries."""
196
- from pyobo.sources.geonames import get_city_to_country
223
+ from pyobo.sources.geonames.geonames import get_city_to_country
197
224
 
198
225
  city_to_country = get_city_to_country()
199
226
  rv = {}
@@ -207,4 +234,4 @@ def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
207
234
 
208
235
 
209
236
  if __name__ == "__main__":
210
- RORGetter(force=True).write_default(write_obo=True, force=True)
237
+ RORGetter.cli()
File without changes
@@ -1,6 +1,8 @@
1
1
  """Selventa chemicals.
2
2
 
3
- .. seealso:: https://github.com/pyobo/pyobo/issues/27
3
+ .. seealso::
4
+
5
+ https://github.com/pyobo/pyobo/issues/27
4
6
  """
5
7
 
6
8
  from collections.abc import Iterable
@@ -29,11 +31,6 @@ class SCHEMGetter(Obo):
29
31
  return iter_terms(force=force)
30
32
 
31
33
 
32
- def get_obo(*, force: bool = False) -> Obo:
33
- """Get Selventa chemical as OBO."""
34
- return SCHEMGetter(force=force)
35
-
36
-
37
34
  def iter_terms(force: bool = False) -> Iterable[Term]:
38
35
  """Iterate over selventa chemical terms."""
39
36
  df = ensure_df(PREFIX, url=URL, skiprows=8, force=force)
@@ -45,4 +42,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
45
42
 
46
43
 
47
44
  if __name__ == "__main__":
48
- get_obo().write_default(write_obo=True, force=True)
45
+ SCHEMGetter.cli()
@@ -26,11 +26,6 @@ class SCOMPGetter(Obo):
26
26
  return iter_terms(force=force)
27
27
 
28
28
 
29
- def get_obo(*, force: bool = False) -> Obo:
30
- """Get Selventa Complexes as OBO."""
31
- return SCOMPGetter(force=force)
32
-
33
-
34
29
  def iter_terms(force: bool = False) -> Iterable[Term]:
35
30
  """Iterate over selventa complex terms."""
36
31
  df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -54,4 +49,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
54
49
 
55
50
 
56
51
  if __name__ == "__main__":
57
- get_obo().write_default(write_obo=True, force=True)
52
+ SCOMPGetter.cli()
@@ -1,6 +1,8 @@
1
1
  """Selventa diseases.
2
2
 
3
- .. seealso:: https://github.com/pyobo/pyobo/issues/26
3
+ .. seealso::
4
+
5
+ https://github.com/pyobo/pyobo/issues/26
4
6
  """
5
7
 
6
8
  from collections.abc import Iterable
@@ -29,11 +31,6 @@ class SDISGetter(Obo):
29
31
  return iter_terms(force=force)
30
32
 
31
33
 
32
- def get_obo(*, force: bool = False) -> Obo:
33
- """Get Selventa Diseases as OBO."""
34
- return SDISGetter(force=force)
35
-
36
-
37
34
  def iter_terms(force: bool = False) -> Iterable[Term]:
38
35
  """Iterate over selventa disease terms."""
39
36
  df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -48,4 +45,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
48
45
 
49
46
 
50
47
  if __name__ == "__main__":
51
- get_obo().write_default(write_obo=True, force=True)
48
+ SDISGetter.cli()
@@ -26,11 +26,6 @@ class SFAMGetter(Obo):
26
26
  return iter_terms(force=force)
27
27
 
28
28
 
29
- def get_obo(*, force: bool = False) -> Obo:
30
- """Get Selventa Families as OBO."""
31
- return SFAMGetter(force=force)
32
-
33
-
34
29
  def iter_terms(force: bool = False) -> Iterable[Term]:
35
30
  """Iterate over selventa family terms."""
36
31
  df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -52,4 +47,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
52
47
 
53
48
 
54
49
  if __name__ == "__main__":
55
- get_obo().write_default(write_obo=True, force=True)
50
+ SFAMGetter.cli()
pyobo/sources/sgd.py CHANGED
@@ -3,8 +3,10 @@
3
3
  from collections.abc import Iterable
4
4
  from urllib.parse import unquote_plus
5
5
 
6
+ from pystow.utils import read_tarfile_csv
7
+
6
8
  from ..struct import Obo, Reference, Synonym, Term, from_species
7
- from ..utils.path import ensure_tar_df
9
+ from ..utils.path import ensure_path
8
10
 
9
11
  __all__ = [
10
12
  "SGDGetter",
@@ -31,24 +33,17 @@ class SGDGetter(Obo):
31
33
  yield from get_terms(self, force=force)
32
34
 
33
35
 
34
- def get_obo(force: bool = False) -> Obo:
35
- """Get SGD as OBO."""
36
- return SGDGetter(force=force)
37
-
38
-
39
36
  def get_terms(ontology: Obo, force: bool = False) -> Iterable[Term]:
40
37
  """Get SGD terms."""
41
- df = ensure_tar_df(
42
- prefix=PREFIX,
43
- url=URL,
38
+ path = ensure_path(PREFIX, url=URL, version=ontology._version_or_raise, force=force)
39
+ df = read_tarfile_csv(
40
+ path,
44
41
  inner_path=INNER_PATH,
45
42
  sep="\t",
46
43
  skiprows=18,
47
44
  header=None,
48
45
  names=HEADER,
49
- force=force,
50
46
  dtype=str,
51
- version=ontology._version_or_raise,
52
47
  )
53
48
  df = df[df["feature"] == "gene"]
54
49
  for data in df["data"]:
@@ -0,0 +1,7 @@
1
+ """Sources from `SIGNOR <https://signor.uniroma2.it/>`_."""
2
+
3
+ from .signor_complexes import SignorGetter
4
+
5
+ __all__ = [
6
+ "SignorGetter",
7
+ ]