pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/expasy.py CHANGED
@@ -4,18 +4,18 @@ import logging
4
4
  import re
5
5
  from collections import defaultdict
6
6
  from collections.abc import Iterable, Mapping
7
- from typing import Any, Optional
7
+ from typing import Any
8
8
 
9
9
  from .utils import get_go_mapping
10
- from ..struct import Obo, Reference, Synonym, Term
11
- from ..struct.typedef import enables, has_member, term_replaced_by
10
+ from ..struct import Annotation, Obo, OBOLiteral, Reference, Synonym, Term
11
+ from ..struct.typedef import enables, has_member, has_source, term_replaced_by
12
12
  from ..utils.path import ensure_path
13
13
 
14
14
  __all__ = [
15
15
  "ExpasyGetter",
16
16
  ]
17
17
 
18
- PREFIX = "eccode"
18
+ PREFIX = "ec"
19
19
  EXPASY_DATABASE_URL = "ftp://ftp.expasy.org/databases/enzyme/enzyme.dat"
20
20
  EXPASY_TREE_URL = "ftp://ftp.expasy.org/databases/enzyme/enzclass.txt"
21
21
 
@@ -43,33 +43,23 @@ class ExpasyGetter(Obo):
43
43
  """A getter for ExPASy Enzyme Classes."""
44
44
 
45
45
  bioversions_key = ontology = PREFIX
46
- typedefs = [has_member, enables, term_replaced_by]
46
+ typedefs = [has_member, enables, term_replaced_by, has_source]
47
47
  root_terms = [
48
- Reference(prefix="eccode", identifier="1"),
49
- Reference(prefix="eccode", identifier="2"),
50
- Reference(prefix="eccode", identifier="3"),
51
- Reference(prefix="eccode", identifier="4"),
52
- Reference(prefix="eccode", identifier="5"),
53
- Reference(prefix="eccode", identifier="6"),
54
- Reference(prefix="eccode", identifier="7"),
48
+ Reference(prefix=PREFIX, identifier="1"),
49
+ Reference(prefix=PREFIX, identifier="2"),
50
+ Reference(prefix=PREFIX, identifier="3"),
51
+ Reference(prefix=PREFIX, identifier="4"),
52
+ Reference(prefix=PREFIX, identifier="5"),
53
+ Reference(prefix=PREFIX, identifier="6"),
54
+ Reference(prefix=PREFIX, identifier="7"),
55
55
  ]
56
- idspaces = {
57
- "uniprot": "https://bioregistry.io/uniprot:",
58
- "eccode": "https://bioregistry.io/eccode:",
59
- "GO": "http://purl.obolibrary.org/obo/GO_",
60
- "RO": "http://purl.obolibrary.org/obo/RO_",
61
- }
56
+ property_values = [Annotation(has_source.reference, OBOLiteral.uri(EXPASY_DATABASE_URL))]
62
57
 
63
58
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
64
59
  """Iterate over terms in the ontology."""
65
60
  return get_terms(version=self._version_or_raise, force=force)
66
61
 
67
62
 
68
- def get_obo(force: bool = False) -> Obo:
69
- """Get ExPASy as OBO."""
70
- return ExpasyGetter(force=force)
71
-
72
-
73
63
  def get_terms(version: str, force: bool = False) -> Iterable[Term]:
74
64
  """Get the ExPASy terms."""
75
65
  tree_path = ensure_path(PREFIX, url=EXPASY_TREE_URL, version=version, force=force)
@@ -111,9 +101,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
111
101
  reference=Reference(prefix=PREFIX, identifier=ec_code), is_obsolete=True
112
102
  )
113
103
  for transfer_id in transfer_ids:
114
- term.append_relationship(
115
- term_replaced_by, Reference(prefix=PREFIX, identifier=transfer_id)
116
- )
104
+ term.append_replaced_by(Reference(prefix=PREFIX, identifier=transfer_id))
117
105
  continue
118
106
 
119
107
  parent_ec_code = data["parent"]["identifier"]
@@ -142,16 +130,17 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
142
130
  reference=Reference(prefix=PREFIX, identifier=ec_code, name=name),
143
131
  parents=[parent_term.reference],
144
132
  synonyms=synonyms,
133
+ definition=data.get("reaction"),
145
134
  )
146
135
  for domain in data.get("domains", []):
147
- term.append_relationship(
136
+ term.annotate_object(
148
137
  has_member,
149
138
  Reference.model_validate(
150
139
  {"prefix": domain["namespace"], "identifier": domain["identifier"]},
151
140
  ),
152
141
  )
153
142
  for protein in data.get("proteins", []):
154
- term.append_relationship(
143
+ term.annotate_object(
155
144
  has_member,
156
145
  Reference(
157
146
  prefix=protein["namespace"],
@@ -167,18 +156,16 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
167
156
  return terms.values()
168
157
 
169
158
 
170
- """TREE"""
171
-
172
-
173
159
  def normalize_expasy_id(expasy_id: str) -> str:
174
160
  """Return a standardized ExPASy identifier string.
175
161
 
176
162
  :param expasy_id: A possibly non-normalized ExPASy identifier
163
+ :return: A normalized string.
177
164
  """
178
165
  return expasy_id.replace(" ", "")
179
166
 
180
167
 
181
- def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]:
168
+ def give_edge(unnormalized_ec_code: str) -> tuple[int, str | None, str]:
182
169
  """Return a (parent, child) tuple for given id."""
183
170
  levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x]
184
171
  level = len(levels)
@@ -220,10 +207,11 @@ def get_tree(lines: Iterable[str]):
220
207
  return rv
221
208
 
222
209
 
223
- def get_database(lines: Iterable[str]) -> Mapping:
210
+ def get_database(lines: Iterable[str]) -> Mapping[str, dict[str, Any]]:
224
211
  """Parse the ExPASy database file and returns a list of enzyme entry dictionaries.
225
212
 
226
213
  :param lines: An iterator over the ExPASy database file or file-like
214
+ :returns: A mapping from EC code to data
227
215
  """
228
216
  rv = {}
229
217
  for groups in _group_by_id(lines):
@@ -256,7 +244,13 @@ def get_database(lines: Iterable[str]) -> Mapping:
256
244
  value = value.strip().removesuffix("and").rstrip(",").strip()
257
245
  ec_data_entry["transfer_id"] = _parse_transfer(value)
258
246
  elif descriptor == DE:
259
- ec_data_entry["concept"]["name"] = value.rstrip(".") # type:ignore
247
+ if "name" not in ec_data_entry["concept"]:
248
+ ec_data_entry["concept"]["name"] = ""
249
+ ec_data_entry["concept"]["name"] += value.rstrip(".") # type:ignore
250
+ elif descriptor == CA:
251
+ if "reaction" not in ec_data_entry:
252
+ ec_data_entry["reaction"] = ""
253
+ ec_data_entry["reaction"] += value.rstrip(".") # type:ignore
260
254
  elif descriptor == AN:
261
255
  ec_data_entry["synonyms"].append(value.rstrip(".")) # type:ignore
262
256
  elif descriptor == PR:
@@ -290,6 +284,9 @@ TRANSFER_SPLIT_RE = re.compile(r",\s*|\s+and\s+")
290
284
  def _parse_transfer(value: str) -> list[str]:
291
285
  """Parse transferred entry string.
292
286
 
287
+ :param value: A string for a transferred entry
288
+ :returns: A list of EC codes that it got transferred to
289
+
293
290
  >>> _parse_transfer("Transferred entry: 1.1.1.198, 1.1.1.227 and 1.1.1.228.")
294
291
  ['1.1.1.198', '1.1.1.227', '1.1.1.228']
295
292
  """
pyobo/sources/famplex.py CHANGED
@@ -8,8 +8,8 @@ import bioregistry
8
8
  from pystow.utils import get_commit
9
9
 
10
10
  from pyobo import get_name_id_mapping
11
- from pyobo.struct import Obo, Reference, Term
12
- from pyobo.struct.typedef import has_member, has_part, is_a, part_of
11
+ from pyobo.struct import Obo, Reference, Term, _parse_str_or_curie_or_uri
12
+ from pyobo.struct.typedef import has_citation, has_member, has_part, is_a, part_of
13
13
  from pyobo.utils.io import multidict
14
14
  from pyobo.utils.path import ensure_df
15
15
 
@@ -23,7 +23,7 @@ class FamPlexGetter(Obo):
23
23
 
24
24
  ontology = PREFIX
25
25
  dynamic_version = True
26
- typedefs = [has_member, has_part, is_a, part_of]
26
+ typedefs = [has_member, has_part, is_a, part_of, has_citation]
27
27
 
28
28
  def _get_version(self) -> str:
29
29
  return get_commit("sorgerlab", "famplex")
@@ -33,11 +33,6 @@ class FamPlexGetter(Obo):
33
33
  return get_terms(force=force, version=self._version_or_raise)
34
34
 
35
35
 
36
- def get_obo(force: bool = False) -> Obo:
37
- """Get FamPlex as OBO."""
38
- return FamPlexGetter(force=force)
39
-
40
-
41
36
  def get_terms(version: str, force: bool = False) -> Iterable[Term]:
42
37
  """Get the FamPlex terms."""
43
38
  base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
@@ -106,33 +101,33 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
106
101
  for (entity,) in entities_df.values:
107
102
  reference = Reference(prefix=PREFIX, identifier=entity, name=entity)
108
103
  definition, provenance = id_to_definition.get(entity, (None, None))
109
- provenance_reference = (
110
- Reference.from_curie(provenance) if isinstance(provenance, str) else None
111
- )
112
104
  term = Term(
113
105
  reference=reference,
114
106
  definition=definition,
115
- provenance=[] if provenance_reference is None else [provenance_reference],
116
107
  )
117
108
 
109
+ provenance_reference = (
110
+ _parse_str_or_curie_or_uri(provenance) if isinstance(provenance, str) else None
111
+ )
112
+ if provenance_reference:
113
+ term.append_provenance(provenance_reference)
114
+
118
115
  for xref_reference in id_xrefs.get(entity, []):
119
116
  term.append_xref(xref_reference)
120
117
 
121
118
  for r, t in out_edges.get(reference, []):
122
- if r == "isa" and t.prefix == "fplx":
119
+ if r == "isa":
123
120
  term.append_parent(t)
124
- elif r == "isa":
125
- term.append_relationship(is_a, t)
126
121
  elif r == "partof":
127
- term.append_relationship(part_of, t)
122
+ term.annotate_object(part_of, t)
128
123
  else:
129
124
  logging.warning("unhandled relation %s", r)
130
125
 
131
126
  for r, h in in_edges.get(reference, []):
132
127
  if r == "isa":
133
- term.append_relationship(has_member, h)
128
+ term.annotate_object(has_member, h)
134
129
  elif r == "partof":
135
- term.append_relationship(has_part, h)
130
+ term.annotate_object(has_part, h)
136
131
  else:
137
132
  logging.warning("unhandled relation %s", r)
138
133
  yield term
pyobo/sources/flybase.py CHANGED
@@ -8,7 +8,7 @@ from tqdm.auto import tqdm
8
8
 
9
9
  from pyobo import Reference
10
10
  from pyobo.resources.so import get_so_name
11
- from pyobo.struct import Obo, Term, from_species, orthologous
11
+ from pyobo.struct import Obo, Term, _parse_str_or_curie_or_uri, from_species, orthologous
12
12
  from pyobo.utils.io import multisetdict
13
13
  from pyobo.utils.path import ensure_df
14
14
 
@@ -91,11 +91,6 @@ def _get_synonyms(version, force):
91
91
  return df # TODO use this
92
92
 
93
93
 
94
- def get_obo(force: bool = False) -> Obo:
95
- """Get OBO."""
96
- return FlyBaseGetter(force=force)
97
-
98
-
99
94
  GTYPE_TO_SO = {
100
95
  "SRP_RNA_gene": "0001269",
101
96
  "protein_coding_gene": "0001217",
@@ -154,11 +149,11 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
154
149
  for hgnc_curie in human_orthologs.get(identifier, []):
155
150
  if not hgnc_curie or pd.isna(hgnc_curie):
156
151
  continue
157
- hgnc_ortholog = Reference.from_curie(hgnc_curie)
152
+ hgnc_ortholog = _parse_str_or_curie_or_uri(hgnc_curie)
158
153
  if hgnc_ortholog is None:
159
154
  tqdm.write(f"[{PREFIX}] {identifier} had invalid ortholog: {hgnc_curie}")
160
155
  else:
161
- term.append_relationship(orthologous, hgnc_ortholog)
156
+ term.annotate_object(orthologous, hgnc_ortholog)
162
157
  taxonomy_id = abbr_to_taxonomy.get(organism)
163
158
  if taxonomy_id is not None:
164
159
  term.set_species(taxonomy_id)
pyobo/sources/gard.py ADDED
@@ -0,0 +1,62 @@
1
+ """Converter for GARD."""
2
+
3
+ from collections.abc import Iterable
4
+
5
+ import requests
6
+
7
+ from pyobo.struct import Obo, Term, default_reference
8
+
9
+ __all__ = [
10
+ "GARDGetter",
11
+ ]
12
+
13
+ PREFIX = "gard"
14
+ PP = "gard.category"
15
+ URL = "https://rarediseases.info.nih.gov/assets/diseases.trimmed.json"
16
+
17
+
18
+ class GARDGetter(Obo):
19
+ """An ontology representation of GARD."""
20
+
21
+ bioversions_key = ontology = PREFIX
22
+ dynamic_version = True
23
+
24
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
25
+ """Iterate over gene terms for GARD."""
26
+ yield from get_terms()
27
+
28
+
29
+ def get_terms() -> Iterable[Term]:
30
+ """Get GARD terms."""
31
+ rows = requests.get(URL, timeout=5).json()
32
+ categories = {
33
+ category: default_reference(
34
+ prefix=PREFIX, identifier=category.lower().replace(" ", "_"), name=category
35
+ )
36
+ for row in rows
37
+ for category in row.get("diseaseCategories", [])
38
+ }
39
+ categories["uncategorized"] = default_reference(
40
+ prefix=PREFIX, identifier="uncategorized", name="Uncategorized Disease"
41
+ )
42
+ for category_reference in categories.values():
43
+ yield Term(reference=category_reference)
44
+
45
+ for row in rows:
46
+ term = Term.from_triple(PREFIX, identifier=str(row.pop("id")), name=row.pop("name"))
47
+ _name = row.pop("encodedName", None)
48
+ for synonym in row.pop("synonyms", []):
49
+ synonym = synonym.strip()
50
+ if synonym:
51
+ term.append_synonym(synonym)
52
+ for category in row.pop("diseaseCategories", ["uncategorized"]):
53
+ term.append_parent(categories[category])
54
+
55
+ _spanish_id = row.pop("spanishId", None)
56
+ _spanish_name = row.pop("spanishName", None)
57
+
58
+ yield term
59
+
60
+
61
+ if __name__ == "__main__":
62
+ GARDGetter().cli()
@@ -0,0 +1,9 @@
1
+ """Sources from GeoNames."""
2
+
3
+ from .features import GeonamesFeatureGetter
4
+ from .geonames import GeonamesGetter
5
+
6
+ __all__ = [
7
+ "GeonamesFeatureGetter",
8
+ "GeonamesGetter",
9
+ ]
@@ -0,0 +1,28 @@
1
+ """Get terms from GeoNames Features."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from collections.abc import Iterable
7
+
8
+ from pyobo import Obo, Term
9
+ from pyobo.sources.geonames.utils import PREFIX_FEATURE, get_feature_terms
10
+
11
+ __all__ = ["GeonamesFeatureGetter"]
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class GeonamesFeatureGetter(Obo):
17
+ """An ontology representation of GeoNames features."""
18
+
19
+ ontology = PREFIX_FEATURE
20
+ dynamic_version = True
21
+
22
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
23
+ """Iterate over terms in the ontology."""
24
+ yield from get_feature_terms(force=force)
25
+
26
+
27
+ if __name__ == "__main__":
28
+ GeonamesFeatureGetter.cli()
@@ -3,53 +3,81 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
- from collections.abc import Collection, Iterable, Mapping
6
+ from collections.abc import Iterable, Mapping
7
7
 
8
8
  import pandas as pd
9
9
  from pystow.utils import read_zipfile_csv
10
10
  from tqdm import tqdm
11
11
 
12
12
  from pyobo import Obo, Term
13
- from pyobo.struct import Reference, part_of
13
+ from pyobo.sources.geonames.utils import (
14
+ ADMIN1_URL,
15
+ ADMIN2_URL,
16
+ ADMIN_1,
17
+ ADMIN_2,
18
+ CITIES_URL,
19
+ CITY,
20
+ CODE_TYPEDEF,
21
+ COUNTRIES_URL,
22
+ FEATURE_TERM,
23
+ NATION,
24
+ P_CATEGORY,
25
+ PREFIX,
26
+ PREFIX_FEATURE,
27
+ SYNONYMS_DF_COLUMNS,
28
+ SYNONYMS_URL,
29
+ get_feature_terms,
30
+ )
31
+ from pyobo.struct import Reference, has_part, part_of
14
32
  from pyobo.utils.path import ensure_df, ensure_path
15
33
 
16
34
  __all__ = ["GeonamesGetter"]
17
35
 
18
36
  logger = logging.getLogger(__name__)
19
37
 
20
- PREFIX = "geonames"
21
- COUNTRIES_URL = "https://download.geonames.org/export/dump/countryInfo.txt"
22
- ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
23
- ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
24
- CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
25
-
26
38
 
27
39
  class GeonamesGetter(Obo):
28
40
  """An ontology representation of GeoNames."""
29
41
 
30
42
  ontology = PREFIX
31
43
  dynamic_version = True
32
- typedefs = [part_of]
44
+ typedefs = [part_of, CODE_TYPEDEF, has_part]
33
45
 
34
46
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
35
47
  """Iterate over terms in the ontology."""
36
48
  return get_terms(force=force)
37
49
 
38
50
 
39
- def get_terms(*, force: bool = False) -> Collection[Term]:
51
+ def get_terms(*, force: bool = False) -> Iterable[Term]:
40
52
  """Get terms."""
53
+ yield Term(reference=NATION)
54
+ yield Term(reference=ADMIN_1).append_relationship(part_of, NATION)
55
+ yield Term(reference=ADMIN_2).append_relationship(part_of, ADMIN_1)
56
+ yield Term(reference=CITY)
57
+
58
+ # since the output here is only cities, we can slice this down
59
+ for term in get_feature_terms(force=force):
60
+ if term.identifier.startswith("P.") or term.pair == P_CATEGORY.pair or term == FEATURE_TERM:
61
+ yield term
62
+
41
63
  code_to_country = get_code_to_country(force=force)
64
+ yield from code_to_country.values()
65
+
42
66
  code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
67
+ yield from code_to_admin1.values()
68
+
43
69
  code_to_admin2 = get_code_to_admin2(
44
70
  code_to_country=code_to_country, code_to_admin1=code_to_admin1, force=force
45
71
  )
72
+ yield from code_to_admin2.values()
73
+
46
74
  id_to_term = get_cities(
47
75
  code_to_country=code_to_country,
48
76
  code_to_admin1=code_to_admin1,
49
77
  code_to_admin2=code_to_admin2,
50
78
  force=force,
51
79
  )
52
- return id_to_term.values()
80
+ yield from list(id_to_term.values())
53
81
 
54
82
 
55
83
  def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
@@ -70,9 +98,13 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
70
98
  for identifier, name, code, fips, iso3 in countries_df[cols].values:
71
99
  if pd.isna(code):
72
100
  continue
73
- term = Term.from_triple(
74
- "geonames", identifier, name if pd.notna(name) else None, type="Instance"
101
+ term = Term(
102
+ reference=Reference(
103
+ prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
104
+ ),
105
+ type="Instance",
75
106
  )
107
+ term.append_parent(NATION)
76
108
  term.append_synonym(code)
77
109
  if name.startswith("The "):
78
110
  term.append_synonym(name.removeprefix("The "))
@@ -80,7 +112,7 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
80
112
  term.append_synonym(fips)
81
113
  if pd.notna(iso3):
82
114
  term.append_synonym(iso3)
83
- term.append_property("code", code)
115
+ term.annotate_string(CODE_TYPEDEF, code)
84
116
  code_to_country[code] = term
85
117
  logger.info(f"got {len(code_to_country):,} country records")
86
118
  return code_to_country
@@ -104,10 +136,14 @@ def get_code_to_admin1(
104
136
  tqdm.write(f"Missing info for {name} / {asciiname} / {code=} / {identifier=}")
105
137
  continue
106
138
 
107
- term = Term.from_triple(
108
- "geonames", identifier, name if pd.notna(name) else None, type="Instance"
139
+ term = Term(
140
+ reference=Reference(
141
+ prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
142
+ ),
143
+ type="Instance",
109
144
  )
110
- term.append_property("code", code)
145
+ term.append_parent(ADMIN_1)
146
+ term.annotate_string(CODE_TYPEDEF, code)
111
147
  code_to_admin1[code] = term
112
148
 
113
149
  country_code = code.split(".")[0]
@@ -132,10 +168,14 @@ def get_code_to_admin2(
132
168
  for identifier, name, code in admin2_df[["geonames_id", "name", "code"]].values:
133
169
  if pd.isna(identifier) or pd.isna(code):
134
170
  continue
135
- term = Term.from_triple(
136
- "geonames", identifier, name if pd.notna(name) else None, type="Instance"
171
+ term = Term(
172
+ reference=Reference(
173
+ prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
174
+ ),
175
+ type="Instance",
137
176
  )
138
- term.append_property("code", code)
177
+ term.append_parent(ADMIN_2)
178
+ term.annotate_string(CODE_TYPEDEF, code)
139
179
  code_to_admin2[code] = term
140
180
  admin1_code = code.rsplit(".", 1)[0]
141
181
  admin1_term = code_to_admin1.get(admin1_code)
@@ -181,6 +221,19 @@ def _get_cities_df(force: bool = False) -> pd.DataFrame:
181
221
  return cities_df
182
222
 
183
223
 
224
+ def _get_synonyms_df(force: bool = False) -> pd.DataFrame:
225
+ """Get the synonyms dataframe."""
226
+ path = ensure_path(PREFIX, url=SYNONYMS_URL, force=force)
227
+ synonyms_df = read_zipfile_csv(
228
+ path=path,
229
+ inner_path="alternateNamesV2.txt",
230
+ header=None,
231
+ names=SYNONYMS_DF_COLUMNS,
232
+ dtype=str,
233
+ )
234
+ return synonyms_df
235
+
236
+
184
237
  def get_cities(
185
238
  code_to_country,
186
239
  code_to_admin1,
@@ -188,7 +241,8 @@ def get_cities(
188
241
  *,
189
242
  minimum_population: int = 100_000,
190
243
  force: bool = False,
191
- ) -> Mapping[str, Term]:
244
+ include_synonyms: bool = False,
245
+ ) -> dict[str, Term]:
192
246
  """Get a mapping from city code to term."""
193
247
  cities_df = _get_cities_df(force=force)
194
248
  cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
@@ -200,11 +254,18 @@ def get_cities(
200
254
 
201
255
  cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "feature_code"]
202
256
  for identifier, name, synonyms, country, admin1, admin2, feature_code in cities_df[cols].values:
203
- terms[identifier] = term = Term.from_triple(
204
- "geonames", identifier, name if pd.notna(name) else None, type="Instance"
257
+ terms[identifier] = term = Term(
258
+ reference=Reference(
259
+ prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
260
+ ),
261
+ type="Instance",
205
262
  )
206
- term.append_parent(Reference(prefix="geonames.feature", identifier=feature_code))
207
- if synonyms and not isinstance(synonyms, float):
263
+ # All cities are under the P branch, but the prefix is omitted for brevity in the TSV
264
+ term.append_parent(Reference(prefix=PREFIX_FEATURE, identifier=f"P.{feature_code}"))
265
+ term.append_parent(CITY)
266
+
267
+ if include_synonyms and synonyms and not isinstance(synonyms, float):
268
+ # TODO include language codes
208
269
  for synonym in synonyms:
209
270
  if pd.notna(synonym):
210
271
  term.append_synonym(synonym)
@@ -254,4 +315,4 @@ def get_city_to_country() -> dict[str, str]:
254
315
 
255
316
 
256
317
  if __name__ == "__main__":
257
- GeonamesGetter().write_default(write_obo=True, force=True)
318
+ GeonamesGetter.cli()