pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
@@ -7,7 +7,16 @@ import pandas as pd
7
7
  from tqdm.auto import tqdm
8
8
 
9
9
  from pyobo.resources.ncbitaxon import get_ncbitaxon_name
10
- from pyobo.struct import Obo, Reference, Synonym, Term, from_species, has_part
10
+ from pyobo.struct import (
11
+ Obo,
12
+ Reference,
13
+ Synonym,
14
+ Term,
15
+ _parse_str_or_curie_or_uri,
16
+ from_species,
17
+ has_citation,
18
+ has_part,
19
+ )
11
20
  from pyobo.utils.path import ensure_df
12
21
 
13
22
  __all__ = [
@@ -96,13 +105,14 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
96
105
  xref = xref.replace("protein ontology:PR_", "PR:")
97
106
  xref = xref.replace("rhea:rhea ", "rhea:")
98
107
  xref = xref.replace("rhea:Rhea ", "rhea:")
108
+ xref = xref.replace("rhea:RHEA ", "rhea:")
99
109
  xref = xref.replace("rhea:RHEA:rhea", "rhea:")
100
110
  xref = xref.replace("rhea:RHEA: ", "rhea:")
101
111
  xref = xref.replace("rhea:RHEA:rhea ", "rhea:")
102
112
  xref = xref.replace("intenz:RHEA:", "rhea:")
103
- xref = xref.replace("eccode::", "eccode:")
104
- xref = xref.replace("eccode:EC:", "eccode:")
105
- xref = xref.replace("intenz:EC:", "eccode:")
113
+ xref = xref.replace("eccode::", "ec:")
114
+ xref = xref.replace("eccode:EC:", "ec:")
115
+ xref = xref.replace("intenz:EC:", "ec:")
106
116
  xref = xref.replace("eccode:RHEA:", "rhea:")
107
117
  xref = xref.replace("efo:MONDO:", "MONDO:")
108
118
  xref = xref.replace("omim:MIM:", "omim:")
@@ -125,7 +135,7 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
125
135
  xref_curie = _clean_intenz(xref_curie)
126
136
 
127
137
  try:
128
- reference = Reference.from_curie(xref_curie)
138
+ reference = _parse_str_or_curie_or_uri(xref_curie)
129
139
  except ValueError:
130
140
  logger.warning("can not parse CURIE: %s", xref_curie)
131
141
  continue
@@ -146,18 +156,13 @@ class ComplexPortalGetter(Obo):
146
156
  """An ontology representation of the Complex Portal."""
147
157
 
148
158
  bioversions_key = ontology = PREFIX
149
- typedefs = [from_species, has_part]
159
+ typedefs = [from_species, has_part, has_citation]
150
160
 
151
161
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
152
162
  """Iterate over terms in the ontology."""
153
163
  return get_terms(version=self._version_or_raise)
154
164
 
155
165
 
156
- def get_obo(force: bool = False) -> Obo:
157
- """Get the ComplexPortal OBO."""
158
- return ComplexPortalGetter(force=force)
159
-
160
-
161
166
  def get_df(version: str, force: bool = False) -> pd.DataFrame:
162
167
  """Get a combine ComplexPortal dataframe."""
163
168
  url_base = f"ftp://ftp.ebi.ac.uk/pub/databases/intact/complex/{version}/complextab"
@@ -222,29 +227,24 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
222
227
  taxonomy_name,
223
228
  members,
224
229
  ) in it:
225
- synonyms = [Synonym(name=alias) for alias in aliases]
226
- _xrefs = []
227
- provenance = []
230
+ term = Term(
231
+ reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
232
+ definition=definition.strip() if pd.notna(definition) else None,
233
+ synonyms=[Synonym(name=alias) for alias in aliases],
234
+ )
228
235
  for reference, note in xrefs:
229
236
  if note == "identity":
230
- _xrefs.append(reference)
237
+ term.append_xref(reference)
231
238
  elif note == "see-also" and reference.prefix == "pubmed":
232
- provenance.append(reference)
239
+ term.append_provenance(reference)
233
240
  elif (note, reference.prefix) not in unhandled_xref_type:
234
241
  logger.debug(f"unhandled xref type: {note} / {reference.prefix}")
235
242
  unhandled_xref_type.add((note, reference.prefix))
236
243
 
237
- term = Term(
238
- reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
239
- definition=definition.strip() if pd.notna(definition) else None,
240
- synonyms=synonyms,
241
- xrefs=_xrefs,
242
- provenance=provenance,
243
- )
244
244
  term.set_species(identifier=taxonomy_id, name=taxonomy_name)
245
245
 
246
246
  for reference, _count in members:
247
- term.append_relationship(has_part, reference)
247
+ term.annotate_object(has_part, reference)
248
248
 
249
249
  yield term
250
250
 
pyobo/sources/conso.py CHANGED
@@ -4,7 +4,7 @@ from collections.abc import Iterable
4
4
 
5
5
  import pandas as pd
6
6
 
7
- from ..struct import Obo, Reference, Synonym, Term
7
+ from ..struct import Obo, Reference, Synonym, Term, _parse_str_or_curie_or_uri, has_citation
8
8
  from ..utils.io import multidict
9
9
  from ..utils.path import ensure_df
10
10
 
@@ -25,36 +25,28 @@ class CONSOGetter(Obo):
25
25
 
26
26
  ontology = PREFIX
27
27
  dynamic_version = True
28
+ typedefs = [has_citation]
28
29
 
29
30
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
30
31
  """Iterate over terms in the ontology."""
31
32
  return iter_terms()
32
33
 
33
34
 
34
- def get_obo() -> Obo:
35
- """Get CONSO as OBO."""
36
- return CONSOGetter()
37
-
38
-
39
35
  def iter_terms() -> Iterable[Term]:
40
36
  """Get CONSO terms."""
41
37
  terms_df = ensure_df(PREFIX, url=TERMS_URL)
42
38
 
43
39
  synonyms_df = ensure_df(PREFIX, url=SYNONYMS_URL)
44
40
  synonyms_df["reference"] = synonyms_df["reference"].map(
45
- lambda s: [Reference.from_curie(s)] if pd.notna(s) and s != "?" else [],
41
+ lambda s: [_parse_str_or_curie_or_uri(s)] if pd.notna(s) and s != "?" else [],
46
42
  )
47
- synonyms_df["specificity"] = synonyms_df["specificity"].map(
48
- lambda s: "EXACT" if pd.isna(s) or s == "?" else s
49
- )
50
-
51
43
  synonyms = multidict(
52
44
  (
53
45
  identifier,
54
46
  Synonym(
55
47
  name=synonym,
56
48
  provenance=provenance,
57
- specificity=specificity,
49
+ specificity=None if pd.isna(specificity) or specificity == "?" else specificity,
58
50
  ),
59
51
  )
60
52
  for identifier, synonym, provenance, specificity in synonyms_df.values
@@ -66,21 +58,21 @@ def iter_terms() -> Iterable[Term]:
66
58
  for _, row in terms_df.iterrows():
67
59
  if row["Name"] == "WITHDRAWN":
68
60
  continue
69
- provenance: list[Reference] = []
70
- for curie in row["References"].split(","):
71
- curie = curie.strip()
72
- if not curie:
73
- continue
74
- reference = Reference.from_curie(curie)
75
- if reference is not None:
76
- provenance.append(reference)
61
+
77
62
  identifier = row["Identifier"]
78
- yield Term(
63
+ term = Term(
79
64
  reference=Reference(prefix=PREFIX, identifier=identifier, name=row["Name"]),
80
65
  definition=row["Description"],
81
- provenance=provenance,
82
66
  synonyms=synonyms.get(identifier, []),
83
67
  )
68
+ for curie in row["References"].split(","):
69
+ curie = curie.strip()
70
+ if not curie:
71
+ continue
72
+ reference = _parse_str_or_curie_or_uri(curie)
73
+ if reference is not None:
74
+ term.append_provenance(reference)
75
+ yield term
84
76
 
85
77
 
86
78
  if __name__ == "__main__":
pyobo/sources/cpt.py CHANGED
File without changes
pyobo/sources/credit.py CHANGED
@@ -23,20 +23,12 @@ class CreditGetter(Obo):
23
23
 
24
24
  ontology = PREFIX
25
25
  static_version = "2022"
26
- idspaces = {
27
- PREFIX: "https://credit.niso.org/contributor-roles/",
28
- }
29
26
 
30
27
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
28
  """Iterate over terms in the ontology."""
32
29
  return get_terms(force=force)
33
30
 
34
31
 
35
- def get_obo(force: bool = False) -> Obo:
36
- """Get RGD as OBO."""
37
- return CreditGetter(force=force)
38
-
39
-
40
32
  def get_terms(force: bool = False) -> list[Term]:
41
33
  """Get terms from the Contributor Roles Taxonomy via GitHub."""
42
34
  path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
@@ -65,4 +57,4 @@ def get_terms(force: bool = False) -> list[Term]:
65
57
 
66
58
 
67
59
  if __name__ == "__main__":
68
- get_obo(force=True).write_default(write_obo=True)
60
+ CreditGetter.cli()
pyobo/sources/cvx.py CHANGED
@@ -1,11 +1,13 @@
1
1
  """Converter for CVX."""
2
2
 
3
+ import re
3
4
  from collections import defaultdict
4
5
  from collections.abc import Iterable
5
6
 
6
7
  import pandas as pd
7
8
 
8
- from pyobo import Obo, Reference, Term
9
+ from pyobo import Obo, Reference, Term, TypeDef, default_reference
10
+ from pyobo.struct.struct import acronym
9
11
 
10
12
  __all__ = [
11
13
  "CVXGetter",
@@ -13,6 +15,12 @@ __all__ = [
13
15
 
14
16
  cvx_url = "https://www2a.cdc.gov/vaccines/iis/iisstandards/downloads/cvx.txt"
15
17
  PREFIX = "cvx"
18
+ STATUS = TypeDef(
19
+ reference=default_reference(PREFIX, "status", name="has status"), is_metadata_tag=True
20
+ )
21
+ NONVACCINE = TypeDef(reference=default_reference(PREFIX, "nonvaccine"), is_metadata_tag=True)
22
+
23
+ ACRONYM_RE = re.compile("^[A-Z]+$")
16
24
 
17
25
 
18
26
  class CVXGetter(Obo):
@@ -20,6 +28,8 @@ class CVXGetter(Obo):
20
28
 
21
29
  ontology = PREFIX
22
30
  dynamic_version = True
31
+ synonym_typedefs = [acronym]
32
+ typedefs = [STATUS, NONVACCINE]
23
33
 
24
34
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
25
35
  """Iterate over terms in the ontology."""
@@ -71,8 +81,20 @@ def iter_terms() -> Iterable[Term]:
71
81
  reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
72
82
  is_obsolete=is_obsolete,
73
83
  )
74
- if short_name != full_name:
75
- term.append_synonym(short_name)
84
+ if (
85
+ short_name.casefold()
86
+ == full_name.casefold()
87
+ .replace("virus vaccine", "")
88
+ .replace("vaccine", "")
89
+ .replace(" ", " ")
90
+ .strip()
91
+ ):
92
+ pass
93
+ elif short_name != full_name:
94
+ if ACRONYM_RE.match(short_name):
95
+ term.append_exact_synonym(short_name, type=acronym.reference)
96
+ else:
97
+ term.append_synonym(short_name)
76
98
  if pd.notna(notes):
77
99
  term.append_comment(notes)
78
100
  if is_obsolete:
@@ -80,9 +102,9 @@ def iter_terms() -> Iterable[Term]:
80
102
  if replacement_identifier:
81
103
  term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
82
104
  if pd.notna(status):
83
- term.append_property("status", status)
105
+ term.annotate_string(STATUS, status)
84
106
  if pd.notna(nonvaccine):
85
- term.append_property("nonvaccine", nonvaccine)
107
+ term.annotate_boolean(NONVACCINE, nonvaccine)
86
108
  terms[cvx] = term
87
109
 
88
110
  for child, parents in dd.items():
pyobo/sources/depmap.py CHANGED
@@ -1,7 +1,6 @@
1
1
  """DepMap cell lines."""
2
2
 
3
3
  from collections.abc import Iterable
4
- from typing import Optional
5
4
 
6
5
  import pandas as pd
7
6
  import pystow
@@ -10,7 +9,6 @@ from pyobo import Obo, Reference, Term
10
9
  from pyobo.struct.typedef import exact_match
11
10
 
12
11
  __all__ = [
13
- "get_obo",
14
12
  "DepMapGetter",
15
13
  ]
16
14
 
@@ -30,28 +28,24 @@ class DepMapGetter(Obo):
30
28
  return iter_terms(version=self._version_or_raise, force=force)
31
29
 
32
30
 
33
- def get_obo(*, force: bool = False) -> Obo:
34
- """Get DepMap cell lines as OBO."""
35
- return DepMapGetter(force=force)
36
-
37
-
38
- def get_url(version: Optional[str] = None) -> str:
31
+ def get_url(version: str | None = None) -> str:
39
32
  """Get the URL for the given version of the DepMap cell line metadata file.
40
33
 
41
34
  :param version: The version of the data
35
+
42
36
  :returns: The URL as a string for downloading the dat
43
37
 
44
38
  .. warning::
45
39
 
46
- This does not currently take the version into account. Need to write a crawler since data is not easy
47
- to access.
40
+ This does not currently take the version into account. Need to write a crawler
41
+ since data is not easy to access.
48
42
  """
49
43
  #: This is the DepMap Public 21Q2 version. There isn't a way to do this automatically without writing a crawler
50
44
  url = "https://ndownloader.figshare.com/files/27902376"
51
45
  return url
52
46
 
53
47
 
54
- def _fix_mangled_int(x: str) -> Optional[str]:
48
+ def _fix_mangled_int(x: str) -> str | None:
55
49
  return str(int(float(x))) if pd.notna(x) else None
56
50
 
57
51
 
@@ -74,7 +68,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
74
68
  columns
75
69
  ].values:
76
70
  if pd.isna(name):
77
- name = None
71
+ if pd.notna(sname):
72
+ name, sname = sname, None
73
+ else:
74
+ name = None
78
75
  term = Term.from_triple(PREFIX, identifier, name)
79
76
  if pd.notna(sname):
80
77
  term.append_synonym(sname)
@@ -9,7 +9,7 @@ from collections.abc import Iterable
9
9
  import pandas as pd
10
10
  from tqdm.auto import tqdm
11
11
 
12
- from pyobo.struct import Obo, Synonym, Term, from_species, has_gene_product
12
+ from pyobo.struct import Obo, Term, from_species, has_gene_product
13
13
  from pyobo.utils.path import ensure_df
14
14
 
15
15
  __all__ = [
@@ -41,11 +41,6 @@ class DictybaseGetter(Obo):
41
41
  return get_terms(force=force)
42
42
 
43
43
 
44
- def get_obo(force: bool = False) -> Obo:
45
- """Get dictyBase Gene as OBO."""
46
- return DictybaseGetter(force=force)
47
-
48
-
49
44
  def get_terms(force: bool = False) -> Iterable[Term]:
50
45
  """Get terms."""
51
46
  # TODO the mappings file has actually no uniprot at all, and requires text mining
@@ -67,7 +62,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
67
62
  term.append_synonym(synonym.strip())
68
63
  if synonyms and pd.notna(synonyms):
69
64
  for synonym in synonyms.split(","):
70
- term.append_synonym(Synonym(synonym.strip()))
65
+ term.append_synonym(synonym.strip())
71
66
  # for uniprot_id in uniprot_mappings.get(identifier, []):
72
67
  # if not uniprot_id or pd.isna(uniprot_id) or uniprot_id in {"unknown", "pseudogene"}:
73
68
  # continue
@@ -0,0 +1,9 @@
1
+ """Resources from DrugBank."""
2
+
3
+ from .drugbank import DrugBankGetter
4
+ from .drugbank_salt import DrugBankSaltGetter
5
+
6
+ __all__ = [
7
+ "DrugBankGetter",
8
+ "DrugBankSaltGetter",
9
+ ]
@@ -8,17 +8,17 @@ import itertools as itt
8
8
  import logging
9
9
  from collections.abc import Iterable, Mapping
10
10
  from functools import lru_cache
11
- from typing import Any, Optional
11
+ from typing import Any
12
12
  from xml.etree import ElementTree
13
13
 
14
14
  import pystow
15
15
  from tqdm.auto import tqdm
16
16
 
17
- from ..getters import NoBuildError
18
- from ..struct import Obo, Reference, Term
19
- from ..struct.typedef import has_inchi, has_salt, has_smiles
20
- from ..utils.cache import cached_pickle
21
- from ..utils.path import prefix_directory_join
17
+ from ...getters import NoBuildError
18
+ from ...struct import Obo, Reference, Term
19
+ from ...struct.typedef import has_inchi, has_salt, has_smiles
20
+ from ...utils.cache import cached_pickle
21
+ from ...utils.path import prefix_directory_join
22
22
 
23
23
  __all__ = [
24
24
  "DrugBankGetter",
@@ -40,11 +40,6 @@ class DrugBankGetter(Obo):
40
40
  return iter_terms(version=self._version_or_raise, force=force)
41
41
 
42
42
 
43
- def get_obo(force: bool = False) -> Obo:
44
- """Get DrugBank as OBO."""
45
- return DrugBankGetter(force=force)
46
-
47
-
48
43
  def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
49
44
  """Iterate over DrugBank terms in OBO."""
50
45
  for drug_info in iterate_drug_info(version, force=force):
@@ -120,13 +115,13 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
120
115
  if identifier:
121
116
  term.append_xref(Reference(prefix=xref_prefix, identifier=identifier))
122
117
 
123
- for prop, debio_curie in [("smiles", has_smiles), ("inchi", has_inchi)]:
124
- identifier = drug_info.get(prop)
118
+ for key, typedef_ in [("smiles", has_smiles), ("inchi", has_inchi)]:
119
+ identifier = drug_info.get(key)
125
120
  if identifier:
126
- term.append_property(debio_curie, identifier)
121
+ term.annotate_string(typedef_, identifier)
127
122
 
128
123
  for salt in drug_info.get("salts", []):
129
- term.append_relationship(
124
+ term.annotate_object(
130
125
  has_salt,
131
126
  Reference(
132
127
  prefix="drugbank.salt",
@@ -139,7 +134,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
139
134
 
140
135
 
141
136
  @lru_cache
142
- def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
137
+ def get_xml_root(version: str | None = None) -> ElementTree.Element:
143
138
  """Get the DrugBank XML parser root.
144
139
 
145
140
  Takes between 35-60 seconds.
@@ -2,8 +2,8 @@
2
2
 
3
3
  Run with ``python -m pyobo.sources.drugbank_salt``
4
4
 
5
- Get relations between drugbank salts and drugbank parents with
6
- ``pyobo relations drugbank --relation obo:has_salt`` or
5
+ Get relations between drugbank salts and drugbank parents with ``pyobo relations
6
+ drugbank --relation obo:has_salt`` or
7
7
 
8
8
  .. code-block:: python
9
9
 
@@ -16,7 +16,7 @@ import logging
16
16
  from collections.abc import Iterable
17
17
 
18
18
  from .drugbank import iterate_drug_info
19
- from ..struct import Obo, Reference, Term
19
+ from ...struct import Obo, Reference, Term
20
20
 
21
21
  __all__ = [
22
22
  "DrugBankSaltGetter",
@@ -38,11 +38,6 @@ class DrugBankSaltGetter(Obo):
38
38
  return iter_terms(version=self._version_or_raise, force=force)
39
39
 
40
40
 
41
- def get_obo(force: bool = False) -> Obo:
42
- """Get DrugBank Salts as OBO."""
43
- return DrugBankSaltGetter(force=force)
44
-
45
-
46
41
  def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
47
42
  """Iterate over DrugBank Salt terms in OBO."""
48
43
  for drug_info in iterate_drug_info(version, force=force):
@@ -7,6 +7,7 @@ from contextlib import closing
7
7
 
8
8
  import bioregistry
9
9
  import psycopg2
10
+ from pydantic import ValidationError
10
11
  from tqdm.auto import tqdm
11
12
 
12
13
  from pyobo.struct import Obo, Reference, Synonym, Term
@@ -32,18 +33,13 @@ class DrugCentralGetter(Obo):
32
33
  """An ontology representation of the DrugCentral database."""
33
34
 
34
35
  ontology = bioversions_key = PREFIX
35
- typedefs = [exact_match]
36
+ typedefs = [exact_match, has_inchi, has_smiles]
36
37
 
37
38
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
38
39
  """Iterate over terms in the ontology."""
39
40
  return iter_terms()
40
41
 
41
42
 
42
- def get_obo(force: bool = False) -> Obo:
43
- """Get DrugCentral OBO."""
44
- return DrugCentralGetter(force=force)
45
-
46
-
47
43
  def iter_terms() -> Iterable[Term]:
48
44
  """Iterate over DrugCentral terms."""
49
45
  with closing(psycopg2.connect(**PARAMS)) as conn:
@@ -71,10 +67,18 @@ def iter_terms() -> Iterable[Term]:
71
67
  if xref_prefix_norm == "pdb.ligand":
72
68
  # there is a weird invalid escaped \W appearing in pdb ligand ids
73
69
  identifier = identifier.strip()
74
- identifier = bioregistry.standardize_identifier(xref_prefix_norm, identifier)
75
- xrefs[str(drugcentral_id)].append(
76
- Reference(prefix=xref_prefix_norm, identifier=identifier)
77
- )
70
+
71
+ try:
72
+ xref = Reference(prefix=xref_prefix_norm, identifier=identifier)
73
+ except ValidationError:
74
+ # TODO mmsl is systematically incorrect, figure this out
75
+ if xref_prefix_norm != "mmsl":
76
+ tqdm.write(
77
+ f"[drugcentral:{drugcentral_id}] had invalid xref: {prefix}:{identifier}"
78
+ )
79
+ continue
80
+ else:
81
+ xrefs[str(drugcentral_id)].append(xref)
78
82
  with closing(conn.cursor()) as cur:
79
83
  cur.execute("SELECT id, name FROM public.synonyms")
80
84
  synonyms: defaultdict[str, list[Synonym]] = defaultdict(list)
@@ -85,16 +89,16 @@ def iter_terms() -> Iterable[Term]:
85
89
  drugcentral_id = str(drugcentral_id)
86
90
  term = Term(
87
91
  reference=Reference(prefix=PREFIX, identifier=drugcentral_id, name=name),
88
- definition=definition,
92
+ definition=definition.replace("\n", " ") if definition else None,
89
93
  synonyms=synonyms.get(drugcentral_id, []),
90
94
  xrefs=xrefs.get(drugcentral_id, []),
91
95
  )
92
96
  if inchi_key:
93
97
  term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
94
98
  if smiles:
95
- term.append_property(has_smiles, smiles)
99
+ term.annotate_string(has_smiles, smiles)
96
100
  if inchi:
97
- term.append_property(has_inchi, inchi)
101
+ term.annotate_string(has_inchi, inchi)
98
102
  if cas:
99
103
  term.append_exact_match(Reference(prefix="cas", identifier=cas))
100
104
  yield term