pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/biogrid.py CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  from collections.abc import Mapping
4
4
  from functools import partial
5
- from typing import Optional
6
5
 
7
6
  import pandas as pd
8
7
 
@@ -43,7 +42,7 @@ taxonomy_remapping = { # so much for official names
43
42
  }
44
43
 
45
44
 
46
- def _lookup(name: str) -> Optional[str]:
45
+ def _lookup(name: str) -> str | None:
47
46
  if name in taxonomy_remapping:
48
47
  return taxonomy_remapping[name]
49
48
  return get_ncbitaxon_id(name)
pyobo/sources/ccle.py CHANGED
@@ -3,7 +3,6 @@
3
3
  import tarfile
4
4
  from collections.abc import Iterable
5
5
  from pathlib import Path
6
- from typing import Optional
7
6
 
8
7
  import pandas as pd
9
8
  import pystow
@@ -11,7 +10,6 @@ import pystow
11
10
  from pyobo import Obo, Reference, Term
12
11
 
13
12
  __all__ = [
14
- "get_obo",
15
13
  "CCLEGetter",
16
14
  ]
17
15
 
@@ -23,21 +21,18 @@ class CCLEGetter(Obo):
23
21
  """An ontology representation of the Cancer Cell Line Encyclopedia's cell lines."""
24
22
 
25
23
  ontology = bioregistry_key = PREFIX
24
+ name = "Cancer Cell Line Encyclopedia Cell Line"
26
25
 
27
26
  def __post_init__(self):
28
27
  self.data_version = VERSION
28
+ super().__post_init__()
29
29
 
30
30
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
31
  """Iterate over terms in the ontology."""
32
32
  return iter_terms(version=self._version_or_raise, force=force)
33
33
 
34
34
 
35
- def get_obo(*, force: bool = False) -> Obo:
36
- """Get CCLE Cells as OBO."""
37
- return CCLEGetter(force=force)
38
-
39
-
40
- def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
35
+ def iter_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
41
36
  """Iterate over CCLE Cells."""
42
37
  df = ensure_df(version=version, force=force)
43
38
  for identifier, depmap_id, name in df.values:
@@ -54,21 +49,21 @@ def get_ccle_static_version() -> str:
54
49
  return "2019"
55
50
 
56
51
 
57
- def get_url(version: Optional[str] = None) -> str:
52
+ def get_url(version: str | None = None) -> str:
58
53
  """Get the cBioPortal URL for the given version of CCLE's cell lines."""
59
54
  if version is None:
60
55
  version = get_ccle_static_version()
61
56
  return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
62
57
 
63
58
 
64
- def get_inner(version: Optional[str] = None) -> str:
59
+ def get_inner(version: str | None = None) -> str:
65
60
  """Get the inner tarfile path."""
66
61
  if version is None:
67
62
  version = get_ccle_static_version()
68
63
  return f"ccle_broad_{version}/data_clinical_sample.txt"
69
64
 
70
65
 
71
- def ensure(version: Optional[str] = None, **kwargs) -> Path:
66
+ def ensure(version: str | None = None, **kwargs) -> Path:
72
67
  """Ensure the given version is downloaded."""
73
68
  if version is None:
74
69
  version = get_ccle_static_version()
@@ -76,7 +71,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path:
76
71
  return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
77
72
 
78
73
 
79
- def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
74
+ def ensure_df(version: str | None = None, force: bool = False) -> pd.DataFrame:
80
75
  """Get the CCLE clinical sample dataframe."""
81
76
  if version is None:
82
77
  version = get_ccle_static_version()
pyobo/sources/cgnc.py CHANGED
@@ -31,11 +31,6 @@ class CGNCGetter(Obo):
31
31
  return get_terms(force=force)
32
32
 
33
33
 
34
- def get_obo(force: bool = False) -> Obo:
35
- """Get CGNC as OBO."""
36
- return CGNCGetter(force=force)
37
-
38
-
39
34
  HEADER = [
40
35
  "cgnc_id",
41
36
  "ncbigene_id",
pyobo/sources/chebi.py CHANGED
@@ -7,9 +7,9 @@ from ..struct import Reference, TypeDef
7
7
  from ..utils.io import multisetdict
8
8
 
9
9
  __all__ = [
10
- "get_chebi_smiles_id_mapping",
11
10
  "get_chebi_id_smiles_mapping",
12
11
  "get_chebi_role_to_children",
12
+ "get_chebi_smiles_id_mapping",
13
13
  ]
14
14
 
15
15
 
@@ -0,0 +1,9 @@
1
+ """Resources from ChEMBL."""
2
+
3
+ from .chembl_compound import ChEMBLCompoundGetter
4
+ from .chembl_target import ChEMBLTargetGetter
5
+
6
+ __all__ = [
7
+ "ChEMBLCompoundGetter",
8
+ "ChEMBLTargetGetter",
9
+ ]
@@ -1,11 +1,7 @@
1
- """Converter for ChEMBL.
2
-
3
- Run with ``python -m pyobo.sources.chembl -vv``.
4
- """
1
+ """Converter for ChEMBL Compounds."""
5
2
 
6
3
  import logging
7
4
  from collections.abc import Iterable
8
- from contextlib import closing
9
5
 
10
6
  import chembl_downloader
11
7
 
@@ -50,28 +46,20 @@ class ChEMBLCompoundGetter(Obo):
50
46
  return iter_terms(version=self._version_or_raise)
51
47
 
52
48
 
53
- def get_obo(force: bool = False) -> Obo:
54
- """Return ChEMBL Compounds as OBO."""
55
- return ChEMBLCompoundGetter(force=force)
56
-
57
-
def iter_terms(version: str) -> Iterable[Term]:
    """Yield one term per compound row returned by ``QUERY``.

    :param version: The ChEMBL release handed to ``chembl_downloader.cursor``.
    :yields: A term carrying the compound's identifier and name. SMILES and
        InChI are attached as string annotations and the InChIKey as an exact
        match, each only when the value is truthy.
    """
    with chembl_downloader.cursor(version=version) as cursor:
        cursor.execute(QUERY)
        rows = cursor.fetchall()
        for chembl_id, name, smiles, inchi, inchi_key in rows:
            # TODO add xrefs?
            term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
            # SMILES is annotated before InChI, matching the column order.
            for typedef, value in ((has_smiles, smiles), (has_inchi, inchi)):
                if value:
                    term.annotate_string(typedef, value)
            if inchi_key:
                term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
            yield term
75
63
 
76
64
 
77
65
  if __name__ == "__main__":
@@ -0,0 +1,160 @@
1
+ """Converter for ChEMBL targets."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from collections.abc import Iterable
6
+
7
+ import chembl_downloader
8
+ from tqdm import tqdm
9
+
10
+ from pyobo import default_reference
11
+ from pyobo.struct import Obo, Reference, Term
12
+ from pyobo.struct.typedef import (
13
+ exact_match,
14
+ has_component,
15
+ has_member,
16
+ has_participant,
17
+ )
18
+
19
+ __all__ = [
20
+ "ChEMBLTargetGetter",
21
+ ]
22
+
23
+ from pyobo.utils.path import ensure_df
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ PREFIX = "chembl.target"
28
+
29
+ TTYPE_QUERY = """\
30
+ SELECT TARGET_TYPE, TARGET_DESC, PARENT_TYPE
31
+ FROM TARGET_TYPE
32
+ """
33
+
34
+ QUERY = """\
35
+ SELECT
36
+ CHEMBL_ID,
37
+ PREF_NAME,
38
+ TARGET_TYPE,
39
+ TAX_ID
40
+ FROM TARGET_DICTIONARY
41
+ """
42
+
43
+
class ChEMBLTargetGetter(Obo):
    """Expose ChEMBL targets as an ontology."""

    ontology = PREFIX
    bioversions_key = "chembl"
    typedefs = [exact_match, has_component, has_member, has_participant]
    # References declared as the root terms of this ontology.
    root_terms = [
        default_reference(PREFIX, key) for key in ("undefined", "molecular", "non-molecular")
    ]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Return an iterable over the terms for this ontology's version.

        :param force: Accepted but not used by this implementation.
        :returns: The result of the module-level :func:`iter_terms`.
        """
        version = self._version_or_raise
        return iter_terms(version=version)
59
+
60
+
def iter_terms(version: str) -> Iterable[Term]:
    """Yield terms for ChEMBL target types, then for the targets themselves.

    :param version: The ChEMBL release to query; also forwarded to
        :func:`get_chembl_protein_equivalences`.
    :yields: First one term per row of ``TTYPE_QUERY`` (linked to its parent
        type when one is given), then one term per row of ``QUERY``.
    """
    uniprots_by_target = get_chembl_protein_equivalences(version=version)

    type_terms: dict[str, Term] = {}
    parent_of: dict[str, str] = {}
    with chembl_downloader.cursor(version=version) as cursor:
        cursor.execute(TTYPE_QUERY)
        for target_type, desc, parent in cursor.fetchall():
            # Local identifier: lowercase, with spaces replaced by hyphens.
            slug = target_type.lower().replace(" ", "-")
            type_terms[target_type] = Term(
                reference=default_reference(PREFIX, slug, name=target_type),
                definition=desc,
            )
            if parent:
                parent_of[target_type] = parent

    # Parents are linked only after every type term exists, so the order of
    # the rows in the table does not matter.
    for child, parent in parent_of.items():
        type_terms[child].append_parent(type_terms[parent])

    yield from type_terms.values()

    # Target types whose mapped identifiers are all attached through a single
    # relation, one annotation per identifier.
    relation_for_type = {
        "PROTEIN COMPLEX": has_component,
        "CHIMERIC PROTEIN": has_component,
        "PROTEIN COMPLEX GROUP": has_component,
        "PROTEIN NUCLEIC-ACID COMPLEX": has_component,
        "SELECTIVITY GROUP": has_component,
        "PROTEIN FAMILY": has_member,
        "PROTEIN-PROTEIN INTERACTION": has_participant,
    }

    with chembl_downloader.cursor(version=version) as cursor:
        cursor.execute(QUERY)
        for chembl_id, name, target_type, ncbitaxon_id in cursor.fetchall():
            term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
            if ncbitaxon_id:
                term.set_species(str(ncbitaxon_id))
            term.append_parent(type_terms[target_type])

            uniprot_ids = uniprots_by_target.get(chembl_id)
            if uniprot_ids is not None:
                relation = relation_for_type.get(target_type)
                if relation is not None:
                    for uniprot_id in uniprot_ids:
                        term.annotate_object(
                            relation, Reference(prefix="uniprot", identifier=uniprot_id)
                        )
                elif target_type == "SINGLE PROTEIN":
                    if len(uniprot_ids) == 1:
                        term.append_exact_match(
                            Reference(prefix="uniprot", identifier=uniprot_ids[0])
                        )
                    else:
                        # Ambiguous mapping: report it and keep plain xrefs.
                        tqdm.write(
                            f"[chembl.target:{chembl_id}] multiple mappings found to single protein: {uniprot_ids}"
                        )
                        for uniprot_id in uniprot_ids:
                            term.append_xref(Reference(prefix="uniprot", identifier=uniprot_id))
                elif len(uniprot_ids) == 1:
                    luid = uniprot_ids[0]
                    # Identifiers starting with "ENSG" are filed under the
                    # ensembl prefix rather than uniprot.
                    prefix = "ensembl" if luid.startswith("ENSG") else "uniprot"
                    term.append_exact_match(Reference(prefix=prefix, identifier=luid))
                else:
                    tqdm.write(
                        f"[chembl.target:{chembl_id}] need to handle multiple uniprots for {target_type} - {uniprot_ids}"
                    )

            yield term
136
+
137
+
def get_chembl_protein_equivalences(version: str | None = None) -> dict[str, list[str]]:
    """Get the mapping from ChEMBL target identifiers to mapped protein identifiers.

    :param version: The ChEMBL release. When ``None``, the value returned by
        ``chembl_downloader.latest()`` is used.
    :returns: A dictionary keyed by the second column of the release's
        ``chembl_uniprot_mapping.txt`` (the ChEMBL identifier); each value is
        the list of first-column identifiers seen for that key, in file order.
    """
    if version is None:
        version = chembl_downloader.latest()
    url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
    df = ensure_df(
        PREFIX,
        url=url,
        # The URL is release-specific, so the download is stored under the
        # same release rather than under whatever version would otherwise be
        # looked up for this prefix.
        version=version,
        sep="\t",
        skiprows=1,  # the first line of the file is not a data row
        usecols=[0, 1],
        names=["uniprot", "chembl"],
        header=None,
    )
    chembl_to_uniprots: defaultdict[str, list[str]] = defaultdict(list)
    for uniprot, chembl in df.values:
        chembl_to_uniprots[chembl].append(uniprot)
    return dict(chembl_to_uniprots)
157
+
158
+
159
+ if __name__ == "__main__":
160
+ ChEMBLTargetGetter.cli()
@@ -1,11 +1,12 @@
1
1
  """Converter for CiVIC Genes."""
2
2
 
3
+ import datetime
3
4
  from collections.abc import Iterable
4
- from typing import Optional
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from pyobo.struct import Obo, Reference, Term
8
+ from pyobo import default_reference
9
+ from pyobo.struct import Obo, Reference, Term, TypeDef
9
10
  from pyobo.utils.path import ensure_df
10
11
 
11
12
  __all__ = [
@@ -15,38 +16,77 @@ __all__ = [
15
16
  PREFIX = "civic.gid"
16
17
  URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
17
18
 
19
+ GENE = Term(reference=default_reference(PREFIX, "gene", name="gene"))
20
+ FACTOR = Term(reference=default_reference(PREFIX, "factor", name="factor"))
21
+ FUSION = Term(reference=default_reference(PREFIX, "fusion", name="fusion"))
22
+ HAS_3P = TypeDef.default(PREFIX, "has3p", name="has 3' gene", is_metadata_tag=False)
23
+ HAS_5P = TypeDef.default(PREFIX, "has5p", name="has 5' gene", is_metadata_tag=False)
18
24
 
19
- def _sort(_o, t):
20
- return int(t.identifier)
25
+ TYPES = {"Gene": GENE, "Factor": FACTOR, "Fusion": FUSION}
21
26
 
22
27
 
class CIVICGeneGetter(Obo):
    """Expose CiVIC's gene nomenclature as an ontology."""

    ontology = PREFIX
    bioversions_key = PREFIX
    typedefs = [HAS_3P, HAS_5P]
    root_terms = [type_term.reference for type_term in (GENE, FACTOR, FUSION)]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Yield the three type terms, then every term from :func:`get_terms`.

        :param force: Forwarded to :func:`get_terms`.
        """
        for type_term in (GENE, FACTOR, FUSION):
            yield type_term
        yield from get_terms(self._version_or_raise, force=force)
32
39
 
33
40
 
def get_terms(version: str, force: bool = False) -> Iterable[Term]:
    """Get CIVIC terms.

    :param version: The release date in ``YYYY-MM-DD`` form.
    :param force: Forwarded to ``ensure_df``.
    :yields: One term per row of the release's ``GeneSummaries.tsv``.
    :raises ValueError: If ``version`` does not match ``%Y-%m-%d``.
    :raises KeyError: If a row's type is not one of the keys of ``TYPES``.
    """
    release_date = datetime.datetime.strptime(version, "%Y-%m-%d")
    # The download path uses a day-month-year stamp, like 01-Feb-2024.
    stamp = release_date.strftime("%d-%b-%Y")
    url = f"https://civicdb.org/downloads/{stamp}/{stamp}-GeneSummaries.tsv"
    df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
    for (
        identifier,
        _,
        feature_type,
        name,
        aliases,
        description,
        _last_review_date,
        _flag,
        entrez_id,
        ncit_id,
        _5p_status,
        _3p_status,
        five_p_id,
        _5p_name,
        _5p_ncbigene,
        three_p_id,
        _3p_name,
        _3p_ncbigene,
    ) in df.values:
        term = Term(
            reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
            definition=description if pd.notna(description) else None,
        )
        term.append_parent(TYPES[feature_type])
        if pd.notna(entrez_id):
            term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
        if pd.notna(ncit_id):
            term.append_exact_match(Reference(prefix="ncit", identifier=ncit_id))
        if pd.notna(aliases):
            for alias in aliases.split(","):
                # Strip before comparing: otherwise an alias that differs from
                # the name only by surrounding whitespace is added as a
                # synonym identical to the name. Blank entries are dropped.
                alias = alias.strip()
                if alias and alias != name:
                    term.append_synonym(alias)
        if pd.notna(five_p_id):
            term.append_relationship(
                HAS_5P, Reference(prefix=PREFIX, identifier=five_p_id, name=_5p_name)
            )
        if pd.notna(three_p_id):
            term.append_relationship(
                HAS_3P, Reference(prefix=PREFIX, identifier=three_p_id, name=_3p_name)
            )

        yield term
51
91
 
52
92
 
@@ -0,0 +1,160 @@
1
+ """A source for ClinicalTrials.gov."""
2
+
3
+ from collections.abc import Iterable
4
+
5
+ from clinicaltrials_downloader import get_studies_slim
6
+
7
+ from pyobo import Obo, Reference, Term, TypeDef, default_reference
8
+ from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED
9
+ from pyobo.struct.typedef import has_contributor
10
+
11
+ __all__ = [
12
+ "ClinicalTrialsGetter",
13
+ ]
14
+
15
+ PREFIX = "clinicaltrials"
16
+
17
+ INVESTIGATES_CONDITION = TypeDef(
18
+ reference=default_reference(
19
+ prefix=PREFIX, identifier="investigates_condition", name="investigates condition"
20
+ ),
21
+ is_metadata_tag=True,
22
+ )
23
+ HAS_INTERVENTION = TypeDef(
24
+ reference=default_reference(
25
+ prefix=PREFIX, identifier="has_intervention", name="has intervention"
26
+ ),
27
+ is_metadata_tag=True,
28
+ )
29
+
30
+ STUDY_TERM = Term(reference=default_reference(PREFIX, "study", name="study"))
31
+
32
+ CLINICAL_TRIAL_TERM = Term(
33
+ reference=default_reference(PREFIX, "clinical-trial", name="clinical trial")
34
+ ).append_parent(STUDY_TERM)
35
+
36
+ INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
37
+ reference=default_reference(
38
+ PREFIX, "interventional-clinical-trial", name="interventional clinical trial"
39
+ )
40
+ ).append_parent(CLINICAL_TRIAL_TERM)
41
+
42
+ RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
43
+ reference=default_reference(
44
+ PREFIX,
45
+ "randomized-interventional-clinical-trial",
46
+ name="randomized interventional clinical trial",
47
+ )
48
+ ).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
49
+
50
+ NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
51
+ reference=default_reference(
52
+ PREFIX,
53
+ "non-randomized-interventional-clinical-trial",
54
+ name="non-randomized interventional clinical trial",
55
+ )
56
+ ).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
57
+
58
+ OBSERVATIONAL_CLINICAL_TRIAL_TERM = Term(
59
+ reference=default_reference(
60
+ PREFIX, "observational-clinical-trial", name="observational clinical trial"
61
+ )
62
+ ).append_parent(CLINICAL_TRIAL_TERM)
63
+
64
+ EXPANDED_ACCESS_STUDY_TERM = Term(
65
+ reference=default_reference(PREFIX, "expanded-access-study", name="expanded access study")
66
+ ).append_parent(STUDY_TERM)
67
+
68
+ TERMS = [
69
+ STUDY_TERM,
70
+ CLINICAL_TRIAL_TERM,
71
+ OBSERVATIONAL_CLINICAL_TRIAL_TERM,
72
+ INTERVENTIONAL_CLINICAL_TRIAL_TERM,
73
+ EXPANDED_ACCESS_STUDY_TERM,
74
+ RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
75
+ NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
76
+ ]
77
+
78
+ # These were identified as the 4 possibilities for study
79
+ # types in ClinicalTrials.gov. See summary script at
80
+ # https://gist.github.com/cthoyt/12a3cb3c63ad68d73fe5a2f0d506526f
81
+ PARENTS: dict[tuple[str | None, str | None], Term] = {
82
+ ("INTERVENTIONAL", None): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
83
+ ("INTERVENTIONAL", "NA"): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
84
+ ("INTERVENTIONAL", "RANDOMIZED"): RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
85
+ ("INTERVENTIONAL", "NON_RANDOMIZED"): NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
86
+ ("OBSERVATIONAL", None): OBSERVATIONAL_CLINICAL_TRIAL_TERM,
87
+ ("EXPANDED_ACCESS", None): EXPANDED_ACCESS_STUDY_TERM,
88
+ (None, None): STUDY_TERM,
89
+ }
90
+
91
+
class ClinicalTrialsGetter(Obo):
    """Get the ClinicalTrials.gov database as an ontology."""

    ontology = PREFIX
    dynamic_version = True
    typedefs = [has_contributor, INVESTIGATES_CONDITION, HAS_INTERVENTION]
    root_terms = [STUDY_TERM.reference]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms for studies.

        :param force: Forwarded to :func:`iterate_studies`.
        :yields: ``CHARLIE_TERM`` and ``HUMAN_TERM``, then each study-type
            term in ``TERMS``, then one term per study.
        """
        yield CHARLIE_TERM
        yield HUMAN_TERM
        for term in TERMS:
            # NOTE(review): TERMS holds module-level objects, so these two
            # annotations are applied again on every call - confirm that the
            # append methods de-duplicate.
            term.append_contributor(CHARLIE_TERM)
            term.append_comment(PYOBO_INJECTED)
            yield term
        # Pass the flag on so it reaches ``get_studies_slim``.
        yield from iterate_studies(force=force)
109
+
110
+
def iterate_studies(*, force: bool = False) -> Iterable[Term]:
    """Yield one term for each study returned by ``get_studies_slim``.

    :param force: Forwarded to ``get_studies_slim``.
    """
    yield from map(_process_study, get_studies_slim(force=force))
116
+
117
+
def _process_study(raw_study) -> Term:
    """Convert one raw study record into a term.

    :param raw_study: A mapping with ``protocolSection`` and ``derivedSection``
        keys, as read below.
    :returns: An ``Instance``-typed term for the study.
    :raises KeyError: If a required key is missing, or if the pair of study
        type and allocation is not a key of ``PARENTS``.
    """
    protocol = raw_study["protocolSection"]
    identification = protocol["identificationModule"]
    nct_id = identification["nctId"]

    official_title = identification.get("officialTitle")
    brief_title = identification.get("briefTitle")
    if brief_title and not official_title:
        # Only a brief title is available: promote it to the name.
        name, synonym = brief_title, None
    else:
        name, synonym = official_title, brief_title

    term = Term(reference=Reference(prefix=PREFIX, identifier=nct_id, name=name), type="Instance")
    if synonym:
        term.append_synonym(synonym)

    design = protocol.get("designModule", {})
    parent_key = (design.get("studyType"), design.get("designInfo", {}).get("allocation"))
    term.append_parent(PARENTS[parent_key])

    for record in protocol.get("referencesModule", {}).get("references", []):
        pubmed_id = record.get("pmid")
        if pubmed_id:
            term.append_see_also(Reference(prefix="pubmed", identifier=pubmed_id))

    derived = raw_study["derivedSection"]
    # Conditions are annotated before interventions.
    for typedef, module_key in (
        (INVESTIGATES_CONDITION, "conditionBrowseModule"),
        (HAS_INTERVENTION, "interventionBrowseModule"),
    ):
        for mesh_record in derived.get(module_key, {}).get("meshes", []):
            term.annotate_object(typedef, _mesh(mesh_record))
    return term
151
+
152
+
def _mesh(mesh_record: dict[str, str]) -> Reference:
    """Build a MeSH reference from a record with ``id`` and optional ``term`` keys.

    A missing or empty ``term`` yields a reference whose name is ``None``.
    """
    mesh_id = mesh_record["id"]
    label = mesh_record.get("term") or None
    return Reference(prefix="mesh", identifier=mesh_id, name=label)
157
+
158
+
159
+ if __name__ == "__main__":
160
+ ClinicalTrialsGetter.cli()