pyobo 0.12.3__py3-none-any.whl → 0.12.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +6 -0
  3. pyobo/api/__init__.py +3 -0
  4. pyobo/api/embedding.py +118 -0
  5. pyobo/api/names.py +8 -1
  6. pyobo/api/utils.py +0 -10
  7. pyobo/cli/cli.py +1 -6
  8. pyobo/constants.py +23 -0
  9. pyobo/getters.py +52 -35
  10. pyobo/sources/__init__.py +14 -1
  11. pyobo/sources/chembl/__init__.py +6 -0
  12. pyobo/sources/chembl/chembl_cell.py +94 -0
  13. pyobo/sources/chembl/chembl_mechanism.py +81 -0
  14. pyobo/sources/chembl/chembl_tissue.py +70 -0
  15. pyobo/sources/clinicaltrials.py +32 -33
  16. pyobo/sources/complexportal.py +5 -1
  17. pyobo/sources/hgnc/hgnc.py +13 -6
  18. pyobo/sources/iana_media_type.py +100 -0
  19. pyobo/sources/mesh.py +82 -29
  20. pyobo/sources/reactome.py +10 -3
  21. pyobo/sources/spdx.py +85 -0
  22. pyobo/sources/uniprot/uniprot.py +2 -2
  23. pyobo/sources/wikipathways.py +92 -7
  24. pyobo/struct/__init__.py +2 -0
  25. pyobo/struct/functional/dsl.py +10 -1
  26. pyobo/struct/functional/ontology.py +3 -3
  27. pyobo/struct/obo/reader.py +17 -53
  28. pyobo/struct/obograph/export.py +2 -2
  29. pyobo/struct/struct.py +115 -8
  30. pyobo/struct/struct_utils.py +10 -0
  31. pyobo/struct/typedef.py +15 -3
  32. pyobo/struct/vocabulary.py +8 -0
  33. pyobo/utils/cache.py +4 -3
  34. pyobo/utils/io.py +18 -56
  35. pyobo/utils/misc.py +135 -1
  36. pyobo/utils/path.py +34 -2
  37. pyobo/version.py +1 -1
  38. {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/METADATA +5 -5
  39. {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/RECORD +42 -36
  40. {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/WHEEL +0 -0
  41. {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/entry_points.txt +0 -0
  42. {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/licenses/LICENSE +0 -0
pyobo/.DS_Store CHANGED
Binary file
pyobo/__init__.py CHANGED
@@ -48,6 +48,8 @@ from .api import (
48
48
  get_sssom_df,
49
49
  get_subhierarchy,
50
50
  get_synonyms,
51
+ get_text_embedding,
52
+ get_text_embedding_similarity,
51
53
  get_typedef_df,
52
54
  get_xref,
53
55
  get_xrefs,
@@ -70,6 +72,7 @@ from .struct import (
70
72
  SynonymTypeDef,
71
73
  Term,
72
74
  TypeDef,
75
+ build_ontology,
73
76
  default_reference,
74
77
  )
75
78
  from .struct.obo import from_obo_path, from_obonet
@@ -84,6 +87,7 @@ __all__ = [
84
87
  "SynonymTypeDef",
85
88
  "Term",
86
89
  "TypeDef",
90
+ "build_ontology",
87
91
  "default_reference",
88
92
  "ensure_path",
89
93
  "from_obo_path",
@@ -137,6 +141,8 @@ __all__ = [
137
141
  "get_sssom_df",
138
142
  "get_subhierarchy",
139
143
  "get_synonyms",
144
+ "get_text_embedding",
145
+ "get_text_embedding_similarity",
140
146
  "get_typedef_df",
141
147
  "get_version",
142
148
  "get_xref",
pyobo/api/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .alts import (
8
8
  )
9
9
  from .combine import get_literal_mappings_subset
10
10
  from .edges import get_edges, get_edges_df, get_graph
11
+ from .embedding import get_text_embedding, get_text_embedding_similarity
11
12
  from .hierarchy import (
12
13
  get_ancestors,
13
14
  get_children,
@@ -116,6 +117,8 @@ __all__ = [
116
117
  "get_sssom_df",
117
118
  "get_subhierarchy",
118
119
  "get_synonyms",
120
+ "get_text_embedding",
121
+ "get_text_embedding_similarity",
119
122
  "get_typedef_df",
120
123
  "get_version",
121
124
  "get_xref",
pyobo/api/embedding.py ADDED
@@ -0,0 +1,118 @@
1
+ """Embeddings for entities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import curies
8
+ import numpy as np
9
+
10
+ from pyobo.api.names import get_definition, get_name
11
+
12
+ if TYPE_CHECKING:
13
+ import sentence_transformers
14
+
15
+ __all__ = [
16
+ "get_text_embedding",
17
+ "get_text_embedding_model",
18
+ "get_text_embedding_similarity",
19
+ ]
20
+
21
+
22
def get_text_embedding_model() -> sentence_transformers.SentenceTransformer:
    """Instantiate the default sentence transformer, ``all-MiniLM-L6-v2``."""
    # Imported inside the function: at module level, sentence_transformers
    # is only imported under TYPE_CHECKING.
    import sentence_transformers as _st

    return _st.SentenceTransformer("all-MiniLM-L6-v2")
28
+
29
+
30
def _get_text(
    reference: str | curies.Reference | curies.ReferenceTuple,
) -> str | None:
    """Build the text to embed for a reference.

    :param reference: A reference, either as a string or Reference object
    :return: The entity's name, followed by a space and its definition when a
        definition is available, or none if the entity has no name.
    """
    label = get_name(reference)
    if label is None:
        return None
    # the definition is only looked up once a name is known to exist
    definition = get_definition(reference)
    return f"{label} {definition}" if definition else label
40
+
41
+
42
def get_text_embedding(
    reference: str | curies.Reference | curies.ReferenceTuple,
    *,
    model: sentence_transformers.SentenceTransformer | None = None,
) -> np.ndarray | None:
    """Embed the name (and definition, if any) of an entity.

    :param reference: A reference, either as a string or Reference object
    :param model: A sentence transformer model. If not given, the default
        model (``all-MiniLM-L6-v2``) is loaded for this call.
    :return: The first row of the model's encoding of the entity's text,
        or none if no text is available for the entity

    .. code-block:: python

        import pyobo

        embedding = pyobo.get_text_embedding("GO:0000001")
        # [-5.68335280e-02 7.96175096e-03 -3.36112119e-02 2.34440481e-03 ... ]

    Loading the model is repeated on every call that does not pass one, so
    pass a shared model when embedding several entities:

    .. code-block:: python

        import pyobo
        from pyobo.api.embedding import get_text_embedding_model

        model = get_text_embedding_model()
        embedding = pyobo.get_text_embedding("GO:0000001", model=model)
        # [-5.68335280e-02 7.96175096e-03 -3.36112119e-02 2.34440481e-03 ... ]
    """
    text = _get_text(reference)
    if text is None:
        return None
    encoder = get_text_embedding_model() if model is None else model
    # encode a one-element batch and unwrap its single row
    return encoder.encode([text])[0]
78
+
79
+
80
def get_text_embedding_similarity(
    reference_1: str | curies.Reference | curies.ReferenceTuple,
    reference_2: str | curies.Reference | curies.ReferenceTuple,
    *,
    model: sentence_transformers.SentenceTransformer | None = None,
) -> float | None:
    """Get the pairwise similarity between the text embeddings of two entities.

    :param reference_1: A reference, given as a string or Reference object
    :param reference_2: A second reference
    :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not given.
    :returns:
        A floating point similarity, if text is available for both references, otherwise none

    The text for both references is resolved *before* the default model is
    loaded, so no model is instantiated when the answer is going to be none.

    .. code-block:: python

        import pyobo

        similarity = pyobo.get_text_embedding_similarity("GO:0000001", "GO:0000004")
        # 0.24702128767967224

    If you want to do multiple operations, load up the model for reuse

    .. code-block:: python

        import pyobo
        from pyobo.api.embedding import get_text_embedding_model

        model = get_text_embedding_model()
        similarity = pyobo.get_text_embedding_similarity("GO:0000001", "GO:0000004", model=model)
        # 0.24702128767967224
    """
    text_1 = _get_text(reference_1)
    if text_1 is None:
        return None
    text_2 = _get_text(reference_2)
    if text_2 is None:
        return None
    # Only pay for loading the default model once both texts are known to exist.
    if model is None:
        model = get_text_embedding_model()
    # Encode each text as its own one-element batch, the same way
    # get_text_embedding does, so the vectors are identical to its output.
    e1 = model.encode([text_1])[0]
    e2 = model.encode([text_2])[0]
    return model.similarity(e1, e2)[0][0].item()
pyobo/api/names.py CHANGED
@@ -166,7 +166,14 @@ def get_references(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[Refe
166
166
  ontology = get_ontology(prefix, **kwargs)
167
167
  return sorted(ontology.iterate_references())
168
168
 
169
- return set(_get_references())
169
+ try:
170
+ return set(_get_references())
171
+ except NoBuildError:
172
+ logger.debug("[%s] no build", prefix)
173
+ return set()
174
+ except (Exception, subprocess.CalledProcessError) as e:
175
+ logger.exception("[%s v%s] could not load: %s", prefix, version, e)
176
+ return set()
170
177
 
171
178
 
172
179
  @lru_cache
pyobo/api/utils.py CHANGED
@@ -19,7 +19,6 @@ __all__ = [
19
19
  "VersionError",
20
20
  "get_version",
21
21
  "get_version_pins",
22
- "safe_get_version",
23
22
  ]
24
23
 
25
24
  logger = logging.getLogger(__name__)
@@ -84,15 +83,6 @@ def get_version_from_kwargs(prefix: str, kwargs: GetOntologyKwargs) -> str | Non
84
83
  return get_version(prefix, strict=False)
85
84
 
86
85
 
87
- def safe_get_version(prefix: str) -> str:
88
- """Get the version."""
89
- # FIXME replace with get_version(prefix, strict=True)
90
- v = get_version(prefix)
91
- if v is None:
92
- raise ValueError
93
- return v
94
-
95
-
96
86
  @lru_cache(1)
97
87
  def get_version_pins() -> dict[str, str]:
98
88
  """Retrieve user-defined resource version pins.
pyobo/cli/cli.py CHANGED
@@ -90,12 +90,7 @@ def _has_no_download(prefix: str) -> bool:
90
90
  @lru_cache(maxsize=1)
91
91
  def _no_download() -> set[str]:
92
92
  """Get the list of prefixes not available as OBO."""
93
- return {
94
- prefix
95
- for prefix in bioregistry.read_registry()
96
- if bioregistry.get_obo_download(prefix) is None
97
- and bioregistry.get_owl_download(prefix) is None
98
- }
93
+ return {resource.prefix for resource in bioregistry.resources() if not resource.has_download()}
99
94
 
100
95
 
101
96
  main.add_command(lookup)
pyobo/constants.py CHANGED
@@ -4,7 +4,11 @@ from __future__ import annotations
4
4
 
5
5
  import logging
6
6
  import re
7
+ from collections.abc import Callable
8
+ from pathlib import Path
9
+ from typing import Literal, NamedTuple, TypeAlias
7
10
 
11
+ import bioregistry
8
12
  import pystow
9
13
  from typing_extensions import NotRequired, TypedDict
10
14
 
@@ -188,6 +192,8 @@ class IterHelperHelperDict(SlimGetOntologyKwargs):
188
192
  skip_set: set[str] | None
189
193
 
190
194
 
195
+ OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf"]
196
+
191
197
  #: from table 2 of the Functional OWL syntax definition
192
198
  #: at https://www.w3.org/TR/owl2-syntax/#IRIs
193
199
  DEFAULT_PREFIX_MAP = {
@@ -196,3 +202,20 @@ DEFAULT_PREFIX_MAP = {
196
202
  "xsd": "http://www.w3.org/2001/XMLSchema#",
197
203
  "owl": "http://www.w3.org/2002/07/owl#",
198
204
  }
205
+
206
+
207
+ class OntologyPathPack(NamedTuple):
208
+ """A format and path tuple."""
209
+
210
+ format: OntologyFormat
211
+ path: Path
212
+
213
+
214
+ #: Functions that get ontology files. Order matters in this list,
215
+ #: since order implicitly defines priority
216
+ ONTOLOGY_GETTERS: list[tuple[OntologyFormat, Callable[[str], str | None]]] = [
217
+ ("obo", bioregistry.get_obo_download),
218
+ ("owl", bioregistry.get_owl_download),
219
+ ("json", bioregistry.get_json_download),
220
+ ("rdf", bioregistry.get_rdf_download),
221
+ ]
pyobo/getters.py CHANGED
@@ -17,9 +17,11 @@ from pathlib import Path
17
17
  from textwrap import indent
18
18
  from typing import Any, TypeVar
19
19
 
20
+ import bioontologies.robot
20
21
  import bioregistry
21
22
  import click
22
23
  import pystow.utils
24
+ import requests.exceptions
23
25
  from tabulate import tabulate
24
26
  from tqdm.auto import tqdm
25
27
  from typing_extensions import Unpack
@@ -27,8 +29,10 @@ from typing_extensions import Unpack
27
29
  from .constants import (
28
30
  BUILD_SUBDIRECTORY_NAME,
29
31
  DATABASE_DIRECTORY,
32
+ ONTOLOGY_GETTERS,
30
33
  GetOntologyKwargs,
31
34
  IterHelperHelperDict,
35
+ OntologyPathPack,
32
36
  SlimGetOntologyKwargs,
33
37
  )
34
38
  from .identifier_utils import ParseError, wrap_norm_prefix
@@ -36,6 +40,7 @@ from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
36
40
  from .struct import Obo
37
41
  from .struct.obo import from_obo_path, from_obonet
38
42
  from .utils.io import safe_open_writer
43
+ from .utils.misc import _get_version_from_artifact
39
44
  from .utils.path import ensure_path, prefix_directory_join
40
45
  from .version import get_git_hash, get_version
41
46
 
@@ -56,7 +61,7 @@ class UnhandledFormatError(NoBuildError):
56
61
 
57
62
 
58
63
  #: The following prefixes can not be loaded through ROBOT without
59
- #: turning off integrity checks
64
+ #: turning off integrity checks. This used to be part of _convert_to_obo
60
65
  REQUIRES_NO_ROBOT_CHECK = {
61
66
  "clo",
62
67
  "vo",
@@ -64,9 +69,18 @@ REQUIRES_NO_ROBOT_CHECK = {
64
69
  "orphanet",
65
70
  "foodon",
66
71
  "caloha",
72
+ # "aeon",
67
73
  }
68
74
 
69
75
 
76
+ def _convert_to_obo(path: Path) -> Path:
77
+ import bioontologies.robot
78
+
79
+ _converted_obo_path = path.with_suffix(".obo")
80
+ bioontologies.robot.convert(path, _converted_obo_path, check=False)
81
+ return _converted_obo_path
82
+
83
+
70
84
  @wrap_norm_prefix
71
85
  def get_ontology(
72
86
  prefix: str,
@@ -114,10 +128,20 @@ def get_ontology(
114
128
  """
115
129
  if force:
116
130
  force_process = True
131
+ if has_nomenclature_plugin(prefix):
132
+ obo = run_nomenclature_plugin(prefix, version=version)
133
+ if cache:
134
+ logger.debug("[%s] caching nomenclature plugin", prefix)
135
+ obo.write_default(force=force_process)
136
+ return obo
137
+
117
138
  if prefix == "uberon":
118
139
  logger.info("UBERON has so much garbage in it that defaulting to non-strict parsing")
119
140
  strict = False
120
141
 
142
+ if version is None:
143
+ version = _get_version_from_artifact(prefix)
144
+
121
145
  if force_process:
122
146
  obonet_json_gz_path = None
123
147
  elif not cache:
@@ -146,26 +170,14 @@ def get_ontology(
146
170
  else:
147
171
  logger.debug("[%s] no obonet cache found at %s", prefix, obonet_json_gz_path)
148
172
 
149
- if has_nomenclature_plugin(prefix):
150
- obo = run_nomenclature_plugin(prefix, version=version)
151
- if cache:
152
- logger.debug("[%s] caching nomenclature plugin", prefix)
153
- obo.write_default(force=force_process)
154
- return obo
155
-
156
- ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
157
- if path is None:
173
+ path_pack = _ensure_ontology_path(prefix, force=force, version=version)
174
+ if path_pack is None:
158
175
  raise NoBuildError(prefix)
159
- elif ontology_format == "obo":
176
+ ontology_format, path = path_pack
177
+ if ontology_format == "obo":
160
178
  pass # all gucci
161
- elif ontology_format == "owl":
162
- import bioontologies.robot
163
-
164
- _converted_obo_path = path.with_suffix(".obo")
165
- if prefix in REQUIRES_NO_ROBOT_CHECK:
166
- robot_check = False
167
- bioontologies.robot.convert(path, _converted_obo_path, check=robot_check)
168
- path = _converted_obo_path
179
+ elif ontology_format in {"owl", "rdf"}:
180
+ path = _convert_to_obo(path)
169
181
  elif ontology_format == "json":
170
182
  from .struct.obograph import read_obograph
171
183
 
@@ -191,21 +203,21 @@ def get_ontology(
191
203
 
192
204
 
193
205
  def _ensure_ontology_path(
194
- prefix: str, force: bool, version: str | None
195
- ) -> tuple[str, Path] | tuple[None, None]:
196
- for ontology_format, url in [
197
- ("obo", bioregistry.get_obo_download(prefix)),
198
- ("owl", bioregistry.get_owl_download(prefix)),
199
- ("json", bioregistry.get_json_download(prefix)),
200
- ]:
201
- if url is not None:
202
- try:
203
- path = ensure_path(prefix, url=url, force=force, version=version)
204
- except (urllib.error.HTTPError, pystow.utils.DownloadError):
205
- continue
206
- else:
207
- return ontology_format, path
208
- return None, None
206
+ prefix: str, *, force: bool, version: str | None
207
+ ) -> OntologyPathPack | None:
208
+ for ontology_format, getter in ONTOLOGY_GETTERS:
209
+ url = getter(prefix)
210
+ if url is None:
211
+ continue
212
+ try:
213
+ path = ensure_path(prefix, url=url, force=force, version=version)
214
+ except (urllib.error.HTTPError, pystow.utils.DownloadError):
215
+ continue
216
+ except pystow.utils.UnexpectedDirectoryError:
217
+ continue # TODO report more info about the URL and the name it tried to make
218
+ else:
219
+ return OntologyPathPack(ontology_format, path)
220
+ return None
209
221
 
210
222
 
211
223
  SKIP = {
@@ -234,6 +246,7 @@ SKIP = {
234
246
  "gwascentral.phenotype": "website is down? or API changed?", # FIXME
235
247
  "gwascentral.study": "website is down? or API changed?", # FIXME
236
248
  "snomedct": "dead source",
249
+ "ero": "dead",
237
250
  }
238
251
 
239
252
  X = TypeVar("X")
@@ -349,6 +362,10 @@ def iter_helper_helper(
349
362
  logger.warning("[%s] unable to download - %s", prefix, e.reason)
350
363
  if strict and not bioregistry.is_deprecated(prefix):
351
364
  raise
365
+ except requests.exceptions.ConnectTimeout as e:
366
+ logger.warning("[%s] unable to download - %s", prefix, e)
367
+ if strict and not bioregistry.is_deprecated(prefix):
368
+ raise
352
369
  except ParseError as e:
353
370
  if not e.node:
354
371
  logger.warning("[%s] %s", prefix, e)
@@ -360,7 +377,7 @@ def iter_helper_helper(
360
377
  if "DrugBank" not in str(e):
361
378
  raise
362
379
  logger.warning("[drugbank] invalid credentials")
363
- except subprocess.CalledProcessError:
380
+ except (subprocess.CalledProcessError, bioontologies.robot.ROBOTError):
364
381
  logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
365
382
  except ValueError as e:
366
383
  if _is_xml(e):
pyobo/sources/__init__.py CHANGED
@@ -6,7 +6,13 @@ from .antibodyregistry import AntibodyRegistryGetter
6
6
  from .bigg import BiGGCompartmentGetter, BiGGMetaboliteGetter, BiGGModelGetter, BiGGReactionGetter
7
7
  from .ccle import CCLEGetter
8
8
  from .cgnc import CGNCGetter
9
- from .chembl import ChEMBLCompoundGetter, ChEMBLTargetGetter
9
+ from .chembl import (
10
+ ChEMBLCellGetter,
11
+ ChEMBLCompoundGetter,
12
+ ChEMBLMechanismGetter,
13
+ ChEMBLTargetGetter,
14
+ ChEMBLTissueGetter,
15
+ )
10
16
  from .civic_gene import CIVICGeneGetter
11
17
  from .clinicaltrials import ClinicalTrialsGetter
12
18
  from .complexportal import ComplexPortalGetter
@@ -26,6 +32,7 @@ from .geonames import GeonamesFeatureGetter, GeonamesGetter
26
32
  from .gtdb import GTDBGetter
27
33
  from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
28
34
  from .hgnc import HGNCGetter, HGNCGroupGetter
35
+ from .iana_media_type import IANAGetter
29
36
  from .icd import ICD10Getter, ICD11Getter
30
37
  from .intact import IntactGetter
31
38
  from .interpro import InterProGetter
@@ -60,6 +67,7 @@ from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
60
67
  from .sgd import SGDGetter
61
68
  from .signor import SignorGetter
62
69
  from .slm import SLMGetter
70
+ from .spdx import SPDXLicenseGetter
63
71
  from .umls import UMLSGetter, UMLSSTyGetter
64
72
  from .unimod import UnimodGetter
65
73
  from .uniprot import UniProtGetter, UniProtPtmGetter
@@ -79,8 +87,11 @@ __all__ = [
79
87
  "CONSOGetter",
80
88
  "CPTGetter",
81
89
  "CVXGetter",
90
+ "ChEMBLCellGetter",
82
91
  "ChEMBLCompoundGetter",
92
+ "ChEMBLMechanismGetter",
83
93
  "ChEMBLTargetGetter",
94
+ "ChEMBLTissueGetter",
84
95
  "ClinicalTrialsGetter",
85
96
  "ComplexPortalGetter",
86
97
  "CreditGetter",
@@ -100,6 +111,7 @@ __all__ = [
100
111
  "GeonamesGetter",
101
112
  "HGNCGetter",
102
113
  "HGNCGroupGetter",
114
+ "IANAGetter",
103
115
  "ICD10Getter",
104
116
  "ICD11Getter",
105
117
  "ITISGetter",
@@ -142,6 +154,7 @@ __all__ = [
142
154
  "SFAMGetter",
143
155
  "SGDGetter",
144
156
  "SLMGetter",
157
+ "SPDXLicenseGetter",
145
158
  "SignorGetter",
146
159
  "UMLSGetter",
147
160
  "UMLSSTyGetter",
@@ -1,9 +1,15 @@
1
1
  """Resources from ChEMBL."""
2
2
 
3
+ from .chembl_cell import ChEMBLCellGetter
3
4
  from .chembl_compound import ChEMBLCompoundGetter
5
+ from .chembl_mechanism import ChEMBLMechanismGetter
4
6
  from .chembl_target import ChEMBLTargetGetter
7
+ from .chembl_tissue import ChEMBLTissueGetter
5
8
 
6
9
  __all__ = [
10
+ "ChEMBLCellGetter",
7
11
  "ChEMBLCompoundGetter",
12
+ "ChEMBLMechanismGetter",
8
13
  "ChEMBLTargetGetter",
14
+ "ChEMBLTissueGetter",
9
15
  ]
@@ -0,0 +1,94 @@
1
+ """Converter for ChEMBL cells."""
2
+
3
+ import logging
4
+ from collections.abc import Iterable
5
+
6
+ import chembl_downloader
7
+
8
+ from pyobo.struct import Obo, Reference, Term
9
+ from pyobo.struct.typedef import derives_from_organism, exact_match
10
+
11
+ __all__ = [
12
+ "ChEMBLCellGetter",
13
+ ]
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ PREFIX = "chembl.cell"
18
+
19
+
20
class ChEMBLCellGetter(Obo):
    """Expose the cells catalogued in ChEMBL as an ontology."""

    ontology = PREFIX
    bioversions_key = "chembl"
    typedefs = [exact_match, derives_from_organism]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over the cell terms for this ontology's data version."""
        chembl_version = self._version_or_raise
        # resolves to the module-level iter_terms function, not this method
        return iter_terms(version=chembl_version)
30
+
31
+
32
+ QUERY = """\
33
+ SELECT
34
+ CHEMBL_ID,
35
+ CELL_NAME,
36
+ CELL_DESCRIPTION,
37
+ CELL_SOURCE_TISSUE,
38
+ CELL_SOURCE_TAX_ID,
39
+ CLO_ID,
40
+ EFO_ID,
41
+ CELLOSAURUS_ID,
42
+ CL_LINCS_ID,
43
+ CELL_ONTOLOGY_ID
44
+ FROM CELL_DICTIONARY
45
+ """
46
+
47
+
48
def iter_terms(version: str | None = None) -> Iterable[Term]:
    """Iterate over ChEMBL cell terms.

    :param version: The ChEMBL version handed to :mod:`chembl_downloader`
    :yields: One term per row returned by ``QUERY``, annotated with its source
        organism and exact matches to CLO, EFO, Cellosaurus, LINCS, and CL
        where the corresponding column is filled in.
    """
    with chembl_downloader.cursor(version=version) as cursor:
        cursor.execute(QUERY)
        for row in cursor.fetchall():
            (
                chembl_id,
                name,
                desc,
                _source_tissue,  # TODO how to annotate tissue, via TISSUE_DICTIONARY
                taxid,
                clo,
                efo,
                cellosaurus,
                lincs,
                cl,
            ) = row
            # drop descriptions that are missing or merely repeat the name
            definition = None if not desc or desc == name else desc
            term = Term(
                reference=Reference(prefix=PREFIX, identifier=chembl_id, name=name),
                definition=definition,
            )
            if taxid:
                organism = Reference(prefix="ncbitaxon", identifier=taxid)
                term.append_relationship(derives_from_organism, organism)

            # (target prefix, raw database value, leading text to strip, in order)
            cross_references = (
                ("clo", clo, ("CLO_",)),
                ("efo", efo, ("EFO_", "EFO")),
                ("cellosaurus", cellosaurus, ("CVCL_",)),
                ("lincs.cell", lincs, ()),  # kept with LCL- included!
                ("cl", cl, ("CL_",)),
            )
            for xref_prefix, raw_value, banana_parts in cross_references:
                if not raw_value:
                    continue
                identifier = raw_value
                for banana in banana_parts:
                    identifier = identifier.removeprefix(banana)
                term.append_exact_match(Reference(prefix=xref_prefix, identifier=identifier))
            yield term
91
+
92
+
93
+ if __name__ == "__main__":
94
+ ChEMBLCellGetter.cli()
@@ -0,0 +1,81 @@
1
+ """Converter for ChEMBL mechanisms."""
2
+
3
+ import logging
4
+ from collections.abc import Iterable
5
+
6
+ import chembl_downloader
7
+
8
+ from pyobo.struct import CHARLIE_TERM, PYOBO_INJECTED, Obo, Term
9
+ from pyobo.struct.typedef import exact_match
10
+
11
+ __all__ = [
12
+ "ChEMBLMechanismGetter",
13
+ ]
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ PREFIX = "chembl.mechanism"
18
+ QUERY = "SELECT * from ACTION_TYPE"
19
+
20
+ ROOT = (
21
+ Term.default(PREFIX, "mechanism", name="mechanism")
22
+ .append_contributor(CHARLIE_TERM)
23
+ .append_comment(PYOBO_INJECTED)
24
+ )
25
+
26
+
27
class ChEMBLMechanismGetter(Obo):
    """Expose the mechanisms (action types) catalogued in ChEMBL as an ontology."""

    ontology = PREFIX
    bioversions_key = "chembl"
    typedefs = [exact_match]
    root_terms = [ROOT.reference]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over the mechanism terms for this ontology's data version."""
        chembl_version = self._version_or_raise
        # resolves to the module-level iter_terms function, not this method
        return iter_terms(version=chembl_version)
38
+
39
+
40
def normalize_chembl_mechanism(name: str) -> str:
    """Turn a ChEMBL mechanism name into an identifier.

    The name is lowercased and every space becomes a hyphen.
    """
    lowered = name.lower()
    # splitting on a single space keeps empty pieces, so runs of
    # spaces become runs of hyphens
    return "-".join(lowered.split(" "))
43
+
44
+
45
def _norm_name(name: str) -> str:
    """Lowercase a mechanism name, restoring the casing of a leading-word ``RNAi``."""
    lowered = name.lower()
    # equivalent to replacing every "rnai " with "RNAi "
    return "RNAi ".join(lowered.split("rnai "))
47
+
48
+
49
def get_pattern(version: str | None = None) -> str:
    """Get a regular expression matching exactly the known mechanism identifiers.

    :param version: The ChEMBL version handed to :mod:`chembl_downloader`
    :return: An anchored alternation over the sorted, normalized action types,
        e.g. ``^(activator|agonist|...)$``
    """
    df = chembl_downloader.query("SELECT action_type from ACTION_TYPE", version=version)
    parts = "|".join(sorted(normalize_chembl_mechanism(name) for (name,) in df.values))
    # Use a group, not square brackets: ``[a|b]`` is a character class that
    # matches a single character, so it would reject every real identifier.
    return f"^({parts})$"
54
+
55
+
56
def iter_terms(version: str) -> Iterable[Term]:
    """Iterate over ChEMBL mechanisms.

    :param version: The ChEMBL version handed to :mod:`chembl_downloader`
    :yields: One term per row returned by ``QUERY``, with parents attached
    """
    frame = chembl_downloader.query(QUERY, version=version)
    term_by_name: dict[str, Term] = {}
    parent_by_name: dict[str, str] = {}
    for action_type, _description, parent_type in frame.values:
        term_by_name[action_type] = Term.from_triple(
            prefix=PREFIX,
            identifier=normalize_chembl_mechanism(action_type),
            name=_norm_name(action_type),
        )
        if action_type == parent_type:
            # protect against "other" which is a child of itself
            continue
        parent_by_name[action_type] = parent_type

    # link children only after every term exists, so row order does not matter
    for child_name, parent_name in parent_by_name.items():
        term_by_name[child_name].append_parent(term_by_name[parent_name])

    # these are the three top-level things in the hierarchy, which
    # we annotate onto a dummy parent term
    for top_level_name in ("POSITIVE MODULATOR", "NEGATIVE MODULATOR", "OTHER"):
        term_by_name[top_level_name].append_parent(ROOT)
    yield from term_by_name.values()
78
+
79
+
80
+ if __name__ == "__main__":
81
+ ChEMBLMechanismGetter.cli()