pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/biogrid.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Mapping
|
|
4
4
|
from functools import partial
|
|
5
|
-
from typing import Optional
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
|
|
@@ -43,7 +42,7 @@ taxonomy_remapping = { # so much for official names
|
|
|
43
42
|
}
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def _lookup(name: str) ->
|
|
45
|
+
def _lookup(name: str) -> str | None:
|
|
47
46
|
if name in taxonomy_remapping:
|
|
48
47
|
return taxonomy_remapping[name]
|
|
49
48
|
return get_ncbitaxon_id(name)
|
pyobo/sources/ccle.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import tarfile
|
|
4
4
|
from collections.abc import Iterable
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Optional
|
|
7
6
|
|
|
8
7
|
import pandas as pd
|
|
9
8
|
import pystow
|
|
@@ -11,7 +10,6 @@ import pystow
|
|
|
11
10
|
from pyobo import Obo, Reference, Term
|
|
12
11
|
|
|
13
12
|
__all__ = [
|
|
14
|
-
"get_obo",
|
|
15
13
|
"CCLEGetter",
|
|
16
14
|
]
|
|
17
15
|
|
|
@@ -23,21 +21,18 @@ class CCLEGetter(Obo):
|
|
|
23
21
|
"""An ontology representation of the Cancer Cell Line Encyclopedia's cell lines."""
|
|
24
22
|
|
|
25
23
|
ontology = bioregistry_key = PREFIX
|
|
24
|
+
name = "Cancer Cell Line Encyclopedia Cell Line"
|
|
26
25
|
|
|
27
26
|
def __post_init__(self):
|
|
28
27
|
self.data_version = VERSION
|
|
28
|
+
super().__post_init__()
|
|
29
29
|
|
|
30
30
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
31
31
|
"""Iterate over terms in the ontology."""
|
|
32
32
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def
|
|
36
|
-
"""Get CCLE Cells as OBO."""
|
|
37
|
-
return CCLEGetter(force=force)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
35
|
+
def iter_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
|
|
41
36
|
"""Iterate over CCLE Cells."""
|
|
42
37
|
df = ensure_df(version=version, force=force)
|
|
43
38
|
for identifier, depmap_id, name in df.values:
|
|
@@ -54,21 +49,21 @@ def get_ccle_static_version() -> str:
|
|
|
54
49
|
return "2019"
|
|
55
50
|
|
|
56
51
|
|
|
57
|
-
def get_url(version:
|
|
52
|
+
def get_url(version: str | None = None) -> str:
|
|
58
53
|
"""Get the cBioPortal URL for the given version of CCLE's cell lines."""
|
|
59
54
|
if version is None:
|
|
60
55
|
version = get_ccle_static_version()
|
|
61
56
|
return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
|
|
62
57
|
|
|
63
58
|
|
|
64
|
-
def get_inner(version:
|
|
59
|
+
def get_inner(version: str | None = None) -> str:
|
|
65
60
|
"""Get the inner tarfile path."""
|
|
66
61
|
if version is None:
|
|
67
62
|
version = get_ccle_static_version()
|
|
68
63
|
return f"ccle_broad_{version}/data_clinical_sample.txt"
|
|
69
64
|
|
|
70
65
|
|
|
71
|
-
def ensure(version:
|
|
66
|
+
def ensure(version: str | None = None, **kwargs) -> Path:
|
|
72
67
|
"""Ensure the given version is downloaded."""
|
|
73
68
|
if version is None:
|
|
74
69
|
version = get_ccle_static_version()
|
|
@@ -76,7 +71,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path:
|
|
|
76
71
|
return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
|
|
77
72
|
|
|
78
73
|
|
|
79
|
-
def ensure_df(version:
|
|
74
|
+
def ensure_df(version: str | None = None, force: bool = False) -> pd.DataFrame:
|
|
80
75
|
"""Get the CCLE clinical sample dataframe."""
|
|
81
76
|
if version is None:
|
|
82
77
|
version = get_ccle_static_version()
|
pyobo/sources/cgnc.py
CHANGED
pyobo/sources/chebi.py
CHANGED
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
"""Converter for ChEMBL.
|
|
2
|
-
|
|
3
|
-
Run with ``python -m pyobo.sources.chembl -vv``.
|
|
4
|
-
"""
|
|
1
|
+
"""Converter for ChEMBL Compounds."""
|
|
5
2
|
|
|
6
3
|
import logging
|
|
7
4
|
from collections.abc import Iterable
|
|
8
|
-
from contextlib import closing
|
|
9
5
|
|
|
10
6
|
import chembl_downloader
|
|
11
7
|
|
|
@@ -50,28 +46,20 @@ class ChEMBLCompoundGetter(Obo):
|
|
|
50
46
|
return iter_terms(version=self._version_or_raise)
|
|
51
47
|
|
|
52
48
|
|
|
53
|
-
def get_obo(force: bool = False) -> Obo:
|
|
54
|
-
"""Return ChEMBL Compounds as OBO."""
|
|
55
|
-
return ChEMBLCompoundGetter(force=force)
|
|
56
|
-
|
|
57
|
-
|
|
58
49
|
def iter_terms(version: str) -> Iterable[Term]:
|
|
59
50
|
"""Iterate over ChEMBL compounds."""
|
|
60
|
-
with chembl_downloader.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if inchi_key:
|
|
73
|
-
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
|
|
74
|
-
yield term
|
|
51
|
+
with chembl_downloader.cursor(version=version) as cursor:
|
|
52
|
+
cursor.execute(QUERY)
|
|
53
|
+
for chembl_id, name, smiles, inchi, inchi_key in cursor.fetchall():
|
|
54
|
+
# TODO add xrefs?
|
|
55
|
+
term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
|
|
56
|
+
if smiles:
|
|
57
|
+
term.annotate_string(has_smiles, smiles)
|
|
58
|
+
if inchi:
|
|
59
|
+
term.annotate_string(has_inchi, inchi)
|
|
60
|
+
if inchi_key:
|
|
61
|
+
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
|
|
62
|
+
yield term
|
|
75
63
|
|
|
76
64
|
|
|
77
65
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Converter for ChEMBL targets."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
|
|
7
|
+
import chembl_downloader
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from pyobo import default_reference
|
|
11
|
+
from pyobo.struct import Obo, Reference, Term
|
|
12
|
+
from pyobo.struct.typedef import (
|
|
13
|
+
exact_match,
|
|
14
|
+
has_component,
|
|
15
|
+
has_member,
|
|
16
|
+
has_participant,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"ChEMBLTargetGetter",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
from pyobo.utils.path import ensure_df
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
PREFIX = "chembl.target"
|
|
28
|
+
|
|
29
|
+
TTYPE_QUERY = """\
|
|
30
|
+
SELECT TARGET_TYPE, TARGET_DESC, PARENT_TYPE
|
|
31
|
+
FROM TARGET_TYPE
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
QUERY = """\
|
|
35
|
+
SELECT
|
|
36
|
+
CHEMBL_ID,
|
|
37
|
+
PREF_NAME,
|
|
38
|
+
TARGET_TYPE,
|
|
39
|
+
TAX_ID
|
|
40
|
+
FROM TARGET_DICTIONARY
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ChEMBLTargetGetter(Obo):
|
|
45
|
+
"""An ontology representation of ChEMBL targets."""
|
|
46
|
+
|
|
47
|
+
ontology = PREFIX
|
|
48
|
+
bioversions_key = "chembl"
|
|
49
|
+
typedefs = [exact_match, has_component, has_member, has_participant]
|
|
50
|
+
root_terms = [
|
|
51
|
+
default_reference(PREFIX, "undefined"),
|
|
52
|
+
default_reference(PREFIX, "molecular"),
|
|
53
|
+
default_reference(PREFIX, "non-molecular"),
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
57
|
+
"""Iterate over terms in the ontology."""
|
|
58
|
+
return iter_terms(version=self._version_or_raise)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def iter_terms(version: str) -> Iterable[Term]:
|
|
62
|
+
"""Iterate over ChEMBL targets."""
|
|
63
|
+
chembl_to_uniprots = get_chembl_protein_equivalences(version=version)
|
|
64
|
+
target_types: dict[str, Term] = {}
|
|
65
|
+
parents: dict[str, str] = {}
|
|
66
|
+
with chembl_downloader.cursor(version=version) as cursor:
|
|
67
|
+
cursor.execute(TTYPE_QUERY)
|
|
68
|
+
for target_type, desc, parent in cursor.fetchall():
|
|
69
|
+
identifier = target_type.lower().replace(" ", "-")
|
|
70
|
+
target_types[target_type] = Term(
|
|
71
|
+
reference=default_reference(PREFIX, identifier, name=target_type),
|
|
72
|
+
definition=desc,
|
|
73
|
+
)
|
|
74
|
+
if parent:
|
|
75
|
+
parents[target_type] = parent
|
|
76
|
+
|
|
77
|
+
for child, parent in parents.items():
|
|
78
|
+
target_types[child].append_parent(target_types[parent])
|
|
79
|
+
|
|
80
|
+
yield from target_types.values()
|
|
81
|
+
|
|
82
|
+
with chembl_downloader.cursor(version=version) as cursor:
|
|
83
|
+
cursor.execute(QUERY)
|
|
84
|
+
for chembl_id, name, target_type, ncbitaxon_id in cursor.fetchall():
|
|
85
|
+
term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
|
|
86
|
+
if ncbitaxon_id:
|
|
87
|
+
term.set_species(str(ncbitaxon_id))
|
|
88
|
+
term.append_parent(target_types[target_type])
|
|
89
|
+
|
|
90
|
+
uniprot_ids = chembl_to_uniprots.get(chembl_id)
|
|
91
|
+
if uniprot_ids is None:
|
|
92
|
+
pass
|
|
93
|
+
elif target_type in {
|
|
94
|
+
"PROTEIN COMPLEX",
|
|
95
|
+
"CHIMERIC PROTEIN",
|
|
96
|
+
"PROTEIN COMPLEX GROUP",
|
|
97
|
+
"PROTEIN NUCLEIC-ACID COMPLEX",
|
|
98
|
+
"SELECTIVITY GROUP",
|
|
99
|
+
}:
|
|
100
|
+
for uniprot_id in uniprot_ids:
|
|
101
|
+
term.annotate_object(
|
|
102
|
+
has_component, Reference(prefix="uniprot", identifier=uniprot_id)
|
|
103
|
+
)
|
|
104
|
+
elif target_type == "PROTEIN FAMILY":
|
|
105
|
+
for uniprot_id in uniprot_ids:
|
|
106
|
+
term.annotate_object(
|
|
107
|
+
has_member, Reference(prefix="uniprot", identifier=uniprot_id)
|
|
108
|
+
)
|
|
109
|
+
elif target_type == "PROTEIN-PROTEIN INTERACTION":
|
|
110
|
+
for uniprot_id in uniprot_ids:
|
|
111
|
+
term.annotate_object(
|
|
112
|
+
has_participant, Reference(prefix="uniprot", identifier=uniprot_id)
|
|
113
|
+
)
|
|
114
|
+
elif target_type == "SINGLE PROTEIN":
|
|
115
|
+
if len(uniprot_ids) == 1:
|
|
116
|
+
term.append_exact_match(Reference(prefix="uniprot", identifier=uniprot_ids[0]))
|
|
117
|
+
else:
|
|
118
|
+
tqdm.write(
|
|
119
|
+
f"[chembl.target:{chembl_id}] multiple mappings found to single protein: {uniprot_ids}"
|
|
120
|
+
)
|
|
121
|
+
for uniprot_id in uniprot_ids:
|
|
122
|
+
term.append_xref(Reference(prefix="uniprot", identifier=uniprot_id))
|
|
123
|
+
elif len(uniprot_ids) == 1:
|
|
124
|
+
luid = uniprot_ids[0]
|
|
125
|
+
if luid.startswith("ENSG"):
|
|
126
|
+
reference = Reference(prefix="ensembl", identifier=luid)
|
|
127
|
+
else:
|
|
128
|
+
reference = Reference(prefix="uniprot", identifier=luid)
|
|
129
|
+
term.append_exact_match(reference)
|
|
130
|
+
else:
|
|
131
|
+
tqdm.write(
|
|
132
|
+
f"[chembl.target:{chembl_id}] need to handle multiple uniprots for {target_type} - {uniprot_ids}"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
yield term
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_chembl_protein_equivalences(version: str | None = None) -> dict[str, list[str]]:
|
|
139
|
+
"""Get ChEMBL protein equivalences."""
|
|
140
|
+
if version is None:
|
|
141
|
+
version = chembl_downloader.latest()
|
|
142
|
+
url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
|
|
143
|
+
df = ensure_df(
|
|
144
|
+
PREFIX,
|
|
145
|
+
url=url,
|
|
146
|
+
sep="\t",
|
|
147
|
+
skiprows=1,
|
|
148
|
+
usecols=[0, 1],
|
|
149
|
+
names=["uniprot", "chembl"],
|
|
150
|
+
header=None,
|
|
151
|
+
# names=[TARGET_ID, SOURCE_ID], # switch around
|
|
152
|
+
)
|
|
153
|
+
dd = defaultdict(list)
|
|
154
|
+
for uniprot, chembl in df.values:
|
|
155
|
+
dd[chembl].append(uniprot)
|
|
156
|
+
return dict(dd)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__":
|
|
160
|
+
ChEMBLTargetGetter.cli()
|
pyobo/sources/civic_gene.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
"""Converter for CiVIC Genes."""
|
|
2
2
|
|
|
3
|
+
import datetime
|
|
3
4
|
from collections.abc import Iterable
|
|
4
|
-
from typing import Optional
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
-
from pyobo
|
|
8
|
+
from pyobo import default_reference
|
|
9
|
+
from pyobo.struct import Obo, Reference, Term, TypeDef
|
|
9
10
|
from pyobo.utils.path import ensure_df
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
@@ -15,38 +16,77 @@ __all__ = [
|
|
|
15
16
|
PREFIX = "civic.gid"
|
|
16
17
|
URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
|
|
17
18
|
|
|
19
|
+
GENE = Term(reference=default_reference(PREFIX, "gene", name="gene"))
|
|
20
|
+
FACTOR = Term(reference=default_reference(PREFIX, "factor", name="factor"))
|
|
21
|
+
FUSION = Term(reference=default_reference(PREFIX, "fusion", name="fusion"))
|
|
22
|
+
HAS_3P = TypeDef.default(PREFIX, "has3p", name="has 3' gene", is_metadata_tag=False)
|
|
23
|
+
HAS_5P = TypeDef.default(PREFIX, "has5p", name="has 5' gene", is_metadata_tag=False)
|
|
18
24
|
|
|
19
|
-
|
|
20
|
-
return int(t.identifier)
|
|
25
|
+
TYPES = {"Gene": GENE, "Factor": FACTOR, "Fusion": FUSION}
|
|
21
26
|
|
|
22
27
|
|
|
23
28
|
class CIVICGeneGetter(Obo):
|
|
24
29
|
"""An ontology representation of CiVIC's gene nomenclature."""
|
|
25
30
|
|
|
26
31
|
bioversions_key = ontology = PREFIX
|
|
27
|
-
|
|
32
|
+
typedefs = [HAS_3P, HAS_5P]
|
|
33
|
+
root_terms = [GENE.reference, FACTOR.reference, FUSION.reference]
|
|
28
34
|
|
|
29
35
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
30
36
|
"""Iterate over gene terms for CiVIC."""
|
|
31
|
-
yield from
|
|
37
|
+
yield from (GENE, FACTOR, FUSION)
|
|
38
|
+
yield from get_terms(self._version_or_raise, force=force)
|
|
32
39
|
|
|
33
40
|
|
|
34
|
-
def get_terms(version:
|
|
41
|
+
def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
35
42
|
"""Get CIVIC terms."""
|
|
36
|
-
|
|
37
|
-
# version_dt: datetime.date = dateutil.parser.parse(version)
|
|
38
|
-
# else:
|
|
39
|
-
# version_dt: datetime.date = datetime.today()
|
|
40
|
-
# version = version_dt.strftime("01-%b-%Y")
|
|
43
|
+
dt = datetime.datetime.strptime(version, "%Y-%m-%d")
|
|
41
44
|
# version is like 01-Feb-2024
|
|
42
|
-
|
|
45
|
+
dt2 = datetime.datetime.strftime(dt, "%d-%b-%Y")
|
|
46
|
+
url = f"https://civicdb.org/downloads/{dt2}/{dt2}-GeneSummaries.tsv"
|
|
43
47
|
df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
|
|
44
|
-
for
|
|
48
|
+
for (
|
|
49
|
+
identifier,
|
|
50
|
+
_,
|
|
51
|
+
type,
|
|
52
|
+
name,
|
|
53
|
+
aliases,
|
|
54
|
+
description,
|
|
55
|
+
_last_review_date,
|
|
56
|
+
_flag,
|
|
57
|
+
entrez_id,
|
|
58
|
+
ncit_id,
|
|
59
|
+
_5p_status,
|
|
60
|
+
_3p_status,
|
|
61
|
+
five_p_id,
|
|
62
|
+
_5p_name,
|
|
63
|
+
_5p_ncbigene,
|
|
64
|
+
three_p_id,
|
|
65
|
+
_3p_name,
|
|
66
|
+
_3p_ncbigene,
|
|
67
|
+
) in df.values:
|
|
45
68
|
term = Term(
|
|
46
69
|
reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
|
|
47
70
|
definition=description if pd.notna(description) else None,
|
|
48
71
|
)
|
|
49
|
-
term.
|
|
72
|
+
term.append_parent(TYPES[type])
|
|
73
|
+
if pd.notna(entrez_id):
|
|
74
|
+
term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
|
|
75
|
+
if pd.notna(ncit_id):
|
|
76
|
+
term.append_exact_match(Reference(prefix="ncit", identifier=ncit_id))
|
|
77
|
+
if pd.notna(aliases):
|
|
78
|
+
for alias in aliases.split(","):
|
|
79
|
+
if alias != name:
|
|
80
|
+
term.append_synonym(alias.strip())
|
|
81
|
+
if pd.notna(five_p_id):
|
|
82
|
+
term.append_relationship(
|
|
83
|
+
HAS_5P, Reference(prefix=PREFIX, identifier=five_p_id, name=_5p_name)
|
|
84
|
+
)
|
|
85
|
+
if pd.notna(three_p_id):
|
|
86
|
+
term.append_relationship(
|
|
87
|
+
HAS_3P, Reference(prefix=PREFIX, identifier=three_p_id, name=_3p_name)
|
|
88
|
+
)
|
|
89
|
+
|
|
50
90
|
yield term
|
|
51
91
|
|
|
52
92
|
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""A source for ClinicalTrials.gov."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
|
|
5
|
+
from clinicaltrials_downloader import get_studies_slim
|
|
6
|
+
|
|
7
|
+
from pyobo import Obo, Reference, Term, TypeDef, default_reference
|
|
8
|
+
from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED
|
|
9
|
+
from pyobo.struct.typedef import has_contributor
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ClinicalTrialsGetter",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
PREFIX = "clinicaltrials"
|
|
16
|
+
|
|
17
|
+
INVESTIGATES_CONDITION = TypeDef(
|
|
18
|
+
reference=default_reference(
|
|
19
|
+
prefix=PREFIX, identifier="investigates_condition", name="investigates condition"
|
|
20
|
+
),
|
|
21
|
+
is_metadata_tag=True,
|
|
22
|
+
)
|
|
23
|
+
HAS_INTERVENTION = TypeDef(
|
|
24
|
+
reference=default_reference(
|
|
25
|
+
prefix=PREFIX, identifier="has_intervention", name="has intervention"
|
|
26
|
+
),
|
|
27
|
+
is_metadata_tag=True,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
STUDY_TERM = Term(reference=default_reference(PREFIX, "study", name="study"))
|
|
31
|
+
|
|
32
|
+
CLINICAL_TRIAL_TERM = Term(
|
|
33
|
+
reference=default_reference(PREFIX, "clinical-trial", name="clinical trial")
|
|
34
|
+
).append_parent(STUDY_TERM)
|
|
35
|
+
|
|
36
|
+
INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
|
|
37
|
+
reference=default_reference(
|
|
38
|
+
PREFIX, "interventional-clinical-trial", name="interventional clinical trial"
|
|
39
|
+
)
|
|
40
|
+
).append_parent(CLINICAL_TRIAL_TERM)
|
|
41
|
+
|
|
42
|
+
RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
|
|
43
|
+
reference=default_reference(
|
|
44
|
+
PREFIX,
|
|
45
|
+
"randomized-interventional-clinical-trial",
|
|
46
|
+
name="randomized interventional clinical trial",
|
|
47
|
+
)
|
|
48
|
+
).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
|
|
49
|
+
|
|
50
|
+
NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
|
|
51
|
+
reference=default_reference(
|
|
52
|
+
PREFIX,
|
|
53
|
+
"non-randomized-interventional-clinical-trial",
|
|
54
|
+
name="non-randomized interventional clinical trial",
|
|
55
|
+
)
|
|
56
|
+
).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
|
|
57
|
+
|
|
58
|
+
OBSERVATIONAL_CLINICAL_TRIAL_TERM = Term(
|
|
59
|
+
reference=default_reference(
|
|
60
|
+
PREFIX, "observational-clinical-trial", name="observational clinical trial"
|
|
61
|
+
)
|
|
62
|
+
).append_parent(CLINICAL_TRIAL_TERM)
|
|
63
|
+
|
|
64
|
+
EXPANDED_ACCESS_STUDY_TERM = Term(
|
|
65
|
+
reference=default_reference(PREFIX, "expanded-access-study", name="expanded access study")
|
|
66
|
+
).append_parent(STUDY_TERM)
|
|
67
|
+
|
|
68
|
+
TERMS = [
|
|
69
|
+
STUDY_TERM,
|
|
70
|
+
CLINICAL_TRIAL_TERM,
|
|
71
|
+
OBSERVATIONAL_CLINICAL_TRIAL_TERM,
|
|
72
|
+
INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
73
|
+
EXPANDED_ACCESS_STUDY_TERM,
|
|
74
|
+
RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
75
|
+
NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
# These were identified as the 4 possibilities for study
|
|
79
|
+
# types in ClinicalTrials.gov. See summary script at
|
|
80
|
+
# https://gist.github.com/cthoyt/12a3cb3c63ad68d73fe5a2f0d506526f
|
|
81
|
+
PARENTS: dict[tuple[str | None, str | None], Term] = {
|
|
82
|
+
("INTERVENTIONAL", None): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
83
|
+
("INTERVENTIONAL", "NA"): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
84
|
+
("INTERVENTIONAL", "RANDOMIZED"): RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
85
|
+
("INTERVENTIONAL", "NON_RANDOMIZED"): NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
|
|
86
|
+
("OBSERVATIONAL", None): OBSERVATIONAL_CLINICAL_TRIAL_TERM,
|
|
87
|
+
("EXPANDED_ACCESS", None): EXPANDED_ACCESS_STUDY_TERM,
|
|
88
|
+
(None, None): STUDY_TERM,
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ClinicalTrialsGetter(Obo):
|
|
93
|
+
"""Get the ClinicalTrials.gov database as an ontology."""
|
|
94
|
+
|
|
95
|
+
ontology = PREFIX
|
|
96
|
+
dynamic_version = True
|
|
97
|
+
typedefs = [has_contributor, INVESTIGATES_CONDITION, HAS_INTERVENTION]
|
|
98
|
+
root_terms = [STUDY_TERM.reference]
|
|
99
|
+
|
|
100
|
+
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
101
|
+
"""Iterate over terms for studies."""
|
|
102
|
+
yield CHARLIE_TERM
|
|
103
|
+
yield HUMAN_TERM
|
|
104
|
+
for term in TERMS:
|
|
105
|
+
term.append_contributor(CHARLIE_TERM)
|
|
106
|
+
term.append_comment(PYOBO_INJECTED)
|
|
107
|
+
yield term
|
|
108
|
+
yield from iterate_studies()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def iterate_studies(*, force: bool = False) -> Iterable[Term]:
|
|
112
|
+
"""Iterate over terms for studies."""
|
|
113
|
+
studies = get_studies_slim(force=force)
|
|
114
|
+
for study in studies:
|
|
115
|
+
yield _process_study(study)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _process_study(raw_study) -> Term:
|
|
119
|
+
protocol_section = raw_study["protocolSection"]
|
|
120
|
+
identification_module = protocol_section["identificationModule"]
|
|
121
|
+
identifier = identification_module["nctId"]
|
|
122
|
+
|
|
123
|
+
name = identification_module.get("officialTitle")
|
|
124
|
+
synonym = identification_module.get("briefTitle")
|
|
125
|
+
if synonym and not name:
|
|
126
|
+
name, synonym = synonym, None
|
|
127
|
+
|
|
128
|
+
term = Term(
|
|
129
|
+
reference=Reference(prefix=PREFIX, identifier=identifier, name=name), type="Instance"
|
|
130
|
+
)
|
|
131
|
+
if synonym:
|
|
132
|
+
term.append_synonym(synonym)
|
|
133
|
+
|
|
134
|
+
design_module = protocol_section.get("designModule", {})
|
|
135
|
+
study_type = design_module.get("studyType")
|
|
136
|
+
allocation = design_module.get("designInfo", {}).get("allocation")
|
|
137
|
+
term.append_parent(PARENTS[study_type, allocation])
|
|
138
|
+
|
|
139
|
+
references_module = protocol_section.get("referencesModule", {})
|
|
140
|
+
for reference in references_module.get("references", []):
|
|
141
|
+
if pubmed_id := reference.get("pmid"):
|
|
142
|
+
term.append_see_also(Reference(prefix="pubmed", identifier=pubmed_id))
|
|
143
|
+
|
|
144
|
+
derived_section = raw_study["derivedSection"]
|
|
145
|
+
for mesh_record in derived_section.get("conditionBrowseModule", {}).get("meshes", []):
|
|
146
|
+
term.annotate_object(INVESTIGATES_CONDITION, _mesh(mesh_record))
|
|
147
|
+
|
|
148
|
+
for mesh_record in derived_section.get("interventionBrowseModule", {}).get("meshes", []):
|
|
149
|
+
term.annotate_object(HAS_INTERVENTION, _mesh(mesh_record))
|
|
150
|
+
return term
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _mesh(mesh_record: dict[str, str]) -> Reference:
|
|
154
|
+
return Reference(
|
|
155
|
+
prefix="mesh", identifier=mesh_record["id"], name=mesh_record.get("term") or None
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__":
|
|
160
|
+
ClinicalTrialsGetter.cli()
|