PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/sources/biogrid.py CHANGED Viewed

@@ -2,7 +2,6 @@
 from collections.abc import Mapping
 from functools import partial
-from typing import Optional
 import pandas as pd
@@ -43,7 +42,7 @@ taxonomy_remapping = {  # so much for official names
 }
-def _lookup(name: str) -> Optional[str]:
+def _lookup(name: str) -> str | None:
     if name in taxonomy_remapping:
         return taxonomy_remapping[name]
     return get_ncbitaxon_id(name)

pyobo/sources/ccle.py CHANGED Viewed

@@ -3,7 +3,6 @@
 import tarfile
 from collections.abc import Iterable
 from pathlib import Path
-from typing import Optional
 import pandas as pd
 import pystow
@@ -11,7 +10,6 @@ import pystow
 from pyobo import Obo, Reference, Term
 __all__ = [
-    "get_obo",
     "CCLEGetter",
 ]
@@ -23,21 +21,18 @@ class CCLEGetter(Obo):
     """An ontology representation of the Cancer Cell Line Encyclopedia's cell lines."""
     ontology = bioregistry_key = PREFIX
+    name = "Cancer Cell Line Encyclopedia Cell Line"
     def __post_init__(self):
         self.data_version = VERSION
+        super().__post_init__()
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get CCLE Cells as OBO."""
-    return CCLEGetter(force=force)
-def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
+def iter_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
     """Iterate over CCLE Cells."""
     df = ensure_df(version=version, force=force)
     for identifier, depmap_id, name in df.values:
@@ -54,21 +49,21 @@ def get_ccle_static_version() -> str:
     return "2019"
-def get_url(version: Optional[str] = None) -> str:
+def get_url(version: str | None = None) -> str:
     """Get the cBioPortal URL for the given version of CCLE's cell lines."""
     if version is None:
         version = get_ccle_static_version()
     return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
-def get_inner(version: Optional[str] = None) -> str:
+def get_inner(version: str | None = None) -> str:
     """Get the inner tarfile path."""
     if version is None:
         version = get_ccle_static_version()
     return f"ccle_broad_{version}/data_clinical_sample.txt"
-def ensure(version: Optional[str] = None, **kwargs) -> Path:
+def ensure(version: str | None = None, **kwargs) -> Path:
     """Ensure the given version is downloaded."""
     if version is None:
         version = get_ccle_static_version()
@@ -76,7 +71,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path:
     return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
-def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
+def ensure_df(version: str | None = None, force: bool = False) -> pd.DataFrame:
     """Get the CCLE clinical sample dataframe."""
     if version is None:
         version = get_ccle_static_version()

pyobo/sources/cgnc.py CHANGED Viewed

@@ -31,11 +31,6 @@ class CGNCGetter(Obo):
         return get_terms(force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get CGNC as OBO."""
-    return CGNCGetter(force=force)
 HEADER = [
     "cgnc_id",
     "ncbigene_id",

pyobo/sources/chebi.py CHANGED Viewed

@@ -7,9 +7,9 @@ from ..struct import Reference, TypeDef
 from ..utils.io import multisetdict
 __all__ = [
-    "get_chebi_smiles_id_mapping",
     "get_chebi_id_smiles_mapping",
     "get_chebi_role_to_children",
+    "get_chebi_smiles_id_mapping",
 ]

pyobo/sources/chembl/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Resources from ChEMBL."""
+from .chembl_compound import ChEMBLCompoundGetter
+from .chembl_target import ChEMBLTargetGetter
+__all__ = [
+    "ChEMBLCompoundGetter",
+    "ChEMBLTargetGetter",
+]

pyobo/sources/{chembl.py → chembl/chembl_compound.py} RENAMED Viewed

@@ -1,11 +1,7 @@
-"""Converter for ChEMBL.
-Run with ``python -m pyobo.sources.chembl -vv``.
-"""
+"""Converter for ChEMBL Compounds."""
 import logging
 from collections.abc import Iterable
-from contextlib import closing
 import chembl_downloader
@@ -50,28 +46,20 @@ class ChEMBLCompoundGetter(Obo):
         return iter_terms(version=self._version_or_raise)
-def get_obo(force: bool = False) -> Obo:
-    """Return ChEMBL Compounds as OBO."""
-    return ChEMBLCompoundGetter(force=force)
 def iter_terms(version: str) -> Iterable[Term]:
     """Iterate over ChEMBL compounds."""
-    with chembl_downloader.connect(version=version) as conn:
-        logger.info("using connection %s", conn)
-        with closing(conn.cursor()) as cursor:
-            logger.info("using cursor %s", cursor)
-            cursor.execute(QUERY)
-            for chembl_id, name, smiles, inchi, inchi_key in cursor.fetchall():
-                # TODO add xrefs?
-                term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
-                if smiles:
-                    term.append_property(has_smiles, smiles)
-                if inchi:
-                    term.append_property(has_inchi, inchi)
-                if inchi_key:
-                    term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
-                yield term
+    with chembl_downloader.cursor(version=version) as cursor:
+        cursor.execute(QUERY)
+        for chembl_id, name, smiles, inchi, inchi_key in cursor.fetchall():
+            # TODO add xrefs?
+            term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
+            if smiles:
+                term.annotate_string(has_smiles, smiles)
+            if inchi:
+                term.annotate_string(has_inchi, inchi)
+            if inchi_key:
+                term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
+            yield term
 if __name__ == "__main__":

pyobo/sources/chembl/chembl_target.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""Converter for ChEMBL targets."""
+import logging
+from collections import defaultdict
+from collections.abc import Iterable
+import chembl_downloader
+from tqdm import tqdm
+from pyobo import default_reference
+from pyobo.struct import Obo, Reference, Term
+from pyobo.struct.typedef import (
+    exact_match,
+    has_component,
+    has_member,
+    has_participant,
+)
+__all__ = [
+    "ChEMBLTargetGetter",
+]
+from pyobo.utils.path import ensure_df
+logger = logging.getLogger(__name__)
+PREFIX = "chembl.target"
+TTYPE_QUERY = """\
+SELECT TARGET_TYPE, TARGET_DESC, PARENT_TYPE
+FROM TARGET_TYPE
+"""
+QUERY = """\
+SELECT
+    CHEMBL_ID,
+    PREF_NAME,
+    TARGET_TYPE,
+    TAX_ID
+FROM TARGET_DICTIONARY
+"""
+class ChEMBLTargetGetter(Obo):
+    """An ontology representation of ChEMBL targets."""
+    ontology = PREFIX
+    bioversions_key = "chembl"
+    typedefs = [exact_match, has_component, has_member, has_participant]
+    root_terms = [
+        default_reference(PREFIX, "undefined"),
+        default_reference(PREFIX, "molecular"),
+        default_reference(PREFIX, "non-molecular"),
+    ]
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iter_terms(version=self._version_or_raise)
+def iter_terms(version: str) -> Iterable[Term]:
+    """Iterate over ChEMBL targets."""
+    chembl_to_uniprots = get_chembl_protein_equivalences(version=version)
+    target_types: dict[str, Term] = {}
+    parents: dict[str, str] = {}
+    with chembl_downloader.cursor(version=version) as cursor:
+        cursor.execute(TTYPE_QUERY)
+        for target_type, desc, parent in cursor.fetchall():
+            identifier = target_type.lower().replace(" ", "-")
+            target_types[target_type] = Term(
+                reference=default_reference(PREFIX, identifier, name=target_type),
+                definition=desc,
+            )
+            if parent:
+                parents[target_type] = parent
+        for child, parent in parents.items():
+            target_types[child].append_parent(target_types[parent])
+        yield from target_types.values()
+    with chembl_downloader.cursor(version=version) as cursor:
+        cursor.execute(QUERY)
+        for chembl_id, name, target_type, ncbitaxon_id in cursor.fetchall():
+            term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
+            if ncbitaxon_id:
+                term.set_species(str(ncbitaxon_id))
+            term.append_parent(target_types[target_type])
+            uniprot_ids = chembl_to_uniprots.get(chembl_id)
+            if uniprot_ids is None:
+                pass
+            elif target_type in {
+                "PROTEIN COMPLEX",
+                "CHIMERIC PROTEIN",
+                "PROTEIN COMPLEX GROUP",
+                "PROTEIN NUCLEIC-ACID COMPLEX",
+                "SELECTIVITY GROUP",
+            }:
+                for uniprot_id in uniprot_ids:
+                    term.annotate_object(
+                        has_component, Reference(prefix="uniprot", identifier=uniprot_id)
+                    )
+            elif target_type == "PROTEIN FAMILY":
+                for uniprot_id in uniprot_ids:
+                    term.annotate_object(
+                        has_member, Reference(prefix="uniprot", identifier=uniprot_id)
+                    )
+            elif target_type == "PROTEIN-PROTEIN INTERACTION":
+                for uniprot_id in uniprot_ids:
+                    term.annotate_object(
+                        has_participant, Reference(prefix="uniprot", identifier=uniprot_id)
+                    )
+            elif target_type == "SINGLE PROTEIN":
+                if len(uniprot_ids) == 1:
+                    term.append_exact_match(Reference(prefix="uniprot", identifier=uniprot_ids[0]))
+                else:
+                    tqdm.write(
+                        f"[chembl.target:{chembl_id}] multiple mappings found to single protein: {uniprot_ids}"
+                    )
+                    for uniprot_id in uniprot_ids:
+                        term.append_xref(Reference(prefix="uniprot", identifier=uniprot_id))
+            elif len(uniprot_ids) == 1:
+                luid = uniprot_ids[0]
+                if luid.startswith("ENSG"):
+                    reference = Reference(prefix="ensembl", identifier=luid)
+                else:
+                    reference = Reference(prefix="uniprot", identifier=luid)
+                term.append_exact_match(reference)
+            else:
+                tqdm.write(
+                    f"[chembl.target:{chembl_id}] need to handle multiple uniprots for {target_type} - {uniprot_ids}"
+                )
+            yield term
+def get_chembl_protein_equivalences(version: str | None = None) -> dict[str, list[str]]:
+    """Get ChEMBL protein equivalences."""
+    if version is None:
+        version = chembl_downloader.latest()
+    url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
+    df = ensure_df(
+        PREFIX,
+        url=url,
+        sep="\t",
+        skiprows=1,
+        usecols=[0, 1],
+        names=["uniprot", "chembl"],
+        header=None,
+        # names=[TARGET_ID, SOURCE_ID],  # switch around
+    )
+    dd = defaultdict(list)
+    for uniprot, chembl in df.values:
+        dd[chembl].append(uniprot)
+    return dict(dd)
+if __name__ == "__main__":
+    ChEMBLTargetGetter.cli()

pyobo/sources/civic_gene.py CHANGED Viewed

@@ -1,11 +1,12 @@
 """Converter for CiVIC Genes."""
+import datetime
 from collections.abc import Iterable
-from typing import Optional
 import pandas as pd
-from pyobo.struct import Obo, Reference, Term
+from pyobo import default_reference
+from pyobo.struct import Obo, Reference, Term, TypeDef
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -15,38 +16,77 @@ __all__ = [
 PREFIX = "civic.gid"
 URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
+GENE = Term(reference=default_reference(PREFIX, "gene", name="gene"))
+FACTOR = Term(reference=default_reference(PREFIX, "factor", name="factor"))
+FUSION = Term(reference=default_reference(PREFIX, "fusion", name="fusion"))
+HAS_3P = TypeDef.default(PREFIX, "has3p", name="has 3' gene", is_metadata_tag=False)
+HAS_5P = TypeDef.default(PREFIX, "has5p", name="has 5' gene", is_metadata_tag=False)
-def _sort(_o, t):
-    return int(t.identifier)
+TYPES = {"Gene": GENE, "Factor": FACTOR, "Fusion": FUSION}
 class CIVICGeneGetter(Obo):
     """An ontology representation of CiVIC's gene nomenclature."""
     bioversions_key = ontology = PREFIX
-    term_sort_key = _sort
+    typedefs = [HAS_3P, HAS_5P]
+    root_terms = [GENE.reference, FACTOR.reference, FUSION.reference]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over gene terms for CiVIC."""
-        yield from get_terms(self.data_version, force=force)
+        yield from (GENE, FACTOR, FUSION)
+        yield from get_terms(self._version_or_raise, force=force)
-def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
+def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get CIVIC terms."""
-    # if version is not None:
-    #     version_dt: datetime.date = dateutil.parser.parse(version)
-    # else:
-    #     version_dt: datetime.date = datetime.today()
-    # version = version_dt.strftime("01-%b-%Y")
+    dt = datetime.datetime.strptime(version, "%Y-%m-%d")
     # version is like 01-Feb-2024
-    url = f"https://civicdb.org/downloads/{version}/{version}-GeneSummaries.tsv"
+    dt2 = datetime.datetime.strftime(dt, "%d-%b-%Y")
+    url = f"https://civicdb.org/downloads/{dt2}/{dt2}-GeneSummaries.tsv"
     df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
-    for identifier, _, name, entrez_id, description, _last_review, _flag in df.values:
+    for (
+        identifier,
+        _,
+        type,
+        name,
+        aliases,
+        description,
+        _last_review_date,
+        _flag,
+        entrez_id,
+        ncit_id,
+        _5p_status,
+        _3p_status,
+        five_p_id,
+        _5p_name,
+        _5p_ncbigene,
+        three_p_id,
+        _3p_name,
+        _3p_ncbigene,
+    ) in df.values:
         term = Term(
             reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
             definition=description if pd.notna(description) else None,
         )
-        term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
+        term.append_parent(TYPES[type])
+        if pd.notna(entrez_id):
+            term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
+        if pd.notna(ncit_id):
+            term.append_exact_match(Reference(prefix="ncit", identifier=ncit_id))
+        if pd.notna(aliases):
+            for alias in aliases.split(","):
+                if alias != name:
+                    term.append_synonym(alias.strip())
+        if pd.notna(five_p_id):
+            term.append_relationship(
+                HAS_5P, Reference(prefix=PREFIX, identifier=five_p_id, name=_5p_name)
+            )
+        if pd.notna(three_p_id):
+            term.append_relationship(
+                HAS_3P, Reference(prefix=PREFIX, identifier=three_p_id, name=_3p_name)
+            )
         yield term

pyobo/sources/clinicaltrials.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""A source for ClinicalTrials.gov."""
+from collections.abc import Iterable
+from clinicaltrials_downloader import get_studies_slim
+from pyobo import Obo, Reference, Term, TypeDef, default_reference
+from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED
+from pyobo.struct.typedef import has_contributor
+__all__ = [
+    "ClinicalTrialsGetter",
+]
+PREFIX = "clinicaltrials"
+INVESTIGATES_CONDITION = TypeDef(
+    reference=default_reference(
+        prefix=PREFIX, identifier="investigates_condition", name="investigates condition"
+    ),
+    is_metadata_tag=True,
+)
+HAS_INTERVENTION = TypeDef(
+    reference=default_reference(
+        prefix=PREFIX, identifier="has_intervention", name="has intervention"
+    ),
+    is_metadata_tag=True,
+)
+STUDY_TERM = Term(reference=default_reference(PREFIX, "study", name="study"))
+CLINICAL_TRIAL_TERM = Term(
+    reference=default_reference(PREFIX, "clinical-trial", name="clinical trial")
+).append_parent(STUDY_TERM)
+INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
+    reference=default_reference(
+        PREFIX, "interventional-clinical-trial", name="interventional clinical trial"
+    )
+).append_parent(CLINICAL_TRIAL_TERM)
+RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
+    reference=default_reference(
+        PREFIX,
+        "randomized-interventional-clinical-trial",
+        name="randomized interventional clinical trial",
+    )
+).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
+NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
+    reference=default_reference(
+        PREFIX,
+        "non-randomized-interventional-clinical-trial",
+        name="non-randomized interventional clinical trial",
+    )
+).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
+OBSERVATIONAL_CLINICAL_TRIAL_TERM = Term(
+    reference=default_reference(
+        PREFIX, "observational-clinical-trial", name="observational clinical trial"
+    )
+).append_parent(CLINICAL_TRIAL_TERM)
+EXPANDED_ACCESS_STUDY_TERM = Term(
+    reference=default_reference(PREFIX, "expanded-access-study", name="expanded access study")
+).append_parent(STUDY_TERM)
+TERMS = [
+    STUDY_TERM,
+    CLINICAL_TRIAL_TERM,
+    OBSERVATIONAL_CLINICAL_TRIAL_TERM,
+    INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    EXPANDED_ACCESS_STUDY_TERM,
+    RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+]
+# These were identified as the 4 possibilities for study
+# types in ClinicalTrials.gov. See summary script at
+# https://gist.github.com/cthoyt/12a3cb3c63ad68d73fe5a2f0d506526f
+PARENTS: dict[tuple[str | None, str | None], Term] = {
+    ("INTERVENTIONAL", None): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    ("INTERVENTIONAL", "NA"): INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    ("INTERVENTIONAL", "RANDOMIZED"): RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    ("INTERVENTIONAL", "NON_RANDOMIZED"): NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
+    ("OBSERVATIONAL", None): OBSERVATIONAL_CLINICAL_TRIAL_TERM,
+    ("EXPANDED_ACCESS", None): EXPANDED_ACCESS_STUDY_TERM,
+    (None, None): STUDY_TERM,
+}
+class ClinicalTrialsGetter(Obo):
+    """Get the ClinicalTrials.gov database as an ontology."""
+    ontology = PREFIX
+    dynamic_version = True
+    typedefs = [has_contributor, INVESTIGATES_CONDITION, HAS_INTERVENTION]
+    root_terms = [STUDY_TERM.reference]
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms for studies."""
+        yield CHARLIE_TERM
+        yield HUMAN_TERM
+        for term in TERMS:
+            term.append_contributor(CHARLIE_TERM)
+            term.append_comment(PYOBO_INJECTED)
+            yield term
+        yield from iterate_studies()
+def iterate_studies(*, force: bool = False) -> Iterable[Term]:
+    """Iterate over terms for studies."""
+    studies = get_studies_slim(force=force)
+    for study in studies:
+        yield _process_study(study)
+def _process_study(raw_study) -> Term:
+    protocol_section = raw_study["protocolSection"]
+    identification_module = protocol_section["identificationModule"]
+    identifier = identification_module["nctId"]
+    name = identification_module.get("officialTitle")
+    synonym = identification_module.get("briefTitle")
+    if synonym and not name:
+        name, synonym = synonym, None
+    term = Term(
+        reference=Reference(prefix=PREFIX, identifier=identifier, name=name), type="Instance"
+    )
+    if synonym:
+        term.append_synonym(synonym)
+    design_module = protocol_section.get("designModule", {})
+    study_type = design_module.get("studyType")
+    allocation = design_module.get("designInfo", {}).get("allocation")
+    term.append_parent(PARENTS[study_type, allocation])
+    references_module = protocol_section.get("referencesModule", {})
+    for reference in references_module.get("references", []):
+        if pubmed_id := reference.get("pmid"):
+            term.append_see_also(Reference(prefix="pubmed", identifier=pubmed_id))
+    derived_section = raw_study["derivedSection"]
+    for mesh_record in derived_section.get("conditionBrowseModule", {}).get("meshes", []):
+        term.annotate_object(INVESTIGATES_CONDITION, _mesh(mesh_record))
+    for mesh_record in derived_section.get("interventionBrowseModule", {}).get("meshes", []):
+        term.annotate_object(HAS_INTERVENTION, _mesh(mesh_record))
+    return term
+def _mesh(mesh_record: dict[str, str]) -> Reference:
+    return Reference(
+        prefix="mesh", identifier=mesh_record["id"], name=mesh_record.get("term") or None
+    )
+if __name__ == "__main__":
+    ClinicalTrialsGetter.cli()

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl