PyPI - pyobo - Versions diffs - 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl - Mend

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

pyobo/__init__.py +0 -2
pyobo/__main__.py +0 -2
pyobo/api/__init__.py +0 -2
pyobo/api/alts.py +6 -7
pyobo/api/hierarchy.py +14 -15
pyobo/api/metadata.py +3 -4
pyobo/api/names.py +31 -32
pyobo/api/properties.py +6 -7
pyobo/api/relations.py +12 -11
pyobo/api/species.py +5 -6
pyobo/api/typedefs.py +1 -3
pyobo/api/utils.py +61 -5
pyobo/api/xrefs.py +4 -5
pyobo/aws.py +3 -5
pyobo/cli/__init__.py +0 -2
pyobo/cli/aws.py +0 -2
pyobo/cli/cli.py +0 -4
pyobo/cli/database.py +1 -3
pyobo/cli/lookup.py +0 -2
pyobo/cli/utils.py +0 -2
pyobo/constants.py +1 -33
pyobo/getters.py +19 -26
pyobo/gilda_utils.py +19 -17
pyobo/identifier_utils.py +10 -10
pyobo/mocks.py +5 -6
pyobo/normalizer.py +24 -24
pyobo/obographs.py +8 -5
pyobo/plugins.py +3 -4
pyobo/py.typed +0 -0
pyobo/reader.py +19 -21
pyobo/registries/__init__.py +0 -2
pyobo/registries/metaregistry.py +6 -8
pyobo/resource_utils.py +1 -3
pyobo/resources/__init__.py +0 -2
pyobo/resources/ncbitaxon.py +2 -3
pyobo/resources/ro.py +2 -4
pyobo/resources/so.py +55 -0
pyobo/resources/so.tsv +2604 -0
pyobo/sources/README.md +15 -0
pyobo/sources/__init__.py +0 -2
pyobo/sources/agrovoc.py +3 -3
pyobo/sources/antibodyregistry.py +2 -3
pyobo/sources/biogrid.py +4 -4
pyobo/sources/ccle.py +3 -4
pyobo/sources/cgnc.py +1 -3
pyobo/sources/chebi.py +2 -4
pyobo/sources/chembl.py +1 -3
pyobo/sources/civic_gene.py +2 -3
pyobo/sources/complexportal.py +57 -20
pyobo/sources/conso.py +2 -4
pyobo/sources/cpt.py +1 -3
pyobo/sources/credit.py +1 -1
pyobo/sources/cvx.py +1 -3
pyobo/sources/depmap.py +3 -4
pyobo/sources/dictybase_gene.py +15 -12
pyobo/sources/drugbank.py +6 -7
pyobo/sources/drugbank_salt.py +3 -4
pyobo/sources/drugcentral.py +9 -8
pyobo/sources/expasy.py +33 -16
pyobo/sources/famplex.py +3 -5
pyobo/sources/flybase.py +5 -6
pyobo/sources/geonames.py +1 -1
pyobo/sources/gmt_utils.py +5 -6
pyobo/sources/go.py +4 -6
pyobo/sources/gwascentral_phenotype.py +1 -3
pyobo/sources/gwascentral_study.py +2 -3
pyobo/sources/hgnc.py +30 -26
pyobo/sources/hgncgenefamily.py +9 -11
pyobo/sources/icd10.py +3 -4
pyobo/sources/icd11.py +3 -4
pyobo/sources/icd_utils.py +6 -7
pyobo/sources/interpro.py +3 -5
pyobo/sources/itis.py +1 -3
pyobo/sources/kegg/__init__.py +0 -2
pyobo/sources/kegg/api.py +3 -4
pyobo/sources/kegg/genes.py +3 -4
pyobo/sources/kegg/genome.py +19 -9
pyobo/sources/kegg/pathway.py +5 -6
pyobo/sources/mesh.py +19 -21
pyobo/sources/mgi.py +1 -3
pyobo/sources/mirbase.py +13 -9
pyobo/sources/mirbase_constants.py +0 -2
pyobo/sources/mirbase_family.py +1 -3
pyobo/sources/mirbase_mature.py +1 -3
pyobo/sources/msigdb.py +4 -5
pyobo/sources/ncbigene.py +3 -5
pyobo/sources/npass.py +2 -4
pyobo/sources/omim_ps.py +1 -3
pyobo/sources/pathbank.py +35 -28
pyobo/sources/pfam.py +1 -3
pyobo/sources/pfam_clan.py +1 -3
pyobo/sources/pid.py +3 -5
pyobo/sources/pombase.py +7 -6
pyobo/sources/pubchem.py +2 -3
pyobo/sources/reactome.py +30 -11
pyobo/sources/rgd.py +3 -4
pyobo/sources/rhea.py +7 -8
pyobo/sources/ror.py +3 -2
pyobo/sources/selventa/__init__.py +0 -2
pyobo/sources/selventa/schem.py +1 -3
pyobo/sources/selventa/scomp.py +1 -3
pyobo/sources/selventa/sdis.py +1 -3
pyobo/sources/selventa/sfam.py +1 -3
pyobo/sources/sgd.py +1 -3
pyobo/sources/slm.py +29 -17
pyobo/sources/umls/__init__.py +0 -2
pyobo/sources/umls/__main__.py +0 -2
pyobo/sources/umls/get_synonym_types.py +1 -1
pyobo/sources/umls/umls.py +2 -4
pyobo/sources/uniprot/__init__.py +0 -2
pyobo/sources/uniprot/uniprot.py +11 -10
pyobo/sources/uniprot/uniprot_ptm.py +6 -5
pyobo/sources/utils.py +3 -5
pyobo/sources/wikipathways.py +1 -3
pyobo/sources/zfin.py +20 -9
pyobo/ssg/__init__.py +3 -2
pyobo/struct/__init__.py +0 -2
pyobo/struct/reference.py +22 -23
pyobo/struct/struct.py +132 -116
pyobo/struct/typedef.py +14 -10
pyobo/struct/utils.py +0 -2
pyobo/utils/__init__.py +0 -2
pyobo/utils/cache.py +14 -6
pyobo/utils/io.py +9 -10
pyobo/utils/iter.py +5 -6
pyobo/utils/misc.py +1 -3
pyobo/utils/ndex_utils.py +6 -7
pyobo/utils/path.py +4 -5
pyobo/version.py +3 -5
pyobo/xrefdb/__init__.py +0 -2
pyobo/xrefdb/canonicalizer.py +27 -18
pyobo/xrefdb/priority.py +0 -2
pyobo/xrefdb/sources/__init__.py +3 -4
pyobo/xrefdb/sources/biomappings.py +0 -2
pyobo/xrefdb/sources/cbms2019.py +0 -2
pyobo/xrefdb/sources/chembl.py +0 -2
pyobo/xrefdb/sources/compath.py +1 -3
pyobo/xrefdb/sources/famplex.py +3 -5
pyobo/xrefdb/sources/gilda.py +0 -2
pyobo/xrefdb/sources/intact.py +5 -5
pyobo/xrefdb/sources/ncit.py +1 -3
pyobo/xrefdb/sources/pubchem.py +2 -5
pyobo/xrefdb/sources/wikidata.py +2 -4
pyobo/xrefdb/xrefs_pipeline.py +15 -16
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
pyobo-0.11.1.dist-info/METADATA +711 -0
pyobo-0.11.1.dist-info/RECORD +173 -0
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
pyobo-0.11.1.dist-info/entry_points.txt +2 -0
pyobo-0.10.12.dist-info/METADATA +0 -499
pyobo-0.10.12.dist-info/RECORD +0 -169
pyobo-0.10.12.dist-info/entry_points.txt +0 -15
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0

pyobo/sources/README.md ADDED Viewed

@@ -0,0 +1,15 @@
+# Sources
+1. Create a new module in `pyobo.sources` named with the prefix for the resource you're ontologizing
+2. Make sure your resource has a corresponding prefix in [the Bioregistry](https://github.com/biopragmatics/bioregistry)
+3. Subclass the `pyobo.Obo` class to represent your resource
+4. Add your resource to the list in `pyobo.sources.__init__`
+## What is in scope?
+1. Biomedical, semantic web, bibliographic, life sciences, and related natural sciences resources are welcome
+2. The source you want to ontologize should be an identifier resource, i.e., it mints its own identifiers. If you want
+   to ontologize some database that reuses some other identifier resource's identifiers, then this isn't the right
+   place.
+3. Resources that cannot be downloaded automatically are not in scope for PyOBO, because reproducibility and
+   reusability are core values of this software.

pyobo/sources/__init__.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Sources of OBO content."""
 from class_resolver import ClassResolver

pyobo/sources/agrovoc.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Converter for AGROVOC."""
 import pystow
@@ -11,6 +9,8 @@ __all__ = [
     "ensure_agrovoc_graph",
 ]
+PREFIX = "agrovoc"
 def ensure_agrovoc_graph(version: str) -> Graph:
     """Download and parse the given version of AGROVOC."""
@@ -20,5 +20,5 @@ def ensure_agrovoc_graph(version: str) -> Graph:
     graph.bind("skosxl", "http://www.w3.org/2008/05/skos-xl#")
     graph.bind("skos", SKOS)
     graph.bind("dcterms", DCTERMS)
-    graph.bind("agrovoc", "http://aims.fao.org/aos/agrontology#")
+    graph.bind(PREFIX, "http://aims.fao.org/aos/agrontology#")
     return graph

pyobo/sources/antibodyregistry.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for the Antibody Registry."""
 import logging
-from typing import Iterable, Mapping, Optional
+from collections.abc import Iterable, Mapping
+from typing import Optional
 import pandas as pd
 from bioregistry.utils import removeprefix

pyobo/sources/biogrid.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
 """Extract and convert BioGRID identifiers."""
+from collections.abc import Mapping
 from functools import partial
-from typing import Mapping, Optional
+from typing import Optional
 import pandas as pd
@@ -77,7 +76,8 @@ def get_ncbigene_mapping() -> Mapping[str, str]:
     .. code-block:: python
         from pyobo import get_filtered_xrefs
-        biogrid_ncbigene_mapping = get_filtered_xrefs('biogrid', 'ncbigene')
+        biogrid_ncbigene_mapping = get_filtered_xrefs("biogrid", "ncbigene")
     """
     df = get_df()
     df = df.loc[df["IDENTIFIER_TYPE"] == "ENTREZ_GENE", ["BIOGRID_ID", "IDENTIFIER_VALUE"]]

pyobo/sources/ccle.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
 """Get the CCLE Cells, provided by cBioPortal."""
 import tarfile
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Optional
 import pandas as pd
 import pystow
@@ -25,7 +24,7 @@ class CCLEGetter(Obo):
     ontology = bioregistry_key = PREFIX
-    def __post_init__(self):  # noqa: D105
+    def __post_init__(self):
         self.data_version = VERSION
     def iter_terms(self, force: bool = False) -> Iterable[Term]:

pyobo/sources/cgnc.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for CGNC."""
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd

pyobo/sources/chebi.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Converter for ChEBI."""
-from typing import Mapping, Set, Tuple
+from collections.abc import Mapping
 from ..api import get_filtered_properties_mapping, get_filtered_relations_df
 from ..struct import Reference, TypeDef
@@ -33,7 +31,7 @@ def get_chebi_smiles_id_mapping() -> Mapping[str, str]:
 has_role = TypeDef(reference=Reference(prefix="chebi", identifier="has_role"))
-def get_chebi_role_to_children() -> Mapping[str, Set[Tuple[str, str]]]:
+def get_chebi_role_to_children() -> Mapping[str, set[tuple[str, str]]]:
     """Get the ChEBI role to children mapping."""
     df = get_filtered_relations_df("chebi", relation=has_role)
     return multisetdict((role_id, ("chebi", chemical_id)) for chemical_id, _, role_id in df.values)

pyobo/sources/chembl.py CHANGED Viewed

@@ -1,13 +1,11 @@
-# -*- coding: utf-8 -*-
 """Converter for ChEMBL.
 Run with ``python -m pyobo.sources.chembl -vv``.
 """
 import logging
+from collections.abc import Iterable
 from contextlib import closing
-from typing import Iterable
 import chembl_downloader

pyobo/sources/civic_gene.py CHANGED Viewed

@@ -1,8 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for CiVIC Genes."""
-from typing import Iterable, Optional
+from collections.abc import Iterable
+from typing import Optional
 import pandas as pd

pyobo/sources/complexportal.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for ComplexPortal."""
 import logging
-from typing import Iterable, List, Tuple
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
@@ -52,7 +50,7 @@ DTYPE = {
 }
-def _parse_members(s) -> List[Tuple[Reference, str]]:
+def _parse_members(s) -> list[tuple[Reference, str]]:
     if pd.isna(s):
         return []
@@ -60,15 +58,35 @@ def _parse_members(s) -> List[Tuple[Reference, str]]:
     for member in s.split("|"):
         entity_id, count = member.split("(")
         count = count.rstrip(")")
-        if ":" in entity_id:
+        if entity_id.startswith("URS"):
+            prefix, identifier = "rnacentral", entity_id
+        elif entity_id.startswith("CPX"):
+            # TODO why self xref?
+            prefix, identifier = "complexportal", entity_id
+        elif entity_id.startswith("["):
+            continue  # this is a list of uniprot IDs, not sure what to do with this
+        elif entity_id.startswith("EBI-"):
+            continue
+        elif ":" not in entity_id:
+            if "PRO_" in entity_id:
+                prefix = "uniprot.chain"
+                identifier = entity_id.split("-")[1]
+            elif "-" in entity_id:
+                prefix, identifier = "uniprot.isoform", entity_id
+            else:
+                prefix, identifier = "uniprot", entity_id
+        else:
             prefix, identifier = entity_id.split(":", 1)
+        try:
+            reference = Reference(prefix=prefix, identifier=identifier)
+        except ValueError:
+            tqdm.write(f"failed to validate reference: {entity_id}")
         else:
-            prefix, identifier = "uniprot", entity_id
-        rv.append((Reference(prefix=prefix, identifier=identifier), count))
+            rv.append((reference, count))
     return rv
-def _parse_xrefs(s) -> List[Tuple[Reference, str]]:
+def _parse_xrefs(s) -> list[tuple[Reference, str]]:
     if pd.isna(s):
         return []
@@ -76,27 +94,40 @@ def _parse_xrefs(s) -> List[Tuple[Reference, str]]:
     for xref in s.split("|"):
         xref = xref.replace("protein ontology:PR:", "PR:")
         xref = xref.replace("protein ontology:PR_", "PR:")
+        xref = xref.replace("rhea:rhea ", "rhea:")
+        xref = xref.replace("rhea:Rhea ", "rhea:")
+        xref = xref.replace("rhea:RHEA:rhea", "rhea:")
+        xref = xref.replace("rhea:RHEA: ", "rhea:")
+        xref = xref.replace("rhea:RHEA:rhea ", "rhea:")
+        xref = xref.replace("intenz:RHEA:", "rhea:")
+        xref = xref.replace("eccode::", "eccode:")
+        xref = xref.replace("eccode:EC:", "eccode:")
+        xref = xref.replace("intenz:EC:", "eccode:")
+        xref = xref.replace("eccode:RHEA:", "rhea:")
+        xref = xref.replace("efo:MONDO:", "MONDO:")
+        xref = xref.replace("omim:MIM:", "omim:")
+        xref = xref.replace("efo:HP:", "HP:")
+        xref = xref.replace("efo:Orphanet:", "Orphanet:")
+        xref = xref.replace("orphanet:ORDO:", "Orphanet:")
+        xref = xref.replace("biorxiv:doi.org/", "doi:")
+        xref = xref.replace("emdb:EMDB-", "emdb:EMD-")
+        xref = xref.replace("wwpdb:EMD-", "emdb:EMD-")
+        xref = xref.replace("signor:CPX-", "complexportal:CPX-")
         try:
             xref_curie, note = xref.split("(")
         except ValueError:
             logger.warning("xref missing (: %s", xref)
             continue
         note = note.rstrip(")")
-        note.replace("rhea:rhea ", "rhea:")
-        note.replace("rhea:Rhea ", "rhea:")
-        note.replace("eccode::", "eccode:")
-        note.replace("eccode:EC:", "eccode:")
-        note.replace("eccode:RHEA:", "rhea:")
-        if note.lower().startswith("rhea "):
-            note = note[len("Rhea ") :]
-            if note.lower().startswith("rhea:rhea "):
-                note = note[len("rhea:rhea ") :]
-        if note.lower().startswith("EC:"):
-            note = note[len("EC:") :]
+        if xref_curie.startswith("intenz:"):
+            xref_curie = _clean_intenz(xref_curie)
         try:
             reference = Reference.from_curie(xref_curie)
         except ValueError:
-            logger.warning("can not parse CURIE: %s", xref)
+            logger.warning("can not parse CURIE: %s", xref_curie)
             continue
         if reference is None:
             logger.warning("reference is None after parsing: %s", xref)
@@ -105,6 +136,12 @@ def _parse_xrefs(s) -> List[Tuple[Reference, str]]:
     return rv
+def _clean_intenz(s: str) -> str:
+    for _ in range(3):
+        s = s.rstrip("-").rstrip(".")
+    return s
 class ComplexPortalGetter(Obo):
     """An ontology representation of the Complex Portal."""

pyobo/sources/conso.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Converter for CONSO."""
-from typing import Iterable, List
+from collections.abc import Iterable
 import pandas as pd
@@ -68,7 +66,7 @@ def iter_terms() -> Iterable[Term]:
     for _, row in terms_df.iterrows():
         if row["Name"] == "WITHDRAWN":
             continue
-        provenance: List[Reference] = []
+        provenance: list[Reference] = []
         for curie in row["References"].split(","):
             curie = curie.strip()
             if not curie:

pyobo/sources/cpt.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Converter for CPT."""
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd

pyobo/sources/credit.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from __future__ import annotations
 import json
-from typing import Iterable
+from collections.abc import Iterable
 from more_itertools import chunked

pyobo/sources/cvx.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for CVX."""
 from collections import defaultdict
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd

pyobo/sources/depmap.py CHANGED Viewed

@@ -1,8 +1,7 @@
-# -*- coding: utf-8 -*-
 """DepMap cell lines."""
-from typing import Iterable, Optional
+from collections.abc import Iterable
+from typing import Optional
 import pandas as pd
 import pystow
@@ -113,7 +112,7 @@ def ensure(version: str, force: bool = False) -> pd.DataFrame:
         url=get_url(version=version),
         name="sample_info.tsv",
         force=force,
-        read_csv_kwargs=dict(sep=",", dtype=str),
+        read_csv_kwargs={"sep": ",", "dtype": str},
     )

pyobo/sources/dictybase_gene.py CHANGED Viewed

@@ -1,18 +1,15 @@
-# -*- coding: utf-8 -*-
 """Converter for dictyBase gene.
 Note that normal dictybase identifiers are for sequences
 """
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
-from pyobo.struct import Obo, Reference, Synonym, Term, from_species, has_gene_product
-from pyobo.utils.io import multisetdict
+from pyobo.struct import Obo, Synonym, Term, from_species, has_gene_product
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -51,10 +48,11 @@ def get_obo(force: bool = False) -> Obo:
 def get_terms(force: bool = False) -> Iterable[Term]:
     """Get terms."""
+    # TODO the mappings file has actually no uniprot at all, and requires text mining
     # DDB ID	DDB_G ID	Name	UniProt ID
-    uniprot_mappings = multisetdict(
-        ensure_df(PREFIX, url=URL, force=force, name="uniprot_mappings.tsv", usecols=[1, 3]).values
-    )
+    # uniprot_mappings = multisetdict(
+    #     ensure_df(PREFIX, url=URL, force=force, name="uniprot_mappings.tsv", usecols=[1, 3]).values
+    # )
     terms = ensure_df(PREFIX, url=URL, force=force, name="gene_info.tsv")
     # GENE ID (DDB_G ID)	Gene Name	Synonyms	Gene products
@@ -70,10 +68,15 @@ def get_terms(force: bool = False) -> Iterable[Term]:
         if synonyms and pd.notna(synonyms):
             for synonym in synonyms.split(","):
                 term.append_synonym(Synonym(synonym.strip()))
-        for uniprot_id in uniprot_mappings.get(identifier, []):
-            if not uniprot_id or pd.isna(uniprot_id) or uniprot_id not in {"unknown", "pseudogene"}:
-                continue
-            term.append_relationship(has_gene_product, Reference.auto("uniprot", uniprot_id))
+        # for uniprot_id in uniprot_mappings.get(identifier, []):
+        #     if not uniprot_id or pd.isna(uniprot_id) or uniprot_id in {"unknown", "pseudogene"}:
+        #         continue
+        #     try:
+        #         uniprot_ref = Reference(prefix="uniprot", identifier=uniprot_id)
+        #     except ValueError:
+        #         tqdm.write(f"[dictybase.gene] invalid uniprot ref: {uniprot_id}")
+        #     else:
+        #         term.append_relationship(has_gene_product, uniprot_ref)
         term.set_species(identifier="44689", name="Dictyostelium discoideum")
         yield term

pyobo/sources/drugbank.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Convert DrugBank to OBO.
 Run with ``python -m pyobo.sources.drugbank``
@@ -8,14 +6,15 @@ Run with ``python -m pyobo.sources.drugbank``
 import datetime
 import itertools as itt
 import logging
+from collections.abc import Iterable, Mapping
 from functools import lru_cache
-from typing import Any, Dict, Iterable, Mapping, Optional
+from typing import Any, Optional
 from xml.etree import ElementTree
 import pystow
 from tqdm.auto import tqdm
-from ..getters import NoBuild
+from ..getters import NoBuildError
 from ..struct import Obo, Reference, Term
 from ..struct.typedef import has_inchi, has_salt, has_smiles
 from ..utils.cache import cached_pickle
@@ -139,7 +138,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
     return term
-@lru_cache()
+@lru_cache
 def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
     """Get the DrugBank XML parser root.
@@ -152,7 +151,7 @@ def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
         username = pystow.get_config("pyobo", "drugbank_username", raise_on_missing=True)
         password = pystow.get_config("pyobo", "drugbank_password", raise_on_missing=True)
     except ConfigError as e:
-        raise NoBuild from e
+        raise NoBuildError from e
     element = parse_drugbank(version=version, username=username, password=password)
     return element.getroot()
@@ -167,7 +166,7 @@ smiles_template = f"{ns}calculated-properties/{ns}property[{ns}kind='SMILES']/{n
 def _extract_drug_info(drug_xml: ElementTree.Element) -> Mapping[str, Any]:
     """Extract information from an XML element representing a drug."""
     # assert drug_xml.tag == f'{ns}drug'
-    row: Dict[str, Any] = {
+    row: dict[str, Any] = {
         "type": drug_xml.get("type"),
         "drugbank_id": drug_xml.findtext(f"{ns}drugbank-id[@primary='true']"),
         "cas": drug_xml.findtext(f"{ns}cas-number"),

pyobo/sources/drugbank_salt.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Convert DrugBank Salts to OBO.
 Run with ``python -m pyobo.sources.drugbank_salt``
@@ -10,11 +8,12 @@ Get relations between drugbank salts and drugbank parents with
 .. code-block:: python
     import pyobo
-    df = pyobo.get_filtered_relations_df('drugbank', 'obo:has_salt')
+    df = pyobo.get_filtered_relations_df("drugbank", "obo:has_salt")
 """
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 from .drugbank import iterate_drug_info
 from ..struct import Obo, Reference, Term

pyobo/sources/drugcentral.py CHANGED Viewed

@@ -1,11 +1,9 @@
-# -*- coding: utf-8 -*-
 """Get DrugCentral as OBO."""
 import logging
 from collections import defaultdict
+from collections.abc import Iterable
 from contextlib import closing
-from typing import DefaultDict, Iterable, List
 import bioregistry
 import psycopg2
@@ -25,9 +23,9 @@ PREFIX = "drugcentral"
 HOST = "unmtid-dbs.net"
 PORT = 5433
 USER = "drugman"
-PASSWORD = "dosage"
+PASSWORD = "dosage"  # noqa:S105
 DBNAME = "drugcentral"
-PARAMS = dict(dbname=DBNAME, user=USER, password=PASSWORD, host=HOST, port=PORT)
+PARAMS = {"dbname": DBNAME, "user": USER, "password": PASSWORD, "host": HOST, "port": PORT}
 class DrugCentralGetter(Obo):
@@ -58,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
         with closing(conn.cursor()) as cur:
             cur.execute("SELECT struct_id, id_type, identifier FROM public.identifier")
             rows = cur.fetchall()
-            xrefs: DefaultDict[str, List[Reference]] = defaultdict(list)
+            xrefs: defaultdict[str, list[Reference]] = defaultdict(list)
             for drugcentral_id, prefix, identifier in tqdm(
                 rows, unit_scale=True, desc="loading xrefs"
             ):
@@ -70,13 +68,16 @@ def iter_terms() -> Iterable[Term]:
                 if xref_prefix_norm is None:
                     tqdm.write(f"did not normalize {prefix}:{identifier}")
                     continue
+                if xref_prefix_norm == "pdb.ligand":
+                    # there is a weird invalid escaped \W appearing in pdb ligand ids
+                    identifier = identifier.strip()
                 identifier = bioregistry.standardize_identifier(xref_prefix_norm, identifier)
                 xrefs[str(drugcentral_id)].append(
                     Reference(prefix=xref_prefix_norm, identifier=identifier)
                 )
         with closing(conn.cursor()) as cur:
             cur.execute("SELECT id, name FROM public.synonyms")
-            synonyms: DefaultDict[str, List[Synonym]] = defaultdict(list)
+            synonyms: defaultdict[str, list[Synonym]] = defaultdict(list)
             for drugcentral_id, synonym in cur.fetchall():
                 synonyms[str(drugcentral_id)].append(Synonym(name=synonym))
@@ -100,4 +101,4 @@ def iter_terms() -> Iterable[Term]:
 if __name__ == "__main__":
-    get_obo().write_default(write_obo=True)
+    DrugCentralGetter.cli()

pyobo/sources/expasy.py CHANGED Viewed

@@ -1,10 +1,10 @@
-# -*- coding: utf-8 -*-
 """Convert ExPASy to OBO."""
 import logging
+import re
 from collections import defaultdict
-from typing import Any, Dict, Iterable, Mapping, Optional, Set, Tuple
+from collections.abc import Iterable, Mapping
+from typing import Any, Optional
 from .utils import get_go_mapping
 from ..struct import Obo, Reference, Synonym, Term
@@ -43,7 +43,7 @@ class ExpasyGetter(Obo):
     """A getter for ExPASy Enzyme Classes."""
     bioversions_key = ontology = PREFIX
-    typedefs = [has_member, enables]
+    typedefs = [has_member, enables, term_replaced_by]
     root_terms = [
         Reference(prefix="eccode", identifier="1"),
         Reference(prefix="eccode", identifier="2"),
@@ -76,7 +76,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     with open(tree_path) as file:
         tree = get_tree(file)
-    terms: Dict[str, Term] = {}
+    terms: dict[str, Term] = {}
     child_to_parents = defaultdict(list)
     for ec_code, data in tree.items():
         terms[ec_code] = Term(
@@ -146,7 +146,9 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         for domain in data.get("domains", []):
             term.append_relationship(
                 has_member,
-                Reference(prefix=domain["namespace"], identifier=domain["identifier"]),
+                Reference.model_validate(
+                    {"prefix": domain["namespace"], "identifier": domain["identifier"]},
+                ),
             )
         for protein in data.get("proteins", []):
             term.append_relationship(
@@ -176,7 +178,7 @@ def normalize_expasy_id(expasy_id: str) -> str:
     return expasy_id.replace(" ", "")
-def give_edge(unnormalized_ec_code: str) -> Tuple[int, Optional[str], str]:
+def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]:
     """Return a (parent, child) tuple for given id."""
     levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x]
     level = len(levels)
@@ -227,7 +229,7 @@ def get_database(lines: Iterable[str]) -> Mapping:
     for groups in _group_by_id(lines):
         _, expasy_id = groups[0]
-        ec_data_entry: Dict[str, Any] = {
+        ec_data_entry: dict[str, Any] = {
             "concept": {
                 "namespace": PREFIX,
                 "identifier": expasy_id,
@@ -249,8 +251,10 @@ def get_database(lines: Iterable[str]) -> Mapping:
             elif descriptor == DE and value == "Deleted entry.":
                 ec_data_entry["deleted"] = True
             elif descriptor == DE and value.startswith("Transferred entry: "):
-                value = value[len("Transferred entry: ") :].rstrip().rstrip(".")
-                ec_data_entry["transfer_id"] = value.split(" and ")
+                # TODO There's a situation where there are enough transfers that it goes on to a second line
+                #  the following line just gives up on this one. or maybe I don't understand
+                value = value.strip().removesuffix("and").rstrip(",").strip()
+                ec_data_entry["transfer_id"] = _parse_transfer(value)
             elif descriptor == DE:
                 ec_data_entry["concept"]["name"] = value.rstrip(".")  # type:ignore
             elif descriptor == AN:
@@ -269,17 +273,30 @@ def get_database(lines: Iterable[str]) -> Mapping:
                         continue
                     uniprot_id, uniprot_accession = uniprot_entry.split(",")
                     ec_data_entry["proteins"].append(  # type:ignore
-                        dict(
-                            namespace="uniprot",
-                            name=uniprot_accession,
-                            identifier=uniprot_id,
-                        )
+                        {
+                            "namespace": "uniprot",
+                            "name": uniprot_accession,
+                            "identifier": uniprot_id,
+                        }
                     )
         rv[expasy_id] = ec_data_entry
     return rv
+TRANSFER_SPLIT_RE = re.compile(r",\s*|\s+and\s+")
+def _parse_transfer(value: str) -> list[str]:
+    """Parse transferred entry string.
+    >>> _parse_transfer("Transferred entry: 1.1.1.198, 1.1.1.227 and 1.1.1.228.")
+    ['1.1.1.198', '1.1.1.227', '1.1.1.228']
+    """
+    value = value[len("Transferred entry: ") :].rstrip().rstrip(".")
+    return sorted(x.strip().removeprefix("and").strip() for x in TRANSFER_SPLIT_RE.split(value))
 def _group_by_id(lines):
     """Group lines by identifier."""
     groups = []
@@ -300,7 +317,7 @@ def _group_by_id(lines):
     return groups
-def get_ec2go(version: str) -> Mapping[str, Set[Tuple[str, str]]]:
+def get_ec2go(version: str) -> Mapping[str, set[tuple[str, str]]]:
     """Get the EC mapping to GO activities."""
     url = "http://current.geneontology.org/ontology/external2go/ec2go"
     path = ensure_path(PREFIX, url=url, name="ec2go.tsv", version=version)

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl