PyPI - pyobo - Versions diffs - 0.10.11__py3-none-any.whl → 0.10.12__py3-none-any.whl - Mend

pyobo 0.10.11__py3-none-any.whl → 0.10.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

pyobo/api/names.py +27 -6
pyobo/api/utils.py +5 -0
pyobo/cli/lookup.py +2 -2
pyobo/constants.py +31 -1
pyobo/gilda_utils.py +21 -0
pyobo/identifier_utils.py +22 -5
pyobo/reader.py +1 -1
pyobo/sources/__init__.py +2 -0
pyobo/sources/antibodyregistry.py +2 -2
pyobo/sources/biogrid.py +3 -3
pyobo/sources/credit.py +68 -0
pyobo/sources/geonames.py +27 -9
pyobo/sources/hgnc.py +2 -2
pyobo/sources/mesh.py +3 -3
pyobo/sources/msigdb.py +1 -1
pyobo/sources/npass.py +1 -1
pyobo/sources/pubchem.py +3 -3
pyobo/sources/rgd.py +1 -1
pyobo/sources/rhea.py +2 -2
pyobo/sources/ror.py +67 -21
pyobo/sources/uniprot/uniprot.py +2 -2
pyobo/struct/struct.py +4 -3
pyobo/struct/typedef.py +10 -0
pyobo/utils/path.py +2 -1
pyobo/version.py +1 -1
pyobo/xrefdb/sources/__init__.py +6 -3
pyobo/xrefdb/sources/chembl.py +5 -5
pyobo/xrefdb/sources/pubchem.py +3 -2
pyobo/xrefdb/sources/wikidata.py +8 -1
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/METADATA +23 -23
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/RECORD +35 -35
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/WHEEL +1 -1
pyobo/xrefdb/bengo.py +0 -44
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/LICENSE +0 -0
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/entry_points.txt +0 -0
{pyobo-0.10.11.dist-info → pyobo-0.10.12.dist-info}/top_level.txt +0 -0

pyobo/api/names.py CHANGED Viewed

@@ -2,11 +2,15 @@
 """High-level API for nomenclature."""
+from __future__ import annotations
 import logging
 import subprocess
 from functools import lru_cache
 from typing import Callable, List, Mapping, Optional, Set, TypeVar
+from curies import Reference, ReferenceTuple
 from .alts import get_primary_identifier
 from .utils import get_version
 from ..getters import NoBuild, get_ontology
@@ -32,6 +36,8 @@ logger = logging.getLogger(__name__)
 def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
     """Get the name for a CURIE, if possible."""
+    if version is None:
+        version = get_version(curie.split(":")[0])
     prefix, identifier = normalize_curie(curie)
     if prefix and identifier:
         return get_name(prefix, identifier, version=version)
@@ -40,7 +46,8 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[
 X = TypeVar("X")
-NO_BUILD_PREFIXES = set()
+NO_BUILD_PREFIXES: Set[str] = set()
+NO_BUILD_LOGGED: Set = set()
 def _help_get(
@@ -59,8 +66,10 @@ def _help_get(
             logger.warning("[%s] unable to look up results with %s", prefix, f)
             NO_BUILD_PREFIXES.add(prefix)
         return None
-    except ValueError:
-        logger.warning("[%s] unable to look up results with %s", prefix, f)
+    except ValueError as e:
+        if prefix not in NO_BUILD_PREFIXES:
+            logger.warning("[%s] value error while looking up results with %s: %s", prefix, f, e)
+            NO_BUILD_PREFIXES.add(prefix)
         return None
     if not mapping:
@@ -74,9 +83,17 @@ def _help_get(
 @wrap_norm_prefix
-def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+def get_name(
+    prefix: str | Reference | ReferenceTuple,
+    identifier: Optional[str] = None,
+    /,
+    *,
+    version: Optional[str] = None,
+) -> Optional[str]:
     """Get the name for an entity."""
-    return _help_get(get_id_name_mapping, prefix, identifier, version=version)
+    if isinstance(prefix, (ReferenceTuple, Reference)):
+        prefix, identifier = prefix.prefix, prefix.identifier
+    return _help_get(get_id_name_mapping, prefix, identifier, version=version)  # type:ignore
 @lru_cache()
@@ -159,8 +176,12 @@ def get_name_id_mapping(
 @wrap_norm_prefix
-def get_definition(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+def get_definition(
+    prefix: str, identifier: str | None = None, *, version: Optional[str] = None
+) -> Optional[str]:
     """Get the definition for an entity."""
+    if identifier is None:
+        prefix, _, identifier = prefix.rpartition(":")
     return _help_get(get_id_definition_mapping, prefix, identifier, version=version)

pyobo/api/utils.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Optional
 import bioversions
+from ..constants import VERSION_PINS
 from ..utils.path import prefix_directory_join
 __all__ = [
@@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]:
     :param prefix: the resource name
     :return: The version if available else None
     """
+    # Prioritize loaded environmental variable VERSION_PINS dictionary
+    version = VERSION_PINS.get(prefix)
+    if version:
+        return version
     try:
         version = bioversions.get_version(prefix)
     except KeyError:

pyobo/cli/lookup.py CHANGED Viewed

@@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
     """Look up ancestors."""
     curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
 @lookup.command()
@@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
     """Look up descendants."""
     curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
 @lookup.command()

pyobo/constants.py CHANGED Viewed

@@ -2,7 +2,9 @@
 """Constants for PyOBO."""
+import json
 import logging
+import os
 import re
 import pystow
@@ -11,6 +13,7 @@ __all__ = [
     "RAW_DIRECTORY",
     "DATABASE_DIRECTORY",
     "SPECIES_REMAPPING",
+    "VERSION_PINS",
 ]
 logger = logging.getLogger(__name__)
@@ -80,7 +83,6 @@ TYPEDEFS_FILE = "typedefs.tsv.gz"
 SPECIES_RECORD = "5334738"
 SPECIES_FILE = "species.tsv.gz"
 NCBITAXON_PREFIX = "NCBITaxon"
 DATE_FORMAT = "%d:%m:%Y %H:%M"
 PROVENANCE_PREFIXES = {
@@ -99,3 +101,31 @@ PROVENANCE_PREFIXES = {
     "isbn",
     "issn",
 }
+# Load version pin dictionary from the environmental variable VERSION_PINS
+try:
+    VERSION_PINS_STR = os.getenv("VERSION_PINS")
+    if not VERSION_PINS_STR:
+        VERSION_PINS = {}
+    else:
+        VERSION_PINS = json.loads(VERSION_PINS_STR)
+        for k, v in VERSION_PINS.items():
+            if not isinstance(k, str) or not isinstance(v, str):
+                logger.error("The prefix and version name must both be " "strings")
+                VERSION_PINS = {}
+                break
+except ValueError as e:
+    logger.error(
+        "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
+    )
+    VERSION_PINS = {}
+if VERSION_PINS:
+    logger.debug(
+        f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
+        f"\nPyobo will download the latest version of a resource if it's "
+        f"not pinned.\nIf you want to use a specific version of a "
+        f"resource, edit your VERSION_PINS environmental "
+        f"variable which is a JSON string to include a prefix and version "
+        f"name."
+    )

pyobo/gilda_utils.py CHANGED Viewed

@@ -15,6 +15,7 @@ from gilda.term import filter_out_duplicates
 from tqdm.auto import tqdm
 from pyobo import (
+    get_descendants,
     get_id_name_mapping,
     get_id_species_mapping,
     get_id_synonyms_mapping,
@@ -247,3 +248,23 @@ def get_gilda_terms(
             )
             if term is not None:
                 yield term
+def get_gilda_term_subset(
+    source: str, ancestors: Union[str, List[str]], **kwargs
+) -> Iterable[gilda.term.Term]:
+    """Get a subset of terms."""
+    subset = {
+        descendant
+        for parent_curie in _ensure_list(ancestors)
+        for descendant in get_descendants(*parent_curie.split(":")) or []
+    }
+    for term in get_gilda_terms(source, **kwargs):
+        if bioregistry.curie_to_str(term.db, term.id) in subset:
+            yield term
+def _ensure_list(s: Union[str, List[str]]) -> List[str]:
+    if isinstance(s, str):
+        return [s]
+    return s

pyobo/identifier_utils.py CHANGED Viewed

@@ -2,11 +2,14 @@
 """Utilities for handling prefixes."""
+from __future__ import annotations
 import logging
 from functools import wraps
 from typing import Optional, Tuple, Union
 import bioregistry
+from curies import Reference, ReferenceTuple
 from .registries import (
     curie_has_blacklisted_prefix,
@@ -108,11 +111,25 @@ def wrap_norm_prefix(f):
     """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
     @wraps(f)
-    def _wrapped(prefix, *args, **kwargs):
-        norm_prefix = bioregistry.normalize_prefix(prefix)
-        if norm_prefix is None:
-            raise ValueError(f"Invalid prefix: {prefix}")
-        return f(norm_prefix, *args, **kwargs)
+    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
+        if isinstance(prefix, str):
+            norm_prefix = bioregistry.normalize_prefix(prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix}")
+            prefix = norm_prefix
+        elif isinstance(prefix, Reference):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
+        elif isinstance(prefix, ReferenceTuple):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = ReferenceTuple(norm_prefix, prefix.identifier)
+        else:
+            raise TypeError
+        return f(prefix, *args, **kwargs)
     return _wrapped

pyobo/reader.py CHANGED Viewed

@@ -417,7 +417,7 @@ def _clean_definition(s: str) -> str:
     # if '\t' in s:
     #     logger.warning('has tab')
     return (
-        s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace("\d", "")  # noqa:W605
+        s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "")  # noqa:W605
     )

pyobo/sources/__init__.py CHANGED Viewed

@@ -12,6 +12,7 @@ from .civic_gene import CIVICGeneGetter
 from .complexportal import ComplexPortalGetter
 from .conso import CONSOGetter
 from .cpt import CPTGetter
+from .credit import CreditGetter
 from .cvx import CVXGetter
 from .depmap import DepMapGetter
 from .dictybase_gene import DictybaseGetter
@@ -69,6 +70,7 @@ __all__ = [
     "CVXGetter",
     "ChEMBLCompoundGetter",
     "ComplexPortalGetter",
+    "CreditGetter",
     "DepMapGetter",
     "DictybaseGetter",
     "DrugBankGetter",

pyobo/sources/antibodyregistry.py CHANGED Viewed

@@ -5,12 +5,12 @@
 import logging
 from typing import Iterable, Mapping, Optional
-import bioversions
 import pandas as pd
 from bioregistry.utils import removeprefix
 from tqdm.auto import tqdm
 from pyobo import Obo, Term
+from pyobo.api.utils import get_version
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -27,7 +27,7 @@ CHUNKSIZE = 20_000
 def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
     if version is None:
-        version = bioversions.get_version(PREFIX)
+        version = get_version(PREFIX)
     df = ensure_df(
         PREFIX,
         url=URL,

pyobo/sources/biogrid.py CHANGED Viewed

@@ -5,9 +5,9 @@
 from functools import partial
 from typing import Mapping, Optional
-import bioversions
 import pandas as pd
+from pyobo.api.utils import get_version
 from pyobo.resources.ncbitaxon import get_ncbitaxon_id
 from pyobo.utils.cache import cached_mapping
 from pyobo.utils.path import ensure_df, prefix_directory_join
@@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]:
 def get_df() -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
-    version = bioversions.get_version("biogrid")
+    version = get_version("biogrid")
     url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
     df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
     df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
@@ -65,7 +65,7 @@ def get_df() -> pd.DataFrame:
         "cache",
         "xrefs",
         name="ncbigene.tsv",
-        version=partial(bioversions.get_version, PREFIX),
+        version=partial(get_version, PREFIX),
     ),
     header=["biogrid_id", "ncbigene_id"],
 )

pyobo/sources/credit.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Converter for the Contributor Roles Taxonomy."""
+from __future__ import annotations
+import json
+from typing import Iterable
+from more_itertools import chunked
+from pyobo.struct import Obo, Term
+from pyobo.utils.path import ensure_path
+__all__ = [
+    "CreditGetter",
+]
+url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
+PREFIX = "credit"
+class CreditGetter(Obo):
+    """An ontology representation of the Contributor Roles Taxonomy."""
+    ontology = PREFIX
+    static_version = "2022"
+    idspaces = {
+        PREFIX: "https://credit.niso.org/contributor-roles/",
+    }
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(force=force)
+def get_obo(force: bool = False) -> Obo:
+    """Get RGD as OBO."""
+    return CreditGetter(force=force)
+def get_terms(force: bool = False) -> list[Term]:
+    """Get terms from the Contributor Roles Taxonomy via GitHub."""
+    path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
+    with open(path) as f:
+        data = json.load(f)
+    terms = []
+    for x in data:
+        name = x["name"].removesuffix(".md").lower()
+        pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
+        with open(pp) as f:
+            header, *rest = f.read().splitlines()
+            name = header = header.removeprefix("# Contributor Roles/")
+            dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
+            identifier = (
+                dd["Canonical URL"]
+                .removeprefix("https://credit.niso.org/contributor-roles/")
+                .rstrip("/")
+            )
+            desc = dd["Short definition"]
+            terms.append(
+                Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
+            )
+    return terms
+if __name__ == "__main__":
+    get_obo(force=True).write_default(write_obo=True)

pyobo/sources/geonames.py CHANGED Viewed

@@ -1,5 +1,7 @@
 """Get terms from geonames."""
+from __future__ import annotations
 import logging
 from typing import Collection, Iterable, Mapping
@@ -146,15 +148,7 @@ def get_code_to_admin2(
     return code_to_admin2
-def get_cities(
-    code_to_country,
-    code_to_admin1,
-    code_to_admin2,
-    *,
-    minimum_population: int = 100_000,
-    force: bool = False,
-) -> Mapping[str, Term]:
-    """Get a mapping from city code to term."""
+def _get_cities_df(force: bool = False) -> pd.DataFrame:
     columns = [
         "geonames_id",
         "name",
@@ -184,7 +178,19 @@ def get_cities(
         names=columns,
         dtype=str,
     )
+    return cities_df
+def get_cities(
+    code_to_country,
+    code_to_admin1,
+    code_to_admin2,
+    *,
+    minimum_population: int = 100_000,
+    force: bool = False,
+) -> Mapping[str, Term]:
+    """Get a mapping from city code to term."""
+    cities_df = _get_cities_df(force=force)
     cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
     cities_df.synonyms = cities_df.synonyms.str.split(",")
@@ -235,5 +241,17 @@ def get_cities(
     return terms
+def get_city_to_country() -> dict[str, str]:
+    """Get a mapping from city GeoNames to country GeoNames id."""
+    rv = {}
+    code_to_country = get_code_to_country()
+    cities_df = _get_cities_df()
+    for city_geonames_id, country_code in cities_df[["geonames_id", "country_code"]].values:
+        if pd.isna(city_geonames_id) or pd.isna(country_code):
+            continue
+        rv[city_geonames_id] = code_to_country[country_code].identifier
+    return rv
 if __name__ == "__main__":
     GeonamesGetter().write_default(write_obo=True, force=True)

pyobo/sources/hgnc.py CHANGED Viewed

@@ -10,10 +10,10 @@ from collections import Counter, defaultdict
 from operator import attrgetter
 from typing import DefaultDict, Dict, Iterable, Optional
-import bioversions
 from tabulate import tabulate
 from tqdm.auto import tqdm
+from pyobo.api.utils import get_version
 from pyobo.struct import (
     Obo,
     Reference,
@@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo:
 def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:  # noqa:C901
     """Get HGNC terms."""
     if version is None:
-        version = bioversions.get_version("hgnc")
+        version = get_version("hgnc")
     unhandled_entry_keys: typing.Counter[str] = Counter()
     unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
     path = ensure_path(

pyobo/sources/mesh.py CHANGED Viewed

@@ -11,6 +11,7 @@ from xml.etree.ElementTree import Element
 from tqdm.auto import tqdm
+from pyobo.api.utils import get_version
 from pyobo.identifier_utils import standardize_ec
 from pyobo.struct import Obo, Reference, Synonym, Term
 from pyobo.utils.cache import cached_json, cached_mapping
@@ -331,9 +332,8 @@ def get_mesh_category_curies(
     .. seealso:: https://meshb.nlm.nih.gov/treeView
     """
     if version is None:
-        import bioversions
-        version = bioversions.get_version("mesh")
+        version = get_version("mesh")
+        assert version is not None
     tree_to_mesh = get_tree_to_mesh_id(version=version)
     rv = []
     for i in range(1, 100):

pyobo/sources/msigdb.py CHANGED Viewed

@@ -137,7 +137,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 def _get_definition(attrib) -> Optional[str]:
     rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None
     if rv is not None:
-        return rv.replace("\d", "").replace("\s", "")  # noqa: W605
+        return rv.replace(r"\d", "").replace(r"\s", "")  # noqa: W605
     return None

pyobo/sources/npass.py CHANGED Viewed

@@ -64,7 +64,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         )
         for xref_prefix, xref_id in [
-            ("chembl", chembl_id),
+            ("chembl.compound", chembl_id),
             # ("zinc", zinc_id),
         ]:
             if pd.notna(xref_id):

pyobo/sources/pubchem.py CHANGED Viewed

@@ -5,12 +5,12 @@
 import logging
 from typing import Iterable, Mapping, Optional
-import bioversions
 import pandas as pd
 from bioregistry.utils import removeprefix
 from tqdm.auto import tqdm
 from ..api import get_name_id_mapping
+from ..api.utils import get_version
 from ..struct import Obo, Reference, Synonym, Term
 from ..utils.iter import iterate_gzips_together
 from ..utils.path import ensure_df, ensure_path
@@ -26,7 +26,7 @@ PREFIX = "pubchem.compound"
 def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
     if version is None:
-        version = bioversions.get_version("pubchem")
+        version = get_version("pubchem")
     return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"
@@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:
 def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
     if version is None:
-        version = bioversions.get_version("pubchem")
+        version = get_version("pubchem")
     # 2 tab-separated columns: compound_id, name
     cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
     cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)

pyobo/sources/rgd.py CHANGED Viewed

@@ -28,7 +28,7 @@ old_name_type = SynonymTypeDef.from_text("old_name")
 # NOTE unigene id was discontinue in January 18th, 2021 dump
-GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES.RAT.txt"
+GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"
 GENES_HEADER = [
     "GENE_RGD_ID",
     "SYMBOL",

pyobo/sources/rhea.py CHANGED Viewed

@@ -5,9 +5,9 @@
 import logging
 from typing import TYPE_CHECKING, Dict, Iterable, Optional
-import bioversions
 import pystow
+from pyobo.api.utils import get_version
 from pyobo.struct import Obo, Reference, Term
 from pyobo.struct.typedef import (
     TypeDef,
@@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
     """Get the Rhea RDF graph."""
     # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
     if version is None:
-        version = bioversions.get_version(PREFIX)
+        version = get_version(PREFIX)
     return pystow.ensure_rdf(
         "pyobo",
         "raw",

pyobo 0.10.11__py3-none-any.whl → 0.10.12__py3-none-any.whl

pyobo 0.10.11py3-none-any.whl → 0.10.12py3-none-any.whl