pyobo 0.12.4__py3-none-any.whl → 0.12.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +85 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +115 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +135 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/METADATA +5 -5
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/RECORD +41 -35
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/typedef.py
CHANGED
@@ -15,7 +15,9 @@ __all__ = [
     "alternative_term",
     "broad_match",
     "close_match",
+    "contributes_to_condition",
     "default_typedefs",
+    "derives_from_organism",
     "editor_note",
     "enables",
     "exact_match",
@@ -24,10 +26,12 @@ __all__ = [
     "gene_product_member_of",
     "has_contributor",
     "has_dbxref",
+    "has_depiction",
     "has_end_date",
     "has_gene_product",
     "has_homepage",
     "has_inchi",
+    "has_mailbox",
     "has_mature",
     "has_member",
     "has_part",
@@ -103,12 +107,18 @@ has_component = TypeDef(
 derives_from = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
 )
+derives_from_organism = TypeDef(
+    reference=Reference(prefix="CLO", identifier="0037207", name="derives from organism")
+)
 molecularly_interacts_with = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
 )
 located_in = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
 )
+contributes_to_condition = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
+)
 exact_match = TypeDef(reference=v.exact_match, is_metadata_tag=True)
 narrow_match = TypeDef(reference=v.narrow_match, is_metadata_tag=True)
 broad_match = TypeDef(reference=v.broad_match, is_metadata_tag=True)
@@ -257,9 +267,11 @@ has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v
 
 has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
 
-has_homepage = TypeDef(
-
-)
+has_homepage = TypeDef(reference=v.has_homepage, is_metadata_tag=True)
+has_depiction = TypeDef(reference=v.has_depiction, is_metadata_tag=True)
+has_mailbox = TypeDef(reference=v.has_mailbox, is_metadata_tag=True)
+has_mailing_list = TypeDef(reference=v.has_mailing_list, is_metadata_tag=True)
+has_repository = TypeDef(reference=v.has_repository, is_metadata_tag=True)
 
 has_category = TypeDef(
     reference=Reference(prefix="biolink", identifier="category", name="has category"),
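The new metadata typedefs all follow the pattern visible in this hunk: wrap a Reference and set is_metadata_tag=True so the property serializes as an annotation rather than a relationship. A minimal sketch of that pattern, not the module's literal source, assuming only that pyobo.struct exports TypeDef and Reference:

from pyobo.struct import Reference, TypeDef

# a metadata-tag typedef: rendered as an annotation property
# rather than a relationship when the ontology is serialized
has_mailbox = TypeDef(
    reference=Reference(prefix="foaf", identifier="mbox", name="has mailbox"),
    is_metadata_tag=True,
)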
pyobo/struct/vocabulary.py
CHANGED
@@ -90,6 +90,14 @@ has_description = _c(_v.has_description)
 has_license = _c(_v.has_license)
 has_title = _c(_v.has_title)
 
+has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")
+has_logo = Reference(prefix="foaf", identifier="logo", name="has logo")
+has_mailbox = Reference(prefix="foaf", identifier="mbox", name="has mailbox")
+has_depiction = Reference(prefix="foaf", identifier="depicted_by", name="depicted by")
+has_repository = Reference(prefix="doap", identifier="repository", name="has repository")
+has_mailing_list = Reference(prefix="doap", identifier="mailing-list", name="has mailing list")
+has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
+
 has_part = Reference(prefix=BFO_PREFIX, identifier="0000051", name="has part")
 part_of = Reference(prefix=BFO_PREFIX, identifier="0000050", name="part of")
 orthologous = Reference(
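These vocabulary entries are plain Reference objects backing the FOAF/DOAP typedefs added in typedef.py above. A quick sketch of how such a reference behaves, assuming the curies-style Reference API that pyobo builds on:

from pyobo.struct import Reference

has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
print(has_maintainer.curie)  # doap:maintainer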
pyobo/utils/cache.py
CHANGED
@@ -12,8 +12,9 @@ from pystow.cache import CachedCollection as cached_collection  # noqa:N813
 from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
+from pystow.utils import safe_open
 
-from .io import open_map_tsv, open_multimap_tsv,
+from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
     "cached_collection",
@@ -69,13 +70,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"
 
 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with safe_open(path, read=True) as file:
+    with safe_open(path, operation="read") as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with safe_open(path, read=False) as file:
+    with safe_open(path, operation="write") as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
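This swaps pyobo's local safe_open(path, read=...) helper (deleted from pyobo/utils/io.py below) for pystow's, which takes an operation keyword and picks gzip or plain text from the file suffix. A small sketch of the new call style, with a hypothetical path:

from pystow.utils import safe_open

# hypothetical path; gzip mode is chosen automatically from the .gz suffix
with safe_open("graph.json.gz", operation="write") as file:
    file.write("{}")

with safe_open("graph.json.gz", operation="read") as file:
    print(file.read())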
pyobo/utils/io.py
CHANGED
@@ -1,27 +1,24 @@
 """I/O utilities."""
 
 import collections.abc
-import contextlib
-import csv
 import gzip
 import logging
 from collections import defaultdict
 from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Literal, TextIO, TypeVar
+from typing import TypeVar, cast
 
 import pandas as pd
+import pystow.utils
+from pystow.utils import safe_open_reader, safe_open_writer
 from tqdm.auto import tqdm
 
 __all__ = [
-    "get_reader",
     "multidict",
     "multisetdict",
     "open_map_tsv",
     "open_multimap_tsv",
-    "open_reader",
-    "safe_open",
     "safe_open_writer",
     "write_iterable_tsv",
     "write_map_tsv",
@@ -34,35 +31,22 @@ X = TypeVar("X")
 Y = TypeVar("Y")
 
 
-@contextmanager
-def open_reader(path: str | Path, sep: str = "\t"):
-    """Open a file and get a reader for it."""
-    path = Path(path)
-    with safe_open(path, read=True) as file:
-        yield get_reader(file, sep=sep)
-
-
-def get_reader(x, sep: str = "\t"):
-    """Get a :func:`csv.reader` with PyOBO default settings."""
-    return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
     path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-    with safe_open(path, read=True) as file:
+    rv = {}
+    with pystow.utils.safe_open_reader(path) as reader:
         if has_header:
-            next(file)  # throw away header
+            next(reader)  # throw away header
         if use_tqdm:
-            file = tqdm(file, desc=f"loading TSV from {path}")
-        rv = {}
-        for row in get_reader(file):
+            reader = tqdm(reader, desc=f"loading TSV from {path}")
+        for row in reader:
             if len(row) != 2:
                 logger.warning("[%s] malformed row can not be put in dict: %s", path, row)
                 continue
             rv[row[0]] = row[1]
-        return rv
+    return rv
@@ -72,24 +56,27 @@ def open_multimap_tsv(
     has_header: bool = True,
 ) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
-    return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header))
+    with _help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header) as file:
+        return multidict(file)
 
 
+@contextmanager
 def _help_multimap_tsv(
     path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[tuple[str, str]]:
-    with safe_open(path, read=True) as file:
+) -> Generator[Iterable[tuple[str, str]], None, None]:
+    with safe_open_reader(path) as reader:
         if has_header:
             try:
-                next(file)  # throw away header
+                next(reader)  # throw away header
             except gzip.BadGzipFile as e:
                 raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
-            file = tqdm(file, desc=f"loading TSV from {path}")
-        yield from get_reader(file)
+            yield tqdm(reader, desc=f"loading TSV from {path}")
+        else:
+            yield cast(Iterable[tuple[str, str]], reader)
 
 
 def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
@@ -149,28 +136,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-@contextlib.contextmanager
-def safe_open(
-    path: str | Path, read: bool, encoding: str | None = None
-) -> Generator[TextIO, None, None]:
-    """Safely open a file for reading or writing text."""
-    path = Path(path).expanduser().resolve()
-    mode: Literal["rt", "wt"] = "rt" if read else "wt"
-    if path.suffix.endswith(".gz"):
-        with gzip.open(path, mode=mode, encoding=encoding) as file:
-            yield file
-    else:
-        with open(path, mode=mode) as file:
-            yield file
-
-
-@contextlib.contextmanager
-def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
-    """Open a CSV writer, wrapping :func:`csv.writer`."""
-    if isinstance(f, str | Path):
-        with safe_open(f, read=False) as file:
-            yield csv.writer(file, delimiter=delimiter)
-    else:
-        yield csv.writer(f, delimiter=delimiter)
pyobo/utils/misc.py
CHANGED
@@ -1,9 +1,17 @@
 """Miscellaneous utilities."""
 
+from __future__ import annotations
+
 import logging
+from collections.abc import Callable
 from datetime import datetime
 
+import bioversions.utils
+
+from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
+
 __all__ = [
+    "VERSION_GETTERS",
     "cleanup_version",
 ]
 
@@ -15,8 +23,11 @@ BIZARRE_LOGGED = set()
 VERSION_REWRITES = {
     "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
     "http://www.w3.org/2006/time#2016": "2016",  # for time
+    "https://purl.org/ontology/modalia#1.0.0": "1.0.0",  # for dalia
+}
+STATIC_VERSION_REWRITES = {
+    "orth": "2",
 }
-STATIC_VERSION_REWRITES = {"orth": "2"}
 VERSION_PREFIXES = [
     "http://www.orpha.net/version",
     "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
@@ -27,17 +38,34 @@ VERSION_PREFIXES = [
     "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
     "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
     "https://purl.dataone.org/odo/ADCAD/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://nfdi.fiz-karlsruhe.de/ontology/",
+    "http://www.w3.org/ns/prov-",
+    "https://raw.githubusercontent.com/enpadasi/Ontology-for-Nutritional-Studies/releases/download/v",
+    "http://purl.jp/bio/4/ontology/iobc/",  # like http://purl.jp/bio/4/ontology/iobc/1.6.0
+    "http://w3id.org/nfdi4ing/metadata4ing/",  # like http://w3id.org/nfdi4ing/metadata4ing/1.3.1
+    "http://www.semanticweb.com/OntoRxn/",  # like http://www.semanticweb.com/OntoRxn/0.2.5
+    "https://w3id.org/lehrplan/ontology/",  # like in https://w3id.org/lehrplan/ontology/1.0.0-4
 ]
 VERSION_PREFIX_SPLITS = [
     "http://www.ebi.ac.uk/efo/releases/v",
     "http://www.ebi.ac.uk/swo/swo.owl/",
     "http://semanticscience.org/ontology/sio/v",
     "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+    "http://nmrml.org/cv/v",  # as in http://nmrml.org/cv/v1.1.0/nmrCV
+    "http://enanomapper.github.io/ontologies/releases/",  # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
 ]
+BAD = {
+    "http://purl.obolibrary.org/obo",
+    "http://www.bioassayontology.org/bao/bao_complete",
+}
 
 
 def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
+    # in case a literal string that wasn't parsed properly gets put in
+    data_version = data_version.strip('"')
+
     if data_version in VERSION_REWRITES:
         return VERSION_REWRITES[data_version]
@@ -74,3 +102,109 @@ def cleanup_version(data_version: str, prefix: str) -> str:
     logger.debug("[%s] bizarre version: %s", prefix, data_version)
     BIZARRE_LOGGED.add((prefix, data_version))
     return data_version
+
+
+def _get_obo_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_obo_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_owl_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_owl_xml_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_obograph_json_version(prefix: str, url: str) -> str | None:
+    rv = bioversions.utils.get_obograph_json_version(url)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+#: A mapping from data type to version getter function
+VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
+    "obo": _get_obo_version,
+    "owl": _get_owl_version,
+    "json": _get_obograph_json_version,
+}
+
+
+def _prioritize_version(
+    data_version: str | None,
+    ontology_prefix: str,
+    version: str | None,
+    date: datetime | None,
+) -> str | None:
+    """Process version information coming from several sources and normalize them."""
+    if ontology_prefix in STATIC_VERSION_REWRITES:
+        return STATIC_VERSION_REWRITES[ontology_prefix]
+
+    if version:
+        if version in BAD:
+            logger.debug("[%s] had known bad version, returning None: %s", ontology_prefix, version)
+            return None
+
+        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
+        if not data_version:
+            logger.debug(
+                "[%s] did not have a version, overriding with %s",
+                ontology_prefix,
+                clean_injected_version,
+            )
+            return clean_injected_version
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        if clean_data_version != clean_injected_version:
+            # in this case, we're going to trust the one that's passed
+            # through explicitly more than the graph's content
+            logger.debug(
+                "[%s] had version %s, overriding with %s",
+                ontology_prefix,
+                data_version,
+                version,
+            )
+        return clean_injected_version
+
+    if data_version:
+        if data_version in BAD:
+            logger.debug(
+                "[%s] had known bad version, returning None: %s", ontology_prefix, data_version
+            )
+            return None
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
+        return clean_data_version
+
+    if date is not None:
+        derived_date_version = date.strftime("%Y-%m-%d")
+        logger.debug(
+            "[%s] does not report a version. falling back to date: %s",
+            ontology_prefix,
+            derived_date_version,
+        )
+        return derived_date_version
+
+    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
+    return None
+
+
+def _get_version_from_artifact(prefix: str) -> str | None:
+    # assume that all possible files that can be downloaded
+    # are in sync and have the same version
+    for ontology_format, func in ONTOLOGY_GETTERS:
+        url = func(prefix)
+        if url is None:
+            continue
+        # Try to peek into the file to get the version without fully downloading
+        version_func = VERSION_GETTERS.get(ontology_format)
+        if version_func is None:
+            continue
+        version = version_func(prefix, url)
+        if version:
+            return cleanup_version(version, prefix=prefix)
+    return None
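Taken together, cleanup_version strips known version-IRI prefixes (via VERSION_PREFIXES) and applies the rewrite tables, while VERSION_GETTERS lets _get_version_from_artifact peek at a remote OBO/OWL/JSON artifact for its version. A sketch of the prefix-stripping behavior on a hypothetical version IRI:

from pyobo.utils.misc import cleanup_version

# "http://www.w3.org/ns/prov-" is in VERSION_PREFIXES, so only the
# trailing version string should survive (hypothetical input IRI)
print(cleanup_version("http://www.w3.org/ns/prov-20130430", prefix="prov"))
# expected: 20130430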
pyobo/utils/path.py
CHANGED
@@ -1,6 +1,7 @@
 """Utilities for building paths."""
 
 import enum
+import json
 import logging
 from pathlib import Path
 from typing import Any, Literal
@@ -14,6 +15,7 @@ from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTO
 __all__ = [
     "CacheArtifact",
     "ensure_df",
+    "ensure_json",
     "ensure_path",
     "get_cache_path",
     "get_relation_cache_path",
@@ -46,11 +48,13 @@ def ensure_path(
     version: VersionHint = None,
     name: str | None = None,
     force: bool = False,
-    backend: Literal["requests", "urllib"] = "urllib",
+    backend: Literal["requests", "urllib"] | None = None,
     verify: bool = True,
     **download_kwargs: Any,
 ) -> Path:
     """Download a file if it doesn't exist."""
+    if backend is None:
+        backend = "urllib"
     if verify:
         download_kwargs = {"backend": backend}
     else:
@@ -79,7 +83,7 @@ def ensure_df(
     sep: str = "\t",
     dtype=str,
     verify: bool = True,
-    backend: Literal["requests", "urllib"] = "urllib",
+    backend: Literal["requests", "urllib"] | None = None,
     **kwargs,
 ) -> pd.DataFrame:
     """Download a file and open as a dataframe."""
@@ -96,6 +100,30 @@ def ensure_df(
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
+def ensure_json(
+    prefix: str,
+    *parts: str,
+    url: str,
+    version: VersionHint = None,
+    name: str | None = None,
+    force: bool = False,
+    verify: bool = True,
+    backend: Literal["requests", "urllib"] | None = None,
+) -> Any:
+    """Download a file and open as JSON."""
+    _path = ensure_path(
+        prefix,
+        *parts,
+        url=url,
+        version=version,
+        name=name,
+        force=force,
+        verify=verify,
+        backend=backend,
+    )
+    with _path.open() as file:
+        return json.load(file)
+
+
 class CacheArtifact(enum.Enum):
     """An enumeration for cache artifacts."""
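The new ensure_json composes ensure_path (download-if-missing into the pystow-managed cache) with json.load. A usage sketch; the prefix and URL are hypothetical:

from pyobo.utils.path import ensure_json

# hypothetical resource: downloaded once into the PyOBO cache for the
# given prefix, then the local copy is parsed as JSON on every call
data = ensure_json("example", url="https://example.org/data.json")
print(type(data))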
{pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyobo
-Version: 0.12.4
+Version: 0.12.5
 Summary: A python package for handling and generating OBO
 Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
 Author: Charles Tapley Hoyt
@@ -36,9 +36,9 @@ Requires-Dist: more-click>=0.0.2
 Requires-Dist: humanize
 Requires-Dist: tabulate
 Requires-Dist: cachier
-Requires-Dist: pystow>=0.7.
-Requires-Dist: bioversions>=0.8.
-Requires-Dist: bioregistry>=0.12.
+Requires-Dist: pystow>=0.7.5
+Requires-Dist: bioversions>=0.8.101
+Requires-Dist: bioregistry>=0.12.30
 Requires-Dist: bioontologies>=0.7.2
 Requires-Dist: ssslm>=0.0.13
 Requires-Dist: zenodo-client>=0.3.6
@@ -46,7 +46,7 @@ Requires-Dist: class-resolver>=0.6.0
 Requires-Dist: psycopg2-binary
 Requires-Dist: pydantic>=2.0
 Requires-Dist: curies>=0.10.17
-Requires-Dist: curies-processing>=0.1.
+Requires-Dist: curies-processing>=0.1.2
 Requires-Dist: python-dateutil
 Requires-Dist: networkx>=3.4
 Requires-Dist: drugbank-downloader