pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py CHANGED
@@ -1,12 +1,10 @@
 """Utilities for caching files."""
 
-import gzip
 import json
 import logging
-import os
 from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic, TypeVar, Union
+from typing import Generic, TypeVar
 
 import networkx as nx
 from pystow.cache import Cached
@@ -15,18 +13,18 @@ from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
 
-from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
+from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv
 
 __all__ = [
-    # from pystow
-    "cached_json",
     "cached_collection",
     "cached_df",
-    "cached_pickle",
     # implemented here
     "cached_graph",
+    # from pystow
+    "cached_json",
     "cached_mapping",
     "cached_multidict",
+    "cached_pickle",
 ]
 
 logger = logging.getLogger(__name__)
@@ -39,14 +37,15 @@ class _CachedMapping(Cached[X], Generic[X]):
 
     def __init__(
         self,
-        path: Union[str, Path, os.PathLike],
+        path: str | Path,
         header: Iterable[str],
         *,
         use_tqdm: bool = False,
         force: bool = False,
+        cache: bool = True,
    ):
         """Initialize the mapping cache."""
-        super().__init__(path=path, force=force)
+        super().__init__(path=path, cache=cache, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
 
@@ -65,17 +64,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
 
 cached_mapping = CachedMapping
 
+NODE_LINK_STYLE = "links"  # TODO update to "edges"
+
 
-def get_gzipped_graph(path: Union[str, Path]) -> nx.MultiDiGraph:
+def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with gzip.open(path, "rt") as file:
-        return nx.node_link_graph(json.load(file))
+    with safe_open(path, read=True) as file:
+        return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
-def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
+def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with gzip.open(path, "wt") as file:
-        json.dump(nx.node_link_data(graph), file)
+    with safe_open(path, read=False) as file:
+        json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
 
 
 class CachedGraph(Cached[nx.MultiDiGraph]):
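With these changes, the graph helpers route all I/O through safe_open and pin the node-link flavor via NODE_LINK_STYLE. A minimal round-trip sketch (the file name and toy graph are hypothetical):

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

# build a toy graph and serialize it as gzipped node-link JSON
graph = nx.MultiDiGraph()
graph.add_edge("CHEBI:1234", "CHEBI:5678", key="is_a")
write_gzipped_graph(graph, "example.json.gz")

# reading uses the same NODE_LINK_STYLE, so edge keys survive the round trip
roundtrip = get_gzipped_graph("example.json.gz")
assert set(roundtrip.edges(keys=True)) == set(graph.edges(keys=True))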
pyobo/utils/io.py CHANGED
@@ -1,33 +1,31 @@
 """I/O utilities."""
 
 import collections.abc
+import contextlib
 import csv
 import gzip
 import logging
-import time
 from collections import defaultdict
-from collections.abc import Iterable, Mapping
+from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional, TypeVar, Union
-from xml.etree.ElementTree import Element
+from typing import Literal, TextIO, TypeVar
 
 import pandas as pd
-from lxml import etree
 from tqdm.auto import tqdm
 
 __all__ = [
-    "open_map_tsv",
-    "open_multimap_tsv",
+    "get_reader",
     "multidict",
     "multisetdict",
+    "open_map_tsv",
+    "open_multimap_tsv",
+    "open_reader",
+    "safe_open",
+    "safe_open_writer",
+    "write_iterable_tsv",
     "write_map_tsv",
     "write_multimap_tsv",
-    "write_iterable_tsv",
-    "parse_xml_gz",
-    "get_writer",
-    "open_reader",
-    "get_reader",
 ]
 
 logger = logging.getLogger(__name__)
@@ -37,10 +35,10 @@ Y = TypeVar("Y")
 
 
 @contextmanager
-def open_reader(path: Union[str, Path], sep: str = "\t"):
+def open_reader(path: str | Path, sep: str = "\t"):
     """Open a file and get a reader for it."""
     path = Path(path)
-    with gzip.open(path, "rt") if path.suffix == ".gz" else open(path) as file:
+    with safe_open(path, read=True) as file:
         yield get_reader(file, sep=sep)
 
 
@@ -49,16 +47,11 @@ def get_reader(x, sep: str = "\t"):
     return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
 
 
-def get_writer(x, sep: str = "\t"):
-    """Get a :func:`csv.writer` with PyOBO default settings."""
-    return csv.writer(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
-    path: Union[str, Path], *, use_tqdm: bool = False, has_header: bool = True
+    path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-    with open(path) as file:
+    with safe_open(path, read=True) as file:
         if has_header:
             next(file)  # throw away header
         if use_tqdm:
@@ -73,7 +66,7 @@ def open_map_tsv(
 
 
 def open_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -83,14 +76,17 @@
 
 
 def _help_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
 ) -> Iterable[tuple[str, str]]:
-    with open(path) as file:
+    with safe_open(path, read=True) as file:
         if has_header:
-            next(file)  # throw away header
+            try:
+                next(file)  # throw away header
+            except gzip.BadGzipFile as e:
+                raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
             file = tqdm(file, desc=f"loading TSV from {path}")
         yield from get_reader(file)
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
 
 def write_map_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
+    path: str | Path,
+    header: Iterable[str] | None = None,
+    rv: Iterable[tuple[str, str]] | Mapping[str, str],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -129,7 +125,7 @@ def write_map_tsv(
 
 def write_multimap_tsv(
     *,
-    path: Union[str, Path],
+    path: str | Path,
     header: Iterable[str],
     rv: Mapping[str, list[str]],
     sep: str = "\t",
@@ -141,26 +137,40 @@
 
 def write_iterable_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
+    path: str | Path,
+    header: Iterable[str] | None = None,
     it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
     it = (row for row in it if all(cell is not None for cell in row))
     it = sorted(it)
-    with open(path, "w") as file:
-        writer = get_writer(file, sep=sep)
+    with safe_open_writer(path, delimiter=sep) as writer:
         if header is not None:
             writer.writerow(header)
         writer.writerows(it)
 
 
-def parse_xml_gz(path: Union[str, Path]) -> Element:
-    """Parse an XML file from a path to a GZIP file."""
-    path = Path(path).resolve()
-    t = time.time()
-    logger.info("parsing xml from %s", path)
-    tree = etree.parse(path.as_posix())  # type:ignore
-    logger.info("parsed xml in %.2f seconds", time.time() - t)
-    return tree.getroot()
+@contextlib.contextmanager
+def safe_open(
+    path: str | Path, read: bool, encoding: str | None = None
+) -> Generator[TextIO, None, None]:
+    """Safely open a file for reading or writing text."""
+    path = Path(path).expanduser().resolve()
+    mode: Literal["rt", "wt"] = "rt" if read else "wt"
+    if path.suffix.endswith(".gz"):
+        with gzip.open(path, mode=mode, encoding=encoding) as file:
+            yield file
+    else:
+        with open(path, mode=mode) as file:
+            yield file
+
+
+@contextlib.contextmanager
+def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
+    """Open a CSV writer, wrapping :func:`csv.writer`."""
+    if isinstance(f, str | Path):
+        with safe_open(f, read=False) as file:
+            yield csv.writer(file, delimiter=delimiter)
+    else:
+        yield csv.writer(f, delimiter=delimiter)
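Together, safe_open and safe_open_writer replace the removed get_writer and the ad-hoc gzip.open calls, choosing gzip or plain text from the .gz suffix. A minimal sketch of writing and reading a mapping (the file name and rows are hypothetical):

from pyobo.utils.io import open_map_tsv, safe_open_writer

# the .gz suffix makes safe_open_writer compress transparently
with safe_open_writer("species.tsv.gz") as writer:
    writer.writerow(("ncbitaxon_id", "name"))
    writer.writerow(("9606", "Homo sapiens"))

# open_map_tsv goes through safe_open, so it reads the gzip back directly,
# discarding the header row by default
mapping = open_map_tsv("species.tsv.gz")
assert mapping == {"9606": "Homo sapiens"}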
pyobo/utils/iter.py CHANGED
@@ -8,8 +8,8 @@ from typing import TypeVar
 from more_itertools import peekable
 
 __all__ = [
-    "iterate_together",
     "iterate_gzips_together",
+    "iterate_together",
 ]
 
 X = TypeVar("X")
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
 def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
-        a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
-        b = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
-        yield from iterate_together(a, b)
+        a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        yield from iterate_together(a_reader, b_reader)  # type:ignore
 
 
 def iterate_together(
@@ -38,7 +38,7 @@ def iterate_together(
     - Each key in the index is present within both files
     """
     b_peekable = peekable(b)
-    b_index, _ = b_peekable.peek()
+    b_index: X | type[_Done] = b_peekable.peek()[0]
 
     for a_index, a_value in a:
         zs = []
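iterate_together is a merge-join over two key-sorted (key, value) streams: each left-hand pair is yielded with the list of right-hand values that share its key. A small in-memory sketch with toy data, assuming the sortedness contract from the docstring holds:

from pyobo.utils.iter import iterate_together

genes = iter([("hgnc:1", "gene A"), ("hgnc:2", "gene B")])
annotations = iter([("hgnc:1", "go:1"), ("hgnc:1", "go:2"), ("hgnc:2", "go:3")])

for key, gene, gos in iterate_together(genes, annotations):
    print(key, gene, gos)
# expected:
#   hgnc:1 gene A ['go:1', 'go:2']
#   hgnc:2 gene B ['go:3']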
pyobo/utils/misc.py CHANGED
@@ -1,79 +1,67 @@
 """Miscellaneous utilities."""
 
-import gzip
 import logging
-import os
 from datetime import datetime
-from subprocess import check_output
-from typing import Optional
 
 __all__ = [
-    "obo_to_obograph",
-    "obo_to_owl",
     "cleanup_version",
 ]
 
-
 logger = logging.getLogger(__name__)
 
-
-def obo_to_obograph(obo_path, obograph_path) -> None:
-    """Convert an OBO file to OBO Graph file with pronto."""
-    import pronto
-
-    ontology = pronto.Ontology(obo_path)
-    with gzip.open(obograph_path, "wb") as file:
-        ontology.dump(file, format="json")
-
-
-def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
-    """Convert an OBO file to an OWL file with ROBOT."""
-    args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
-    ret = check_output(  # noqa:S603
-        args,
-        cwd=os.path.dirname(__file__),
-    )
-    return ret.decode()
-
-
 BIZARRE_LOGGED = set()
 
+#: Rewrites for mostly static resources that have weird quirks
+VERSION_REWRITES = {
+    "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
+    "http://www.w3.org/2006/time#2016": "2016",  # for time
+}
+STATIC_VERSION_REWRITES = {"orth": "2"}
+VERSION_PREFIXES = [
+    "http://www.orpha.net/version",
+    "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
+    "http://humanbehaviourchange.org/ontology/bcio.owl/",
+    "http://purl.org/pav/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://purl.dataone.org/odo/MOSAIC/",
+    "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
+    "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
+    "https://purl.dataone.org/odo/ADCAD/",
+]
+VERSION_PREFIX_SPLITS = [
+    "http://www.ebi.ac.uk/efo/releases/v",
+    "http://www.ebi.ac.uk/swo/swo.owl/",
+    "http://semanticscience.org/ontology/sio/v",
+    "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+]
+
 
-def cleanup_version(data_version: str, prefix: str) -> Optional[str]:
+def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
-    if data_version.endswith(".owl"):
-        data_version = data_version[: -len(".owl")]
+    if data_version in VERSION_REWRITES:
+        return VERSION_REWRITES[data_version]
+
+    data_version = data_version.removesuffix(".owl")
     if data_version.endswith(prefix):
         data_version = data_version[: -len(prefix)]
-    if data_version.startswith("releases/"):
-        data_version = data_version[len("releases/") :]
-    if prefix == "orth":
-        # TODO add bioversions for this
-        return "2"
+    data_version = data_version.removesuffix("/")
+
+    data_version = data_version.removeprefix("releases/")
+    data_version = data_version.removeprefix("release/")
 
-    version_prefixes = [
-        "http://www.orpha.net/version",
-        "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
-        "http://humanbehaviourchange.org/ontology/bcio.owl/",
-        "http://purl.org/pav/",
-        "http://identifiers.org/combine.specifications/teddy.rel-",
-    ]
-    for version_prefix in version_prefixes:
+    for version_prefix in VERSION_PREFIXES:
         if data_version.startswith(version_prefix):
-            return data_version[len(version_prefix) :]
+            return data_version.removeprefix(version_prefix)
 
-    version_prefixes_split = [
-        "http://www.ebi.ac.uk/efo/releases/v",
-        "http://www.ebi.ac.uk/swo/swo.owl/",
-        "http://semanticscience.org/ontology/sio/v",
-        "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
-    ]
-    for version_prefix_split in version_prefixes_split:
+    for version_prefix_split in VERSION_PREFIX_SPLITS:
         if data_version.startswith(version_prefix_split):
-            return data_version[len(version_prefix_split) :].split("/")[0]
+            return data_version.removeprefix(version_prefix_split).split("/")[0]
 
+    # use a heuristic to determine if the version is one of
+    # consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
     if data_version.replace(".", "").isnumeric():
-        return data_version  # consecutive, major.minor, or semantic versioning
+        return data_version
+
     for v in reversed(data_version.split("/")):
         v = v.strip()
         try:
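With the prefix tables hoisted to module-level constants, the cleanup behavior can be checked directly. A few illustrative calls traced through the logic above (the prefix arguments are arbitrary examples):

from pyobo.utils.misc import cleanup_version

# purely numeric versions pass the isnumeric() heuristic unchanged
assert cleanup_version("1.2.3", prefix="example") == "1.2.3"

# static rewrites are checked before anything else
assert cleanup_version("http://www.w3.org/2006/time#2016", prefix="time") == "2016"

# split-style prefixes keep only the first path segment after the prefix
version = cleanup_version("http://www.ebi.ac.uk/efo/releases/v3.62.0/efo.owl", prefix="efo")
assert version == "3.62.0"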
pyobo/utils/ndex_utils.py CHANGED
File without changes
pyobo/utils/path.py CHANGED
@@ -1,60 +1,42 @@
 """Utilities for building paths."""
 
+import enum
 import logging
 from pathlib import Path
-from typing import Any, Callable, Literal, Optional, Union
+from typing import Any, Literal
 
 import pandas as pd
-import requests_ftp
-from pystow.utils import download, name_from_url, read_tarfile_csv
+from curies import Reference
+from pystow import VersionHint
 
-from .misc import cleanup_version
-from ..constants import RAW_MODULE
+from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
 
 __all__ = [
-    "prefix_directory_join",
-    "prefix_directory_join",
-    "prefix_cache_join",
-    "get_prefix_obo_path",
-    "ensure_path",
+    "CacheArtifact",
     "ensure_df",
-    "ensure_tar_df",
+    "ensure_path",
+    "get_cache_path",
+    "get_relation_cache_path",
+    "prefix_directory_join",
 ]
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], Optional[str]]]
-
-requests_ftp.monkeypatch_session()
-
 
 def prefix_directory_join(
     prefix: str,
     *parts: str,
-    name: Optional[str] = None,
+    name: str | None = None,
     version: VersionHint = None,
     ensure_exists: bool = True,
 ) -> Path:
     """Join in the prefix directory."""
-    if version is None:
-        return RAW_MODULE.join(prefix, *parts, name=name, ensure_exists=ensure_exists)
-    if callable(version):
-        logger.info("[%s] looking up version", prefix)
-        version = version()
-        logger.info("[%s] got version %s", prefix, version)
-    elif not isinstance(version, str):
-        raise TypeError(f"Invalid type: {version} ({type(version)})")
-    if version is None:
-        raise AssertionError
-    version = cleanup_version(version, prefix=prefix)
-    if version is not None and "/" in version:
-        raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
-    return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
-
-
-def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
-    """Get the canonical path to the OBO file."""
-    return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
+    return RAW_MODULE.module(prefix).join(
+        *parts,
+        name=name,
+        ensure_exists=ensure_exists,
+        version=version,
+    )
 
 
 def ensure_path(
@@ -62,36 +44,29 @@ def ensure_path(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
-    error_on_missing: bool = False,
     backend: Literal["requests", "urllib"] = "urllib",
     verify: bool = True,
-) -> str:
+    **download_kwargs: Any,
+) -> Path:
     """Download a file if it doesn't exist."""
-    if name is None:
-        name = name_from_url(url)
-
-    path = prefix_directory_join(prefix, *parts, name=name, version=version)
-
-    if not path.exists() and error_on_missing:
-        raise FileNotFoundError
-
-    kwargs: dict[str, Any]
     if verify:
-        kwargs = {"backend": backend}
+        download_kwargs = {"backend": backend}
     else:
         if backend != "requests":
             logger.warning("using requests since verify=False")
-        kwargs = {"backend": "requests", "verify": False}
+        download_kwargs = {"backend": "requests", "verify": False}
 
-    download(
+    path = RAW_MODULE.module(prefix).ensure(
+        *parts,
         url=url,
-        path=path,
+        name=name,
         force=force,
-        **kwargs,
+        version=version,
+        download_kwargs=download_kwargs,
     )
-    return path.as_posix()
+    return path
 
 
 def ensure_df(
@@ -99,7 +74,7 @@ def ensure_df(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
     sep: str = "\t",
     dtype=str,
@@ -121,21 +96,49 @@
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
-def ensure_tar_df(
-    prefix: str,
-    *parts: str,
-    url: str,
-    inner_path: str,
-    version: VersionHint = None,
-    path: Optional[str] = None,
-    force: bool = False,
-    **kwargs,
-) -> pd.DataFrame:
-    """Download a tar file and open as a dataframe."""
-    path = ensure_path(prefix, *parts, url=url, version=version, name=path, force=force)
-    return read_tarfile_csv(path, inner_path=inner_path, **kwargs)
+class CacheArtifact(enum.Enum):
+    """An enumeration for."""
+
+    names = "names.tsv.gz"
+    definitions = "definitions.tsv.gz"
+    species = "species.tsv.gz"
+    mappings = "mappings.tsv.gz"
+    relations = "relations.tsv.gz"
+    alts = "alt_ids.tsv.gz"
+    typedefs = "typedefs.tsv.gz"
+    literal_mappings = "literal_mappings.tsv.gz"
+    references = "references.tsv.gz"
+    obsoletes = "obsolete.tsv.gz"
+
+    literal_properties = "literal_properties.tsv.gz"
+    object_properties = "object_properties.tsv.gz"
 
+    nodes = "nodes.tsv.gz"
+    edges = "edges.tsv.gz"
 
-def prefix_cache_join(prefix: str, *parts, name: Optional[str], version: VersionHint) -> Path:
-    """Ensure the prefix cache is available."""
-    return prefix_directory_join(prefix, "cache", *parts, name=name, version=version)
+    prefixes = "prefixes.json"
+    metadata = "metadata.json"
+
+
+def get_cache_path(
+    ontology: str,
+    name: CacheArtifact,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a cache path."""
+    return prefix_directory_join(
+        ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
+    )
+
+
+def get_relation_cache_path(
+    ontology: str,
+    reference: Reference,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a relation cache path."""
+    return prefix_directory_join(
+        ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
+    )
pyobo/version.py CHANGED
@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
 
 __all__ = [
     "VERSION",
-    "get_version",
     "get_git_hash",
+    "get_version",
 ]
 
-VERSION = "0.11.2"
+VERSION = "0.12.1"
 
 
 def get_git_hash() -> str:
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
     return ret.strip().decode("utf-8")[:8]
 
 
-def get_version(with_git_hash: bool = False):
+def get_version(with_git_hash: bool = False) -> str:
     """Get the PyOBO version string, including a git hash."""
     return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
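Aside from the version bump, the only change here is the explicit return annotation; usage is unchanged:

from pyobo.version import get_version

print(get_version())                    # '0.12.1'
print(get_version(with_git_hash=True))  # e.g., '0.12.1-<8-char hash>' in a git checkout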