pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py CHANGED

@@ -3,10 +3,9 @@
 import gzip
 import json
 import logging
-import os
 from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic, TypeVar, Union
+from typing import Generic, TypeVar
 
 import networkx as nx
 from pystow.cache import Cached
@@ -18,15 +17,15 @@ from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
 from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
-    # from pystow
-    "cached_json",
     "cached_collection",
     "cached_df",
-    "cached_pickle",
     # implemented here
     "cached_graph",
+    # from pystow
+    "cached_json",
     "cached_mapping",
     "cached_multidict",
+    "cached_pickle",
 ]
 
 logger = logging.getLogger(__name__)
@@ -39,14 +38,15 @@ class _CachedMapping(Cached[X], Generic[X]):
 
     def __init__(
         self,
-        path: Union[str, Path],
+        path: str | Path,
         header: Iterable[str],
         *,
         use_tqdm: bool = False,
         force: bool = False,
+        cache: bool = True,
     ):
         """Initialize the mapping cache."""
-        super().__init__(path=path, force=force)
+        super().__init__(path=path, cache=cache, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
 
@@ -65,17 +65,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
 
 cached_mapping = CachedMapping
 
+NODE_LINK_STYLE = "links"  # TODO update to "edges"
+
 
-def get_gzipped_graph(path: Union[str, Path]) -> nx.MultiDiGraph:
+def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
     with gzip.open(path, "rt") as file:
-        return nx.node_link_graph(json.load(file))
+        return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
-def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
+def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
     with gzip.open(path, "wt") as file:
-        json.dump(nx.node_link_data(graph), file)
+        json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
 
 
 class CachedGraph(Cached[nx.MultiDiGraph]):
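
Note: the new NODE_LINK_STYLE constant pins the JSON key that networkx uses for edge lists in node-link data, keeping reads and writes symmetric across networkx's ongoing "links" → "edges" rename. A minimal round-trip sketch (filename illustrative, assuming pyobo 0.12.0 and a networkx version that accepts the edges= keyword):

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

graph = nx.MultiDiGraph()
graph.add_edge("a", "b", key="part_of")

write_gzipped_graph(graph, "example.json.gz")     # serializes the edge list under "links"
roundtrip = get_gzipped_graph("example.json.gz")  # reads back with the same key
assert set(graph.edges) == set(roundtrip.edges)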
pyobo/utils/io.py CHANGED

@@ -4,30 +4,26 @@ import collections.abc
 import csv
 import gzip
 import logging
-import time
 from collections import defaultdict
 from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional, TypeVar, Union
-from xml.etree.ElementTree import Element
+from typing import TypeVar
 
 import pandas as pd
-from lxml import etree
 from tqdm.auto import tqdm
 
 __all__ = [
-    "open_map_tsv",
-    "open_multimap_tsv",
+    "get_reader",
+    "get_writer",
     "multidict",
     "multisetdict",
+    "open_map_tsv",
+    "open_multimap_tsv",
+    "open_reader",
+    "write_iterable_tsv",
     "write_map_tsv",
     "write_multimap_tsv",
-    "write_iterable_tsv",
-    "parse_xml_gz",
-    "get_writer",
-    "open_reader",
-    "get_reader",
 ]
 
 logger = logging.getLogger(__name__)
@@ -37,7 +33,7 @@ Y = TypeVar("Y")
 
 
 @contextmanager
-def open_reader(path: Union[str, Path], sep: str = "\t"):
+def open_reader(path: str | Path, sep: str = "\t"):
     """Open a file and get a reader for it."""
     path = Path(path)
     with gzip.open(path, "rt") if path.suffix == ".gz" else open(path) as file:
@@ -55,7 +51,7 @@ def get_writer(x, sep: str = "\t"):
 
 
 def open_map_tsv(
-    path: Union[str, Path], *, use_tqdm: bool = False, has_header: bool = True
+    path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
     with open(path) as file:
@@ -73,7 +69,7 @@ def open_map_tsv(
 
 
 def open_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -83,7 +79,7 @@ def open_multimap_tsv(
 
 
 def _help_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
 
 def write_map_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
+    path: str | Path,
+    header: Iterable[str] | None = None,
+    rv: Iterable[tuple[str, str]] | Mapping[str, str],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -129,7 +125,7 @@ def write_map_tsv(
 
 def write_multimap_tsv(
     *,
-    path: Union[str, Path],
+    path: str | Path,
     header: Iterable[str],
     rv: Mapping[str, list[str]],
     sep: str = "\t",
@@ -141,8 +137,8 @@ def write_multimap_tsv(
 
 def write_iterable_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
+    path: str | Path,
+    header: Iterable[str] | None = None,
     it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
@@ -154,13 +150,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-def parse_xml_gz(path: Union[str, Path]) -> Element:
-    """Parse an XML file from a path to a GZIP file."""
-    path = Path(path).resolve()
-    t = time.time()
-    logger.info("parsing xml from %s", path)
-    tree = etree.parse(path.as_posix())  # type:ignore
-    logger.info("parsed xml in %.2f seconds", time.time() - t)
-    return tree.getroot()
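
Note: parse_xml_gz is gone (along with the lxml and time imports), so this module now only handles delimited files, and the helpers accept str | Path. A small usage sketch of the round trip between write_map_tsv and open_map_tsv (file name and contents are illustrative):

from pyobo.utils.io import open_map_tsv, write_map_tsv

write_map_tsv(
    path="names.tsv",
    header=["chebi_id", "name"],
    rv={"138488": "alsterpaullone"},
)
assert open_map_tsv("names.tsv") == {"138488": "alsterpaullone"}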
pyobo/utils/iter.py CHANGED

@@ -8,8 +8,8 @@ from typing import TypeVar
 from more_itertools import peekable
 
 __all__ = [
-    "iterate_together",
     "iterate_gzips_together",
+    "iterate_together",
 ]
 
 X = TypeVar("X")
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
 def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
-
-
-        yield from iterate_together(
+        a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        yield from iterate_together(a_reader, b_reader)  # type:ignore
 
 
 def iterate_together(
@@ -38,7 +38,7 @@ def iterate_together(
     - Each key in the index is present within both files
     """
     b_peekable = peekable(b)
-    b_index = b_peekable.peek()[0]
+    b_index: X | type[_Done] = b_peekable.peek()[0]
 
     for a_index, a_value in a:
         zs = []
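
Note: iterate_gzips_together now builds both csv.reader objects on named lines before delegating to iterate_together, which merge-joins two iterables sorted by a shared key. A sketch on plain lists (output shape inferred from the Iterable[tuple[str, str, list[str]]] annotation above):

from pyobo.utils.iter import iterate_together

a = [("1", "a"), ("2", "b")]
b = [("1", "x"), ("1", "y"), ("2", "z")]
for row in iterate_together(iter(a), iter(b)):
    print(row)  # expected: ("1", "a", ["x", "y"]) then ("2", "b", ["z"])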
pyobo/utils/misc.py CHANGED

@@ -1,79 +1,67 @@
 """Miscellaneous utilities."""
 
-import gzip
 import logging
-import os
 from datetime import datetime
-from subprocess import check_output
-from typing import Optional
 
 __all__ = [
-    "obo_to_obograph",
-    "obo_to_owl",
     "cleanup_version",
 ]
 
-
 logger = logging.getLogger(__name__)
 
-
-def obo_to_obograph(obo_path, obograph_path) -> None:
-    """Convert an OBO file to OBO Graph file with pronto."""
-    import pronto
-
-    ontology = pronto.Ontology(obo_path)
-    with gzip.open(obograph_path, "wb") as file:
-        ontology.dump(file, format="json")
-
-
-def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
-    """Convert an OBO file to an OWL file with ROBOT."""
-    args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
-    ret = check_output(  # noqa:S603
-        args,
-        cwd=os.path.dirname(__file__),
-    )
-    return ret.decode()
-
-
 BIZARRE_LOGGED = set()
 
+#: Rewrites for mostly static resources that have weird quirks
+VERSION_REWRITES = {
+    "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
+    "http://www.w3.org/2006/time#2016": "2016",  # for time
+}
+STATIC_VERSION_REWRITES = {"orth": "2"}
+VERSION_PREFIXES = [
+    "http://www.orpha.net/version",
+    "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
+    "http://humanbehaviourchange.org/ontology/bcio.owl/",
+    "http://purl.org/pav/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://purl.dataone.org/odo/MOSAIC/",
+    "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
+    "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
+    "https://purl.dataone.org/odo/ADCAD/",
+]
+VERSION_PREFIX_SPLITS = [
+    "http://www.ebi.ac.uk/efo/releases/v",
+    "http://www.ebi.ac.uk/swo/swo.owl/",
+    "http://semanticscience.org/ontology/sio/v",
+    "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+]
+
 
-def cleanup_version(data_version: str, prefix: str) -> Optional[str]:
+def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
-    if data_version
-
+    if data_version in VERSION_REWRITES:
+        return VERSION_REWRITES[data_version]
+
+    data_version = data_version.removesuffix(".owl")
     if data_version.endswith(prefix):
         data_version = data_version[: -len(prefix)]
-
-
-
-
-        return "2"
+    data_version = data_version.removesuffix("/")
+
+    data_version = data_version.removeprefix("releases/")
+    data_version = data_version.removeprefix("release/")
 
-    version_prefixes = [
-        "http://www.orpha.net/version",
-        "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
-        "http://humanbehaviourchange.org/ontology/bcio.owl/",
-        "http://purl.org/pav/",
-        "http://identifiers.org/combine.specifications/teddy.rel-",
-    ]
-    for version_prefix in version_prefixes:
+    for version_prefix in VERSION_PREFIXES:
         if data_version.startswith(version_prefix):
-            return data_version
+            return data_version.removeprefix(version_prefix)
 
-    version_prefixes_split = [
-        "http://www.ebi.ac.uk/efo/releases/v",
-        "http://www.ebi.ac.uk/swo/swo.owl/",
-        "http://semanticscience.org/ontology/sio/v",
-        "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
-    ]
-    for version_prefix_split in version_prefixes_split:
+    for version_prefix_split in VERSION_PREFIX_SPLITS:
         if data_version.startswith(version_prefix_split):
-            return data_version
+            return data_version.removeprefix(version_prefix_split).split("/")[0]
 
+    # use a heuristic to determine if the version is one of
+    # consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
     if data_version.replace(".", "").isnumeric():
-        return data_version
+        return data_version
+
     for v in reversed(data_version.split("/")):
         v = v.strip()
         try:
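
Note: the version-cleaning tables formerly built inside cleanup_version are now module-level constants, and prefix matches strip the prefix instead of returning the raw IRI. Two calls traced through the new logic (inputs taken from the constants above):

from pyobo.utils.misc import cleanup_version

# direct hit in VERSION_REWRITES
assert cleanup_version("$Date: 2009/11/15 10:54:12 $", prefix="owl") == "2009-11-15"

# the ".owl" suffix, ontology prefix, and trailing slash are stripped, then the
# EFO entry in VERSION_PREFIX_SPLITS isolates the version segment
assert (
    cleanup_version("http://www.ebi.ac.uk/efo/releases/v3.62.0/efo.owl", prefix="efo")
    == "3.62.0"
)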
pyobo/utils/ndex_utils.py CHANGED

File without changes
pyobo/utils/path.py CHANGED

@@ -1,60 +1,42 @@
 """Utilities for building paths."""
 
+import enum
 import logging
 from pathlib import Path
-from typing import Any, Callable, Literal, Optional, Union
+from typing import Any, Literal
 
 import pandas as pd
-import requests_ftp
-from pystow.utils import name_from_url
+from curies import Reference
+from pystow import VersionHint
 
-from .misc import cleanup_version
-from ..constants import RAW_MODULE
+from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
 
 __all__ = [
-    "
-    "prefix_directory_join",
-    "prefix_cache_join",
-    "get_prefix_obo_path",
-    "ensure_path",
+    "CacheArtifact",
     "ensure_df",
-    "
+    "ensure_path",
+    "get_cache_path",
+    "get_relation_cache_path",
+    "prefix_directory_join",
 ]
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], Optional[str]]]
-
-requests_ftp.monkeypatch_session()
-
 
 def prefix_directory_join(
     prefix: str,
     *parts: str,
-    name: Optional[str] = None,
+    name: str | None = None,
     version: VersionHint = None,
     ensure_exists: bool = True,
 ) -> Path:
     """Join in the prefix directory."""
-
-
-
-
-        version
-
-    elif not isinstance(version, str):
-        raise TypeError(f"Invalid type: {version} ({type(version)})")
-    if version is None:
-        raise AssertionError
-    version = cleanup_version(version, prefix=prefix)
-    if version is not None and "/" in version:
-        raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
-    return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
-
-
-def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
-    """Get the canonical path to the OBO file."""
-    return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
+    return RAW_MODULE.module(prefix).join(
+        *parts,
+        name=name,
+        ensure_exists=ensure_exists,
+        version=version,
+    )
 
 
 def ensure_path(
@@ -62,36 +44,29 @@ def ensure_path(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
-    error_on_missing: bool = False,
     backend: Literal["requests", "urllib"] = "urllib",
     verify: bool = True,
-
+    **download_kwargs: Any,
+) -> Path:
     """Download a file if it doesn't exist."""
-    if name is None:
-        name = name_from_url(url)
-
-    path = prefix_directory_join(prefix, *parts, name=name, version=version)
-
-    if not path.exists() and error_on_missing:
-        raise FileNotFoundError
-
-    kwargs: dict[str, Any]
     if verify:
-        kwargs = {"backend": backend}
+        download_kwargs = {"backend": backend}
     else:
         if backend != "requests":
             logger.warning("using requests since verify=False")
-        kwargs = {"backend": "requests", "verify": False}
+        download_kwargs = {"backend": "requests", "verify": False}
 
-
+    path = RAW_MODULE.module(prefix).ensure(
+        *parts,
         url=url,
-
+        name=name,
         force=force,
-
+        version=version,
+        download_kwargs=download_kwargs,
     )
-    return path
+    return path
 
 
 def ensure_df(
@@ -99,7 +74,7 @@ def ensure_df(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
     sep: str = "\t",
     dtype=str,
@@ -121,21 +96,52 @@
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+class CacheArtifact(enum.Enum):
+    """An enumeration for."""
+
+    names = "names.tsv"
+    definitions = "definitions.tsv"
+    species = "species.tsv"
+    synonyms = "synonyms.tsv"  # deprecated
+    xrefs = "xrefs.tsv"  # deprecated
+    mappings = "mappings.tsv"
+    relations = "relations.tsv"
+    alts = "alt_ids.tsv"
+    typedefs = "typedefs.tsv"
+    literal_mappings = "literal_mappings.tsv"
+    references = "references.tsv"
+    obsoletes = "obsolete.tsv"
+
+    properties = "properties.tsv"  # deprecated
+    literal_properties = "literal_properties.tsv"
+    object_properties = "object_properties.tsv"
 
+    nodes = "nodes.tsv"
+    edges = "edges.tsv"
 
-
-
-
+    prefixes = "prefixes.json"
+    metadata = "metadata.json"
+
+
+def get_cache_path(
+    ontology: str,
+    name: CacheArtifact,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a cache path."""
+    return prefix_directory_join(
+        ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
+    )
+
+
+def get_relation_cache_path(
+    ontology: str,
+    reference: Reference,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a relation cache path."""
+    return prefix_directory_join(
+        ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
+    )
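
Note: CacheArtifact enumerates the per-ontology cache filenames, and the two new functions resolve them under the cache and relation subdirectories via prefix_directory_join. A hypothetical lookup (the version string is illustrative):

from curies import Reference

from pyobo.utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

names_path = get_cache_path("chebi", CacheArtifact.names, version="235")
part_of_path = get_relation_cache_path(
    "chebi", Reference(prefix="bfo", identifier="0000050"), version="235"
)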
pyobo/version.py CHANGED

@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
 
 __all__ = [
     "VERSION",
-    "get_version",
     "get_git_hash",
+    "get_version",
 ]
 
-VERSION = "0.11.1"
+VERSION = "0.12.0"
 
 
 def get_git_hash() -> str:
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
     return ret.strip().decode("utf-8")[:8]
 
 
-def get_version(with_git_hash: bool = False):
+def get_version(with_git_hash: bool = False) -> str:
     """Get the PyOBO version string, including a git hash."""
     return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
 
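
Note: get_version now carries an explicit return annotation. Usage (the hash suffix appears only when PyOBO runs from a git checkout):

from pyobo.version import get_version

get_version()                    # '0.12.0'
get_version(with_git_hash=True)  # e.g., '0.12.0-abcd1234'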