pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/cli/database.py +7 -1
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/identifier_utils/api.py +3 -1
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/drugcentral.py +2 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +89 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +125 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +142 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/struct.py
CHANGED
@@ -8,6 +8,7 @@ import json
 import logging
 import os
 import sys
+import tempfile
 import warnings
 from collections import ChainMap, defaultdict
 from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
@@ -25,6 +26,7 @@ import ssslm
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as _cv
 from more_click import force_option, verbose_option
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 from typing_extensions import Self
 
@@ -70,7 +72,7 @@ from ..constants import (
     TARGET_PREFIX,
 )
 from ..utils.cache import write_gzipped_graph
-from ..utils.io import multidict,
+from ..utils.io import multidict, write_iterable_tsv
 from ..utils.path import (
     CacheArtifact,
     get_cache_path,
@@ -87,6 +89,7 @@ __all__ = [
     "TypeDef",
     "abbreviation",
     "acronym",
+    "build_ontology",
     "make_ad_hoc_ontology",
 ]
 
@@ -746,13 +749,23 @@ class Obo:
             help="Re-process the data, but don't download it again.",
         )
         @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
+        @click.option("--obo", is_flag=True, help="Write OBO")
         @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
         @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
+        @click.option("--cache/--no-cache", is_flag=True, help="Write the cache", default=True)
         @click.option(
             "--version", help="Specify data version to get. Use this if bioversions is acting up."
         )
-        def _main(
-
+        def _main(
+            force: bool,
+            obo: bool,
+            owl: bool,
+            ofn: bool,
+            ttl: bool,
+            version: str | None,
+            rewrite: bool,
+            cache: bool,
+        ) -> None:
             try:
                 inst = cls(force=force, data_version=version)
             except Exception as e:
@@ -760,13 +773,14 @@ class Obo:
                 sys.exit(1)
             inst.write_default(
                 write_obograph=False,
-                write_obo=
+                write_obo=obo,
                 write_owl=owl,
                 write_ofn=ofn,
                 write_ttl=ttl,
                 write_nodes=True,
                 force=force or rewrite,
                 use_tqdm=True,
+                write_cache=cache,
             )
 
         return _main
@@ -909,6 +923,8 @@ class Obo:
                     end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
                 case Reference():
                     end = reference_escape(value, ontology_prefix=self.ontology)
+                case _:
+                    raise TypeError(f"Invalid property value: {value}")
             yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
 
     def _iterate_property_pairs(self) -> Iterable[Annotation]:
@@ -925,10 +941,21 @@ class Obo:
             license_literal = OBOLiteral.string(license_spdx_id)
             yield Annotation(v.has_license, license_literal)
 
-        # Description
         if description := bioregistry.get_description(self.ontology):
-            description = obo_escape_slim(description.strip())
             yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
+        if homepage := bioregistry.get_homepage(self.ontology):
+            yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
+        if repository := bioregistry.get_repository(self.ontology):
+            yield Annotation(v.has_repository, OBOLiteral.uri(repository))
+        if logo := bioregistry.get_logo(self.ontology):
+            yield Annotation(v.has_logo, OBOLiteral.uri(logo))
+        if mailing_list := bioregistry.get_mailing_list(self.ontology):
+            yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
+        if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
+            yield Annotation(
+                v.has_maintainer,
+                Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
+            )
 
         # Root terms
         for root_term in self.root_terms or []:
@@ -973,7 +1000,7 @@ class Obo:
             unit="line",
         )
         if isinstance(file, str | Path | os.PathLike):
-            with safe_open(file,
+            with safe_open(file, operation="write") as fh:
                 self._write_lines(it, fh)
         else:
             self._write_lines(it, file)
@@ -995,6 +1022,15 @@ class Obo:
         ofn = get_ofn_from_obo(self)
         ofn.write_funowl(path)
 
+    def write_owl(self, path: str | Path) -> None:
+        """Write OWL, by first outputting OFN then converting with ROBOT."""
+        from bioontologies import robot
+
+        with tempfile.TemporaryDirectory() as directory:
+            ofn_path = Path(directory).joinpath("tmp.ofn")
+            self.write_ofn(ofn_path)
+            robot.convert(ofn_path, path)
+
     def write_rdf(self, path: str | Path) -> None:
         """Write as Turtle RDF."""
         from .functional.obo_to_functional import get_ofn_from_obo
@@ -1149,7 +1185,7 @@ class Obo:
         metadata = self.get_metadata()
         for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
            logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
-            with safe_open(path,
+            with safe_open(path, operation="write") as file:
                json.dump(metadata, file, indent=2)
 
     def write_prefix_map(self) -> None:
@@ -2265,6 +2301,87 @@ class AdHocOntologyBase(Obo):
     """A base class for ad-hoc ontologies."""
 
 
+def build_ontology(
+    prefix: str,
+    *,
+    terms: list[Term] | None = None,
+    synonym_typedefs: list[SynonymTypeDef] | None = None,
+    typedefs: list[TypeDef] | None = None,
+    name: str | None = None,  # inferred
+    version: str | None = None,
+    idspaces: dict[str, str] | None = None,
+    root_terms: list[Reference] | None = None,
+    subsetdefs: list[tuple[Reference, str]] | None = None,
+    properties: list[Annotation] | None = None,
+    imports: list[str] | None = None,
+    description: str | None = None,
+    homepage: str | None = None,
+    mailing_list: str | None = None,
+    logo: str | None = None,
+    repository: str | None = None,
+) -> Obo:
+    """Build an ontology from parts."""
+    if name is None:
+        name = bioregistry.get_name(prefix)
+    # TODO auto-populate license and other properties
+
+    if properties is None:
+        properties = []
+    if typedefs is None:
+        typedefs = []
+
+    if description:
+        from .typedef import has_description
+
+        properties.append(Annotation.string(has_description.reference, description))
+        if has_description not in typedefs:
+            typedefs.append(has_description)  # TODO get proper typedef
+
+    if homepage:
+        from .typedef import has_homepage
+
+        properties.append(Annotation.uri(has_homepage.reference, homepage))
+        if has_homepage not in typedefs:
+            typedefs.append(has_homepage)
+
+    if logo:
+        from .typedef import has_depiction
+
+        properties.append(Annotation.uri(has_depiction.reference, logo))
+        if has_depiction not in typedefs:
+            typedefs.append(has_depiction)
+
+    if mailing_list:
+        from .typedef import has_mailing_list
+
+        properties.append(Annotation.string(has_mailing_list.reference, mailing_list))
+        if has_mailing_list not in typedefs:
+            typedefs.append(has_mailing_list)
+
+    if repository:
+        from .typedef import has_repository
+
+        properties.append(Annotation.uri(has_repository.reference, repository))
+        if has_repository not in typedefs:
+            typedefs.append(has_repository)
+
+    return make_ad_hoc_ontology(
+        _ontology=prefix,
+        _name=name,
+        # _auto_generated_by
+        _typedefs=typedefs,
+        _synonym_typedefs=synonym_typedefs,
+        # _date: datetime.datetime | None = None,
+        _data_version=version,
+        _idspaces=idspaces,
+        _root_terms=root_terms,
+        _subsetdefs=subsetdefs,
+        _property_values=properties,
+        _imports=imports,
+        terms=terms,
+    )
+
+
 def make_ad_hoc_ontology(
     _ontology: str,
     _name: str | None = None,
pyobo/struct/struct_utils.py
CHANGED
@@ -63,6 +63,16 @@ class Annotation(NamedTuple):
         """Return a literal property for a float."""
         return cls(predicate, OBOLiteral.float(value))
 
+    @classmethod
+    def uri(cls, predicate: Reference, uri: str) -> Self:
+        """Return a literal property for a URI."""
+        return cls(predicate, OBOLiteral.uri(uri))
+
+    @classmethod
+    def string(cls, predicate: Reference, value: str, *, language: str | None = None) -> Self:
+        """Return a literal property for a float."""
+        return cls(predicate, OBOLiteral.string(value, language=language))
+
     @staticmethod
     def _sort_key(x: Annotation):
         return x.predicate, _reference_or_literal_key(x.value)
pyobo/struct/typedef.py
CHANGED
@@ -15,7 +15,9 @@ __all__ = [
     "alternative_term",
     "broad_match",
     "close_match",
+    "contributes_to_condition",
     "default_typedefs",
+    "derives_from_organism",
     "editor_note",
     "enables",
     "exact_match",
@@ -24,10 +26,12 @@ __all__ = [
     "gene_product_member_of",
     "has_contributor",
     "has_dbxref",
+    "has_depiction",
     "has_end_date",
     "has_gene_product",
     "has_homepage",
     "has_inchi",
+    "has_mailbox",
     "has_mature",
     "has_member",
     "has_part",
@@ -103,12 +107,18 @@ has_component = TypeDef(
 derives_from = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
 )
+derives_from_organism = TypeDef(
+    reference=Reference(prefix="CLO", identifier="0037207", name="derives from organism")
+)
 molecularly_interacts_with = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
 )
 located_in = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
 )
+contributes_to_condition = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
+)
 exact_match = TypeDef(reference=v.exact_match, is_metadata_tag=True)
 narrow_match = TypeDef(reference=v.narrow_match, is_metadata_tag=True)
 broad_match = TypeDef(reference=v.broad_match, is_metadata_tag=True)
@@ -257,9 +267,11 @@ has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v
 
 has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
 
-has_homepage = TypeDef(
-
-)
+has_homepage = TypeDef(reference=v.has_homepage, is_metadata_tag=True)
+has_depiction = TypeDef(reference=v.has_depiction, is_metadata_tag=True)
+has_mailbox = TypeDef(reference=v.has_mailbox, is_metadata_tag=True)
+has_mailing_list = TypeDef(reference=v.has_mailing_list, is_metadata_tag=True)
+has_repository = TypeDef(reference=v.has_repository, is_metadata_tag=True)
 
 has_category = TypeDef(
     reference=Reference(prefix="biolink", identifier="category", name="has category"),
pyobo/struct/vocabulary.py
CHANGED
@@ -90,6 +90,14 @@ has_description = _c(_v.has_description)
 has_license = _c(_v.has_license)
 has_title = _c(_v.has_title)
 
+has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")
+has_logo = Reference(prefix="foaf", identifier="logo", name="has logo")
+has_mailbox = Reference(prefix="foaf", identifier="mbox", name="has mailbox")
+has_depiction = Reference(prefix="foaf", identifier="depicted_by", name="depicted by")
+has_repository = Reference(prefix="doap", identifier="repository", name="has repository")
+has_mailing_list = Reference(prefix="doap", identifier="mailing-list", name="has mailing list")
+has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
+
 has_part = Reference(prefix=BFO_PREFIX, identifier="0000051", name="has part")
 part_of = Reference(prefix=BFO_PREFIX, identifier="0000050", name="part of")
 orthologous = Reference(
pyobo/utils/cache.py
CHANGED
@@ -12,8 +12,9 @@ from pystow.cache import CachedCollection as cached_collection  # noqa:N813
 from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
+from pystow.utils import safe_open
 
-from .io import open_map_tsv, open_multimap_tsv,
+from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
     "cached_collection",
@@ -69,13 +70,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"
 
 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with safe_open(path, read
+    with safe_open(path, operation="read") as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with safe_open(path,
+    with safe_open(path, operation="write") as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
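
A round-trip sketch for the node-link graph helpers, which now delegate gzip handling to `pystow.utils.safe_open`; the filename is arbitrary:

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

graph = nx.MultiDiGraph()
graph.add_edge("a", "b", key="part_of")
write_gzipped_graph(graph, "graph.json.gz")
assert set(get_gzipped_graph("graph.json.gz").edges(keys=True)) == {("a", "b", "part_of")}
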
pyobo/utils/io.py
CHANGED
@@ -1,27 +1,24 @@
 """I/O utilities."""
 
 import collections.abc
-import contextlib
-import csv
 import gzip
 import logging
 from collections import defaultdict
 from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import
+from typing import TypeVar, cast
 
 import pandas as pd
+import pystow.utils
+from pystow.utils import safe_open_reader, safe_open_writer
 from tqdm.auto import tqdm
 
 __all__ = [
-    "get_reader",
     "multidict",
     "multisetdict",
     "open_map_tsv",
     "open_multimap_tsv",
-    "open_reader",
-    "safe_open",
     "safe_open_writer",
     "write_iterable_tsv",
     "write_map_tsv",
@@ -34,35 +31,22 @@ X = TypeVar("X")
 Y = TypeVar("Y")
 
 
-@contextmanager
-def open_reader(path: str | Path, sep: str = "\t"):
-    """Open a file and get a reader for it."""
-    path = Path(path)
-    with safe_open(path, read=True) as file:
-        yield get_reader(file, sep=sep)
-
-
-def get_reader(x, sep: str = "\t"):
-    """Get a :func:`csv.reader` with PyOBO default settings."""
-    return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
     path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-
+    rv = {}
+    with pystow.utils.safe_open_reader(path) as reader:
         if has_header:
-            next(
+            next(reader)  # throw away header
         if use_tqdm:
-
-
-        for row in get_reader(file):
+            reader = tqdm(reader, desc=f"loading TSV from {path}")
+        for row in reader:
             if len(row) != 2:
                 logger.warning("[%s] malformed row can not be put in dict: %s", path, row)
                 continue
             rv[row[0]] = row[1]
-
+    return rv
 
 
 def open_multimap_tsv(
@@ -72,24 +56,27 @@ def open_multimap_tsv(
     has_header: bool = True,
 ) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
-
+    with _help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header) as file:
+        return multidict(file)
 
 
+@contextmanager
 def _help_multimap_tsv(
     path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[tuple[str, str]]:
-    with
+) -> Generator[Iterable[tuple[str, str]], None, None]:
+    with safe_open_reader(path) as reader:
         if has_header:
             try:
-                next(
+                next(reader)  # throw away header
             except gzip.BadGzipFile as e:
                 raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
-
-
+            yield tqdm(reader, desc=f"loading TSV from {path}")
+        else:
+            yield cast(Iterable[tuple[str, str]], reader)
 
 
 def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
@@ -149,28 +136,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-@contextlib.contextmanager
-def safe_open(
-    path: str | Path, read: bool, encoding: str | None = None
-) -> Generator[TextIO, None, None]:
-    """Safely open a file for reading or writing text."""
-    path = Path(path).expanduser().resolve()
-    mode: Literal["rt", "wt"] = "rt" if read else "wt"
-    if path.suffix.endswith(".gz"):
-        with gzip.open(path, mode=mode, encoding=encoding) as file:
-            yield file
-    else:
-        with open(path, mode=mode) as file:
-            yield file
-
-
-@contextlib.contextmanager
-def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
-    """Open a CSV writer, wrapping :func:`csv.writer`."""
-    if isinstance(f, str | Path):
-        with safe_open(f, read=False) as file:
-            yield csv.writer(file, delimiter=delimiter)
-    else:
-        yield csv.writer(f, delimiter=delimiter)
pyobo/utils/misc.py
CHANGED
@@ -1,9 +1,17 @@
 """Miscellaneous utilities."""
 
+from __future__ import annotations
+
 import logging
+from collections.abc import Callable, Iterable
 from datetime import datetime
 
+import bioversions.utils
+
+from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
+
 __all__ = [
+    "VERSION_GETTERS",
     "cleanup_version",
 ]
 
@@ -15,8 +23,11 @@ BIZARRE_LOGGED = set()
 VERSION_REWRITES = {
     "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
     "http://www.w3.org/2006/time#2016": "2016",  # for time
+    "https://purl.org/ontology/modalia#1.0.0": "1.0.0",  # for dalia
+}
+STATIC_VERSION_REWRITES = {
+    "orth": "2",
 }
-STATIC_VERSION_REWRITES = {"orth": "2"}
 VERSION_PREFIXES = [
     "http://www.orpha.net/version",
     "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
@@ -27,17 +38,36 @@ VERSION_PREFIXES = [
     "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
     "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
     "https://purl.dataone.org/odo/ADCAD/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://nfdi.fiz-karlsruhe.de/ontology/",
+    "http://www.w3.org/ns/prov-",
+    "https://raw.githubusercontent.com/enpadasi/Ontology-for-Nutritional-Studies/releases/download/v",
+    "http://purl.jp/bio/4/ontology/iobc/",  # like http://purl.jp/bio/4/ontology/iobc/1.6.0
+    "http://w3id.org/nfdi4ing/metadata4ing/",  # like http://w3id.org/nfdi4ing/metadata4ing/1.3.1
+    "http://www.semanticweb.com/OntoRxn/",  # like http://www.semanticweb.com/OntoRxn/0.2.5
+    "https://w3id.org/lehrplan/ontology/",  # like in https://w3id.org/lehrplan/ontology/1.0.0-4
+    "http://www.ebi.ac.uk/swo/version/",  # http://www.ebi.ac.uk/swo/version/6.0
+    "https://w3id.org/emi/version/",
 ]
 VERSION_PREFIX_SPLITS = [
     "http://www.ebi.ac.uk/efo/releases/v",
     "http://www.ebi.ac.uk/swo/swo.owl/",
     "http://semanticscience.org/ontology/sio/v",
     "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+    "http://nmrml.org/cv/v",  # as in http://nmrml.org/cv/v1.1.0/nmrCV
+    "http://enanomapper.github.io/ontologies/releases/",  # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
 ]
+BAD = {
+    "http://purl.obolibrary.org/obo",
+    "http://www.bioassayontology.org/bao/bao_complete",
+}
 
 
 def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
+    # in case a literal string that wasn't parsed properly gets put in
+    data_version = data_version.strip('"')
+
     if data_version in VERSION_REWRITES:
         return VERSION_REWRITES[data_version]
 
@@ -74,3 +104,114 @@ def cleanup_version(data_version: str, prefix: str) -> str:
         logger.debug("[%s] bizarre version: %s", prefix, data_version)
         BIZARRE_LOGGED.add((prefix, data_version))
     return data_version
+
+
+def _get_obo_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_obo_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_owl_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_owl_xml_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_obograph_json_version(prefix: str, url: str) -> str | None:
+    rv = bioversions.utils.get_obograph_json_version(url)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+#: A mapping from data type to gersion getter function
+VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
+    "obo": _get_obo_version,
+    "owl": _get_owl_version,
+    "json": _get_obograph_json_version,
+}
+
+
+def _prioritize_version(
+    data_version: str | None,
+    ontology_prefix: str,
+    version: str | None,
+    date: datetime | None,
+) -> str | None:
+    """Process version information coming from several sources and normalize them."""
+    if ontology_prefix in STATIC_VERSION_REWRITES:
+        return STATIC_VERSION_REWRITES[ontology_prefix]
+
+    if version:
+        if version in BAD:
+            logger.debug("[%s] had known bad version, returning None: ", ontology_prefix, version)
+            return None
+
+        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
+        if not data_version:
+            logger.debug(
+                "[%s] did not have a version, overriding with %s",
+                ontology_prefix,
+                clean_injected_version,
+            )
+            return clean_injected_version
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        if clean_data_version != clean_injected_version:
+            # in this case, we're going to trust the one that's passed
+            # through explicitly more than the graph's content
+            logger.debug(
+                "[%s] had version %s, overriding with %s",
+                ontology_prefix,
+                data_version,
+                version,
+            )
+        return clean_injected_version
+
+    if data_version:
+        if data_version in BAD:
+            logger.debug(
+                "[%s] had known bad version, returning None: ", ontology_prefix, data_version
+            )
+            return None
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
+        return clean_data_version
+
+    if date is not None:
+        derived_date_version = date.strftime("%Y-%m-%d")
+        logger.debug(
+            "[%s] does not report a version. falling back to date: %s",
+            ontology_prefix,
+            derived_date_version,
+        )
+        return derived_date_version
+
+    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
+    return None
+
+
+def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str]]:
+    # assume that all possible files that can be downloaded
+    # are in sync and have the same version
+    for ontology_format, get_url_func in ONTOLOGY_GETTERS:
+        url = get_url_func(prefix)
+        if url is None:
+            continue
+        yield ontology_format, url
+
+
+def _get_version_from_artifact(prefix: str) -> str | None:
+    for ontology_format, url in _get_getter_urls(prefix):
+        # Try to peak into the file to get the version without fully downloading
+        get_version_func = VERSION_GETTERS.get(ontology_format)
+        if get_version_func is None:
+            continue
+        version = get_version_func(prefix, url)
+        if version:
+            return cleanup_version(version, prefix=prefix)
+    return None