pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/cli/database.py +7 -1
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/identifier_utils/api.py +3 -1
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/drugcentral.py +2 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +89 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +125 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +142 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/reactome.py
CHANGED
@@ -22,6 +22,7 @@ __all__ = [
 logger = logging.getLogger(__name__)
 
 PREFIX = "reactome"
+ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
 
 
 # TODO alt ids https://reactome.org/download/current/reactome_stable_ids.txt
@@ -32,10 +33,12 @@ class ReactomeGetter(Obo):
 
     ontology = bioversions_key = PREFIX
     typedefs = [from_species, has_participant, has_citation]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from iter_terms(version=self._version_or_raise, force=force)
 
 
 def ensure_participant_df(version: str, force: bool = False) -> pd.DataFrame:
@@ -87,6 +90,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     for parent_id, child_id in hierarchy_df.values:
         terms[child_id].append_parent(terms[parent_id])
 
+    for term in terms.values():
+        if not term.parents:
+            term.append_parent(ROOT)
+
     uniprot_pathway_df = ensure_participant_df(version=version, force=force)
     for uniprot_id, reactome_id in tqdm(
         uniprot_pathway_df.values,
@@ -102,7 +109,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             reference = Reference(prefix="uniprot.isoform", identifier=uniprot_id)
         else:
             reference = Reference(prefix="uniprot", identifier=uniprot_id)
-        terms[reactome_id].
+        terms[reactome_id].annotate_object(has_participant, reference)
 
     chebi_pathway_url = f"https://reactome.org/download/{version}/ChEBI2Reactome_All_Levels.txt"
     chebi_pathway_df = ensure_df(
@@ -122,7 +129,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         if reactome_id not in terms:
             tqdm.write(f"{reactome_id} appears in chebi participants file but not pathways file")
             continue
-        terms[reactome_id].
+        terms[reactome_id].annotate_object(
             has_participant, Reference(prefix="chebi", identifier=chebi_id)
         )
 
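Note: the Reactome changes attach every orphan pathway to a shared Pathway Ontology root (`pw:0000001`) and register it as a root term. A rough sketch of how the new wiring could be spot-checked after a rebuild, reusing `get_ontology` as it appears in the WikiPathways changes below; treating the returned ontology as an iterable of `Term` objects mirrors the `for pw_term in get_ontology("pw")` loop there, but is an assumption for this sketch.

```python
from pyobo.getters import get_ontology
from pyobo.struct import Reference

# the shared root added in this release (see ROOT above)
PW_ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")

# assumption: iterating the ontology object yields its Term objects
terms = list(get_ontology("reactome"))
orphans = [term for term in terms if not term.parents]
top_level = [term for term in terms if PW_ROOT in (term.parents or [])]

print(f"{len(orphans)} terms without parents (expect only the root itself)")
print(f"{len(top_level)} terms attached directly to {PW_ROOT.curie}")
```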
pyobo/sources/spdx.py
ADDED
@@ -0,0 +1,89 @@
+"""Convert SPDX to an ontology."""
+
+from collections.abc import Iterable
+from typing import Any
+
+from pydantic import ValidationError
+from tqdm import tqdm
+
+from pyobo.struct import Obo, Reference, Term, TypeDef
+from pyobo.struct.typedef import see_also
+from pyobo.struct.vocabulary import xsd_boolean
+from pyobo.utils.path import ensure_json
+
+__all__ = [
+    "SPDXLicenseGetter",
+]
+
+DATA_URL = "https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
+LICENSE_PREFIX = "spdx"
+TERM_PREFIX = "spdx.term"
+
+ROOT = Term.from_triple(TERM_PREFIX, "ListedLicense", "listed license")
+IS_OSI = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isOsiApproved", name="is OSI approved"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+IS_FSF = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isFsfLibre", name="is FSF Libre"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+
+
+def get_terms(version: str) -> Iterable[Term]:
+    """Iterate over terms."""
+    yield ROOT
+    data = ensure_json(
+        LICENSE_PREFIX,
+        url=DATA_URL,
+        version=version,
+    )
+    for record in data["licenses"]:
+        if term := _get_term(record):
+            yield term
+
+
+def _get_term(record: dict[str, Any]) -> Term | None:
+    try:
+        reference = Reference(
+            prefix=LICENSE_PREFIX, identifier=record["licenseId"], name=record["name"]
+        )
+    except ValidationError:
+        tqdm.write(f"invalid: {record['licenseId']}")
+        return None
+    term = (
+        Term(
+            reference=reference,
+            is_obsolete=True if record.get("isDeprecatedLicenseId") else None,
+            # type="Instance",
+        )
+        .append_parent(ROOT)
+        .append_synonym(record["licenseId"])
+    )
+    if record.get("isOsiApproved"):
+        term.annotate_boolean(IS_OSI, True)
+    if record.get("isFsfLibre"):
+        term.annotate_boolean(IS_FSF, True)
+    for uri in record.get("seeAlso", []):
+        term.annotate_uri(see_also, uri)
+    return term
+
+
+class SPDXLicenseGetter(Obo):
+    """An ontology representation of the SPDX Licenses."""
+
+    bioversions_key = ontology = LICENSE_PREFIX
+    typedefs = [see_also, IS_FSF, IS_OSI]
+    root_terms = [ROOT.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(version=self._version_or_raise)
+
+
+if __name__ == "__main__":
+    SPDXLicenseGetter.cli()
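Note: the new getter reads the `licenses` array of the upstream SPDX `licenses.json`. For orientation, a sketch of the record shape `_get_term` consumes; the entry below is illustrative rather than copied from the file, and only the keys the code actually reads are shown.

```python
# Illustrative record, following the keys read in _get_term above.
# A real entry comes from the "licenses" array of the SPDX licenses.json file.
record = {
    "licenseId": "MIT",              # becomes the spdx:MIT identifier and a synonym
    "name": "MIT License",           # becomes the term name
    "isOsiApproved": True,           # -> term.annotate_boolean(IS_OSI, True)
    "isFsfLibre": True,              # -> term.annotate_boolean(IS_FSF, True)
    "isDeprecatedLicenseId": False,  # -> term is not marked obsolete
    "seeAlso": [
        # each URI -> term.annotate_uri(see_also, uri); example URL only
        "https://opensource.org/license/mit/",
    ],
}
# _get_term(record) would yield a term parented under spdx.term:ListedLicense
```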
pyobo/sources/uniprot/uniprot.py
CHANGED
@@ -4,6 +4,7 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import cast
 
+from pystow.utils import safe_open_reader
 from tqdm.auto import tqdm
 
 from pyobo import Obo, Reference
@@ -22,7 +23,6 @@ from pyobo.struct import (
     participates_in,
 )
 from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
-from pyobo.utils.io import open_reader
 
 PREFIX = "uniprot"
 BASE_URL = "https://rest.uniprot.org/uniprotkb/stream"
@@ -78,7 +78,7 @@ class UniProtGetter(Obo):
 
 def iter_terms(version: str | None = None) -> Iterable[Term]:
     """Iterate over UniProt Terms."""
-    with
+    with safe_open_reader(ensure(version=version)) as reader:
         _ = next(reader)  # header
         for (
             uniprot_id,
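Note: `open_reader` from `pyobo.utils.io` is replaced here by `safe_open_reader` from `pystow.utils`, used as a context manager that yields a row iterator over the downloaded TSV export. A minimal sketch of the pattern as used above; that the helper transparently handles compression and defaults to tab-delimited rows is an assumption.

```python
from pystow.utils import safe_open_reader


def count_rows(path) -> int:
    """Count data rows in a (possibly gzipped) TSV file, skipping the header."""
    # assumption: the reader yields one sequence of column values per row,
    # as the unpacking loop in iter_terms above implies
    with safe_open_reader(path) as reader:
        _header = next(reader)  # skip the header row, as the UniProt getter does
        return sum(1 for _row in reader)
```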
pyobo/sources/wikipathways.py
CHANGED
@@ -3,13 +3,14 @@
 import logging
 from collections.abc import Iterable
 
-
+import pystow
+from pystow.utils import DownloadError, read_zipfile_rdf
 from tqdm import tqdm
 
 from .gmt_utils import parse_wikipathways_gmt
 from ..constants import SPECIES_REMAPPING
 from ..struct import Obo, Reference, Term, from_species
-from ..struct.typedef import has_participant
+from ..struct.typedef import contributes_to_condition, has_depiction, has_participant, located_in
 from ..utils.path import ensure_path
 
 __all__ = [
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 PREFIX = "wikipathways"
 
+ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
 _PATHWAY_INFO = [
     ("Anopheles_gambiae", "7165"),
     ("Arabidopsis_thaliana", "3702"),
@@ -46,17 +48,27 @@ class WikiPathwaysGetter(Obo):
     """An ontology representation of WikiPathways' pathway database."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_participant]
+    typedefs = [from_species, has_participant, contributes_to_condition, located_in, has_depiction]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from iter_terms(version=self._version_or_raise)
 
 
-
+PW_PREFIX = "http://purl.obolibrary.org/obo/PW_"
+DOID_PREFIX = "http://purl.obolibrary.org/obo/DOID_"
+CL_PREFIX = "http://purl.obolibrary.org/obo/CL_"
+
+
+def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[Term]:
     """Get WikiPathways terms."""
-
+    archive_url = f"https://data.wikipathways.org/current/rdf/wikipathways-{version}-rdf-wp.zip"
+    archive = pystow.ensure(PREFIX, url=archive_url, version=version)
 
+    base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
+    pw_references = set()
     for species_code, taxonomy_id in tqdm(_PATHWAY_INFO, desc=f"[{PREFIX}]", unit="species"):
         url = f"{base_url}-{species_code}.gmt"
         try:
@@ -68,15 +80,88 @@ def iter_terms(version: str) -> Iterable[Term]:
         taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
 
         for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
-
+            graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
+            uri = f"https://identifiers.org/wikipathways/{identifier}"
+
+            definition: str | None = None
+            if include_descriptions:
+                # TODO deal with weird characters breaking OFN
+                description_results = list(
+                    graph.query(
+                        f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/dcterms:description ?p }} LIMIT 1"
+                    )
+                )
+                if description_results:
+                    definition = str(description_results[0][0])  # type:ignore[index]
+
+            term = Term(
+                reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+                definition=definition,
+            )
             term.set_species(taxonomy_id, taxonomy_name)
+            term.annotate_uri(
+                has_depiction,
+                f"https://www.wikipathways.org/wikipathways-assets/pathways/{identifier}/{identifier}.svg",
+            )
             for ncbigene_id in genes:
                 term.annotate_object(
                     has_participant,
                     Reference(prefix="ncbigene", identifier=ncbigene_id),
                 )
+            # TODO switch query over to including chemicals from RDF SPARQL query
+            # TODO get description from SPARQL
+            parents = [  # type:ignore[misc]
+                p
+                for (p,) in graph.query(
+                    f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:pathwayOntologyTag ?p }}"
+                )
+            ]
+            for parent in parents:
+                if parent.startswith(PW_PREFIX):
+                    ref = Reference(prefix="pw", identifier=parent.removeprefix(PW_PREFIX))
+                    pw_references.add(ref)
+                    term.append_parent(ref)
+            if not parents:
+                tqdm.write(f"[{term.curie}] could not find parent")
+                term.append_parent(ROOT)
+
+            diseases = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:diseaseOntologyTag ?p }}"
+            )
+            for (disease,) in diseases:  # type:ignore[misc]
+                if disease.startswith(DOID_PREFIX):
+                    term.annotate_object(
+                        contributes_to_condition,
+                        Reference(prefix="doid", identifier=disease.removeprefix(DOID_PREFIX)),
+                    )
+
+            cells = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:cellTypeOntologyTag ?p }}"
+            )
+            for (cell,) in cells:  # type:ignore[misc]
+                if cell.startswith(CL_PREFIX):
+                    term.annotate_object(
+                        located_in,
+                        Reference(prefix="cl", identifier=cell.removeprefix(CL_PREFIX)),
+                    )
+
             yield term
 
+    from ..api import get_ancestors
+    from ..getters import get_ontology
+
+    for pw_reference in list(pw_references):
+        pw_references.update(get_ancestors(pw_reference) or set())
+
+    for pw_term in get_ontology("pw"):
+        if pw_term.reference in pw_references:
+            yield Term(
+                reference=pw_term.reference,
+                definition=pw_term.definition,
+                # PW has issues in hierarchy - there are lots of leaves with no root
+                parents=pw_term.parents or [ROOT],
+            )
+
 
 if __name__ == "__main__":
     WikiPathwaysGetter.cli()
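Note: the new enrichment reads each pathway's Turtle file from the RDF archive and queries it with SPARQL property paths (`pav:hasVersion/wp:pathwayOntologyTag`, and similarly for disease and cell-type tags). Below is a standalone sketch of the same query pattern with rdflib; the explicit PREFIX declarations, the namespace IRIs, and the `WP554.ttl` file name are assumptions for illustration (the archived `.ttl` files normally bind these prefixes already, which is why the code above can omit them).

```python
from rdflib import Graph

# assumption: a single pathway's Turtle file extracted from the RDF archive,
# e.g. wp/WP554.ttl; the identifier is only an example
graph = Graph()
graph.parse("WP554.ttl", format="turtle")

query = """
PREFIX pav: <http://purl.org/pav/>
PREFIX wp: <http://vocabularies.wikipathways.org/wp#>

SELECT ?tag WHERE {
    <https://identifiers.org/wikipathways/WP554> pav:hasVersion/wp:pathwayOntologyTag ?tag
}
"""
for (tag,) in graph.query(query):
    # tags come back as Pathway Ontology IRIs,
    # e.g. http://purl.obolibrary.org/obo/PW_0000001
    print(tag)
```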
pyobo/struct/__init__.py
CHANGED
@@ -16,6 +16,7 @@ from .struct import (
     SynonymTypeDef,
     Term,
     TypeDef,
+    build_ontology,
     make_ad_hoc_ontology,
 )
 from .struct_utils import Annotation, Stanza, StanzaType
@@ -57,6 +58,7 @@ __all__ = [
     "Term",
     "TypeDef",
     "_parse_str_or_curie_or_uri",
+    "build_ontology",
     "default_reference",
     "derives_from",
     "enables",
pyobo/struct/functional/dsl.py
CHANGED
@@ -211,7 +211,16 @@ class LiteralBox(Box):
 
     def to_funowl(self) -> str:
         """Represent this literal for functional OWL."""
-
+        rv = self.literal.n3(self._namespace_manager)
+        # it appears that the OFN format doesn't use triple quotes
+        if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
+            # strip them off
+            rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
+            # escape quotes
+            rv = rv.replace('"', '\\"')
+            # stick back quotes and xsd tag
+            rv = '"' + rv + '"^^xsd:string'
+        return rv
 
     def to_funowl_args(self) -> str:  # pragma: no cover
         """Get the inside of the functional OWL tag representing the literal (unused)."""
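Note: the new `LiteralBox.to_funowl` body works around rdflib's long (triple-quoted) string serialization, which functional OWL does not use. A minimal standalone illustration of the same normalization, applied to an already-serialized N3 literal:

```python
# an N3 serialization that uses long-string (triple) quoting
rv = '"""a "quoted" label"""^^xsd:string'

if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
    # strip the long quotes, escape the inner quotes, and re-wrap
    rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
    rv = rv.replace('"', '\\"')
    rv = '"' + rv + '"^^xsd:string'

assert rv == '"a \\"quoted\\" label"^^xsd:string'
```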
pyobo/struct/functional/ontology.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
 from pathlib import Path
 
 from curies import Converter
+from pystow.utils import safe_open
 from rdflib import OWL, RDF, Graph, term
 
 from pyobo.struct.functional.dsl import Annotation, Annotations, Axiom, Box
@@ -16,7 +17,6 @@ from pyobo.struct.functional.utils import (
     FunctionalOWLSerializable,
     list_to_funowl,
 )
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "Document",
@@ -108,9 +108,9 @@ class Document:
         return graph
 
     def write_funowl(self, path: str | Path) -> None:
-        """Write functional OWL to a file
+        """Write functional OWL to a file."""
         path = Path(path).expanduser().resolve()
-        with safe_open(path,
+        with safe_open(path, operation="write") as file:
             file.write(self.to_funowl())
 
     def to_funowl(self) -> str:
pyobo/struct/obo/reader.py
CHANGED
@@ -18,6 +18,7 @@ from curies import ReferenceTuple
 from curies.preprocessing import BlocklistError
 from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 
 from .reader_utils import (
@@ -52,8 +53,7 @@ from ...identifier_utils import (
     get_rules,
 )
 from ...utils.cache import write_gzipped_graph
-from ...utils.
-from ...utils.misc import STATIC_VERSION_REWRITES, cleanup_version
+from ...utils.misc import _prioritize_version
 
 __all__ = [
     "from_obo_path",
@@ -90,7 +90,7 @@ def from_obo_path(
         )
     else:
         logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
-        with safe_open(path, read
+        with safe_open(path, operation="read") as file:
            graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
 
     if prefix:
@@ -157,7 +157,7 @@ def from_obonet(
     upgrade: bool = True,
     use_tqdm: bool = False,
 ) -> Obo:
-    """Get all
+    """Get all the terms from a OBO graph."""
     ontology_prefix_raw = graph.graph["ontology"]
     ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
     logger.info("[%s] extracting OBO using obonet", ontology_prefix)
@@ -168,8 +168,11 @@ def from_obonet(
 
     macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
 
-    data_version =
-        graph
+    data_version = _prioritize_version(
+        data_version=graph.graph.get("data-version") or None,
+        ontology_prefix=ontology_prefix,
+        version=version,
+        date=date,
     )
     if data_version and "/" in data_version:
         raise ValueError(
@@ -533,17 +536,22 @@ def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool
         stanza.append_subset(reference)
 
 
+# needed to parse OPMI
+_BOOLEAN_TRUE_VALUES = {"true", "1", 1}
+_BOOLEAN_FALSE_VALUES = {"false", "0", 0}
+
+
 def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
     value = data.get(tag)
     if value is None:
         return None
     if isinstance(value, list):
         value = value[0]
-    if value
+    if value in _BOOLEAN_FALSE_VALUES:
         return False
-    if value
+    if value in _BOOLEAN_TRUE_VALUES:
         return True
-    raise ValueError(value)
+    raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")
 
 
 def _get_reference(
@@ -703,50 +711,6 @@ def _clean_graph_ontology(graph, prefix: str) -> None:
     graph.graph["ontology"] = prefix
 
 
-def _clean_graph_version(
-    graph, ontology_prefix: str, version: str | None, date: datetime | None
-) -> str | None:
-    if ontology_prefix in STATIC_VERSION_REWRITES:
-        return STATIC_VERSION_REWRITES[ontology_prefix]
-
-    data_version: str | None = graph.graph.get("data-version") or None
-    if version:
-        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
-        if not data_version:
-            logger.debug(
-                "[%s] did not have a version, overriding with %s",
-                ontology_prefix,
-                clean_injected_version,
-            )
-            return clean_injected_version
-
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        if clean_data_version != clean_injected_version:
-            # in this case, we're going to trust the one that's passed
-            # through explicitly more than the graph's content
-            logger.debug(
-                "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
-            )
-        return clean_injected_version
-
-    if data_version:
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
-        return clean_data_version
-
-    if date is not None:
-        derived_date_version = date.strftime("%Y-%m-%d")
-        logger.debug(
-            "[%s] does not report a version. falling back to date: %s",
-            ontology_prefix,
-            derived_date_version,
-        )
-        return derived_date_version
-
-    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
-    return None
-
-
 def _iter_obo_graph(
     graph: nx.MultiDiGraph,
     *,
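Note: the version-resolution logic removed here (`_clean_graph_version`) is replaced by the `_prioritize_version` helper imported from `pyobo/utils/misc.py`, whose implementation is not shown in this diff. Separately, the boolean tag parser now accepts the `1`/`0` spellings (string or int) that appear in OPMI, in addition to `true`/`false`; a small standalone sketch copying the logic from `_get_boolean` above:

```python
_BOOLEAN_TRUE_VALUES = {"true", "1", 1}
_BOOLEAN_FALSE_VALUES = {"false", "0", 0}


def parse_obo_boolean(value) -> bool:
    """Parse a boolean tag value the way the updated reader does (sketch)."""
    if isinstance(value, list):
        value = value[0]  # obonet may wrap repeated tags in a list
    if value in _BOOLEAN_FALSE_VALUES:
        return False
    if value in _BOOLEAN_TRUE_VALUES:
        return True
    raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")


assert parse_obo_boolean("true") is True
assert parse_obo_boolean(["0"]) is False
assert parse_obo_boolean(1) is True
```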
pyobo/struct/obograph/export.py
CHANGED
@@ -8,11 +8,11 @@ import curies
 import obographs as og
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as v
+from pystow.utils import safe_open
 
 from pyobo.identifier_utils.api import get_converter
 from pyobo.struct import Obo, OBOLiteral, Stanza, Term, TypeDef
 from pyobo.struct import typedef as tdv
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "to_obograph",
@@ -25,7 +25,7 @@ def write_obograph(obo: Obo, path: str | Path, *, converter: Converter | None =
     """Write an ontology to a file as OBO Graph JSON."""
     path = Path(path).expanduser().resolve()
     raw_graph = to_obograph(obo, converter=converter)
-    with safe_open(path,
+    with safe_open(path, operation="write") as file:
         file.write(raw_graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
 
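Note: several modules in this release (the OBO reader, the OBO Graph exporter, and the functional-OWL writer) switch from `pyobo.utils.io.safe_open` to `pystow.utils.safe_open` with an explicit `operation` keyword. A minimal sketch of the pattern as it is used in this diff; the example path is hypothetical, and whether compression is inferred from the file suffix is an assumption about the pystow helper.

```python
from pathlib import Path

from pystow.utils import safe_open

path = Path("example.json")  # hypothetical output path

# write, as in write_obograph and Document.write_funowl above
with safe_open(path, operation="write") as file:
    file.write('{"graphs": []}')

# read, as in from_obo_path above
with safe_open(path, operation="read") as file:
    print(file.read())
```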