PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/api/typedefs.py CHANGED Viewed

@@ -2,15 +2,16 @@
 import logging
 from functools import lru_cache
-from typing import Optional
 import pandas as pd
+from typing_extensions import Unpack
-from .utils import get_version
+from .utils import get_version_from_kwargs
+from ..constants import GetOntologyKwargs, check_should_cache, check_should_force
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
 from ..utils.cache import cached_df
-from ..utils.path import prefix_cache_join
+from ..utils.path import CacheArtifact, get_cache_path
 __all__ = [
     "get_typedef_df",
@@ -21,18 +22,17 @@ logger = logging.getLogger(__name__)
 @lru_cache
 @wrap_norm_prefix
-def get_typedef_df(
-    prefix: str, *, force: bool = False, version: Optional[str] = None
-) -> pd.DataFrame:
+def get_typedef_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
     """Get an identifier to name mapping for the typedefs in an OBO file."""
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.typedefs, version=version)
-    @cached_df(path=path, dtype=str, force=force)
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
     def _df_getter() -> pd.DataFrame:
         logger.debug("[%s] no cached typedefs found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
+        ontology = get_ontology(prefix, **kwargs)
         logger.debug("[%s] loading typedef mappings", prefix)
         return ontology.get_typedef_df()

pyobo/api/utils.py CHANGED Viewed

@@ -3,18 +3,23 @@
 import json
 import logging
 import os
+import warnings
 from functools import lru_cache
-from typing import Optional
+from typing import Literal, overload
 import bioversions
+import curies
+from bioregistry import NormalizedNamableReference as Reference
+from curies import ReferenceTuple
+from ..constants import GetOntologyKwargs
 from ..utils.path import prefix_directory_join
 __all__ = [
-    "safe_get_version",
+    "VersionError",
     "get_version",
     "get_version_pins",
-    "VersionError",
+    "safe_get_version",
 ]
 logger = logging.getLogger(__name__)
@@ -24,11 +29,25 @@ class VersionError(ValueError):
     """A catch-all for version getting failure."""
-def get_version(prefix: str) -> Optional[str]:
+# docstr-coverage:excused `overload`
+@overload
+def get_version(prefix: str, *, strict: Literal[True] = True) -> str: ...
+# docstr-coverage:excused `overload`
+@overload
+def get_version(prefix: str, *, strict: Literal[False] = False) -> str | None: ...
+def get_version(prefix: str, *, strict: bool = False) -> str | None:
     """Get the version for the resource, if available.
     :param prefix: the resource name
-    :return: The version if available else None
+    :param strict: Should an error be raised if no version is available?
+    :returns: The version if available else None
+    :raises VersionError: if the version is not available and strict mode is enabled
     """
     # Prioritize loaded environment variable PYOBO_VERSION_PINS dictionary
     version = get_version_pins().get(prefix)
@@ -47,13 +66,27 @@ def get_version(prefix: str) -> Optional[str]:
     metadata_json_path = prefix_directory_join(prefix, name="metadata.json", ensure_exists=False)
     if metadata_json_path.exists():
         data = json.loads(metadata_json_path.read_text())
-        return data["version"]
+        version = data["version"]
+        if version:
+            return version
+    if strict:
+        raise ValueError
     return None
+def get_version_from_kwargs(prefix: str, kwargs: GetOntologyKwargs) -> str | None:
+    """Get the version for the resource based on generic keyword arguments."""
+    if version := kwargs.get("version"):
+        return version
+    # it's okay if none gets returned after getting this far, we at least tried
+    return get_version(prefix, strict=False)
 def safe_get_version(prefix: str) -> str:
     """Get the version."""
+    # FIXME replace with get_version(prefix, strict=True)
     v = get_version(prefix)
     if v is None:
         raise ValueError
@@ -65,13 +98,12 @@ def get_version_pins() -> dict[str, str]:
     """Retrieve user-defined resource version pins.
     To set your own resource pins, set your machine's environmental variable
-    "PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes
-    as keys and string versions of their respective resource as values.
-    Constraining version pins will make PyOBO rely on cached versions of a resource.
-    A user might want to pin resource versions that are used by PyOBO due to
-    the fact that PyOBO will download the latest version of a resource if it is
-    not pinned. This downloading process can lead to a slow-down in downstream
-    applications that rely on PyOBO.
+    "PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes as keys
+    and string versions of their respective resource as values. Constraining version
+    pins will make PyOBO rely on cached versions of a resource. A user might want to pin
+    resource versions that are used by PyOBO due to the fact that PyOBO will download
+    the latest version of a resource if it is not pinned. This downloading process can
+    lead to a slow-down in downstream applications that rely on PyOBO.
     """
     version_pins_str = os.getenv("PYOBO_VERSION_PINS")
     if not version_pins_str:
@@ -102,3 +134,24 @@ def get_version_pins() -> dict[str, str]:
         f"name."
     )
     return version_pins
+def _get_pi(
+    prefix: str | curies.Reference | ReferenceTuple, identifier: str | None = None, /
+) -> Reference:
+    if isinstance(prefix, ReferenceTuple | curies.Reference):
+        if identifier is not None:
+            raise ValueError("unexpected non-none value passed as second positional argument")
+        return Reference(prefix=prefix.prefix, identifier=prefix.identifier)
+    if isinstance(prefix, str) and identifier is None:
+        return Reference.from_curie(prefix)
+    if identifier is None:
+        raise ValueError(
+            "prefix was given as a string, so an identifier was expected to be passed as a string as well"
+        )
+    warnings.warn(
+        "Passing a prefix and identifier as seperate arguments is deprecated. Please pass a curies.Reference or curies.ReferenceTuple in the first positional-only argument instead.",
+        DeprecationWarning,
+        stacklevel=4,  # this is 4 since this is (always?) called from inside a decorator
+    )
+    return Reference(prefix=prefix, identifier=identifier)

pyobo/api/xrefs.py CHANGED Viewed

@@ -1,28 +1,36 @@
 """High-level API for synonyms."""
 import logging
+import warnings
 from collections.abc import Mapping
 from functools import lru_cache
-from typing import Optional, Union
 import pandas as pd
-from tqdm.auto import tqdm
-from tqdm.contrib.logging import logging_redirect_tqdm
-from .utils import get_version
-from ..constants import TARGET_ID, TARGET_PREFIX
+from curies import ReferenceTuple
+from typing_extensions import Unpack
+from .utils import get_version_from_kwargs
+from ..constants import (
+    TARGET_ID,
+    TARGET_PREFIX,
+    GetOntologyKwargs,
+    check_should_cache,
+    check_should_force,
+    check_should_use_tqdm,
+)
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..struct import Obo, Reference
-from ..utils.cache import cached_df, cached_mapping
-from ..utils.path import prefix_cache_join
+from ..struct import Obo
+from ..utils.cache import cached_df
+from ..utils.path import CacheArtifact, get_cache_path
 __all__ = [
-    "get_xrefs_df",
     "get_filtered_xrefs",
+    "get_mappings_df",
+    "get_sssom_df",
     "get_xref",
     "get_xrefs",
-    "get_sssom_df",
+    "get_xrefs_df",
 ]
 logger = logging.getLogger(__name__)
@@ -35,10 +43,10 @@ def get_xref(
     new_prefix: str,
     *,
     flip: bool = False,
-    version: Optional[str] = None,
-) -> Optional[str]:
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the xref with the new prefix if a direct path exists."""
-    filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
+    filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, **kwargs)
     return filtered_xrefs.get(identifier)
@@ -49,32 +57,18 @@ def get_filtered_xrefs(
     xref_prefix: str,
     *,
     flip: bool = False,
-    use_tqdm: bool = False,
-    force: bool = False,
-    strict: bool = False,
-    version: Optional[str] = None,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> Mapping[str, str]:
     """Get xrefs to a given target."""
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, "xrefs", name=f"{xref_prefix}.tsv", version=version)
-    all_xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
-    header = [f"{prefix}_id", f"{xref_prefix}_id"]
-    @cached_mapping(path=path, header=header, use_tqdm=use_tqdm, force=force)
-    def _get_mapping() -> Mapping[str, str]:
-        if all_xrefs_path.is_file():
-            logger.info("[%s] loading pre-cached xrefs", prefix)
-            df = pd.read_csv(all_xrefs_path, sep="\t", dtype=str)
-            logger.info("[%s] filtering pre-cached xrefs", prefix)
-            df = df.loc[df[TARGET_PREFIX] == xref_prefix, [f"{prefix}_id", TARGET_ID]]
-            return dict(df.values)
-        logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, strict=strict, version=version)
-        return ontology.get_filtered_xrefs_mapping(xref_prefix, use_tqdm=use_tqdm)
-    rv = _get_mapping()
+    mappings_df = get_mappings_df(prefix, **kwargs)
+    rv = {}
+    for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
+        subject_pair = ReferenceTuple.from_curie(subject_curie)
+        object_pair = ReferenceTuple.from_curie(object_curie)
+        if object_pair.prefix == xref_prefix:
+            rv[subject_pair.identifier] = object_pair.identifier
     if flip:
         return {v: k for k, v in rv.items()}
     return rv
@@ -84,104 +78,104 @@ get_xrefs = get_filtered_xrefs
 @wrap_norm_prefix
-def get_xrefs_df(
-    prefix: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    strict: bool = False,
-    version: Optional[str] = None,
-) -> pd.DataFrame:
+def get_xrefs_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
     """Get all xrefs."""
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
+    warnings.warn(
+        f"use pyobo.get_mappings_df instead of pyobo.get_xrefs_df."
+        f"Not using cache artifact path to {CacheArtifact.xrefs}",
+        DeprecationWarning,
+        stacklevel=2,
+    )
-    @cached_df(path=path, dtype=str, force=force)
-    def _df_getter() -> pd.DataFrame:
-        logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, strict=strict, version=version)
-        return ontology.get_xrefs_df(use_tqdm=use_tqdm)
+    mappings_df = get_mappings_df(prefix, **kwargs)
-    return _df_getter()
+    rows = []
+    for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
+        subject_pair = ReferenceTuple.from_curie(subject_curie)
+        object_pair = ReferenceTuple.from_curie(object_curie)
+        rows.append((subject_pair.identifier, object_pair.prefix, object_pair.identifier))
+    df = pd.DataFrame(rows, columns=[f"{prefix}_id", TARGET_PREFIX, TARGET_ID])
+    df = df.drop_duplicates()
+    return df
 def get_sssom_df(
-    prefix: Union[str, Obo],
+    prefix: str | Obo, *, names: bool = True, **kwargs: Unpack[GetOntologyKwargs]
+) -> pd.DataFrame:
+    """Get an SSSOM dataframe, replaced by :func:`get_mappings_df`."""
+    warnings.warn("get_sssom_df was renamed to get_mappings_df", DeprecationWarning, stacklevel=2)
+    return get_mappings_df(prefix=prefix, names=names, **kwargs)
+def get_mappings_df(
+    prefix: str | Obo,
     *,
-    predicate_id: str = "oboinowl:hasDbXref",
-    justification: str = "sempav:UnspecifiedMatching",
     names: bool = True,
-    **kwargs,
+    include_mapping_source_column: bool = False,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> pd.DataFrame:
-    r"""Get xrefs from a source as an SSSOM dataframe.
+    r"""Get semantic mappings from a source as an SSSOM dataframe.
     :param prefix: The ontology to look in for xrefs
-    :param predicate_id: The predicate used in the SSSOM document. By default, ontologies
-        don't typically ascribe semantics to xrefs so ``oboinowl:hasDbXref`` is used
-    :param justification: The justification for the mapping. By default, ontologies
-        don't typically ascribe semantics, so this is left with `sempav:UnspecifiedMatching`
     :param names: Add name columns (``subject_label`` and ``object_label``)
     :returns: A SSSOM-compliant dataframe of xrefs
     For example, if you want to get UMLS as an SSSOM dataframe, you can do
-    >>> import pyobo
-    >>> df = pyobo.get_sssom_df("umls")
-    >>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
+    .. code-block:: python
-    If you don't want to get all of the many resources required to add
-    names, you can pass ``names=False``
+        import pyobo
-    >>> import pyobo
-    >>> df = pyobo.get_sssom_df("umls", names=False)
-    >>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
+        df = pyobo.get_mappings_df("umls")
+        df.to_csv("umls.sssom.tsv", sep="\t", index=False)
-    .. note:: This assumes the Bioregistry as the prefix map
-    """
-    from .names import get_name
+    If you don't want to get all of the many resources required to add names, you can
+    pass ``names=False``
+    .. code-block:: python
+        import pyobo
+        df = pyobo.get_mappings_df("umls", names=False)
+        df.to_csv("umls.sssom.tsv", sep="\t", index=False)
+    .. note::
+        This assumes the Bioregistry as the prefix map
+    """
     if isinstance(prefix, Obo):
-        df = prefix.get_xrefs_df()
+        df = prefix.get_mappings_df(
+            include_subject_labels=names,
+            include_mapping_source_column=include_mapping_source_column,
+            use_tqdm=check_should_use_tqdm(kwargs),
+        )
         prefix = prefix.ontology
     else:
-        df = get_xrefs_df(prefix=prefix, **kwargs)
-    rows: list[tuple[str, ...]] = []
-    with logging_redirect_tqdm():
-        for source_id, target_prefix, target_id in tqdm(
-            df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
-        ):
-            source = Reference(prefix=prefix, identifier=source_id)
-            target = Reference(prefix=target_prefix, identifier=target_id)
-            if names:
-                rows.append(
-                    (
-                        source.curie,
-                        get_name(prefix, source_id) or "",
-                        target.curie,
-                        get_name(target_prefix, target_id),
-                        predicate_id,
-                        justification,
-                    )
-                )
-            else:
-                rows.append((source.curie, target.curie, predicate_id, justification))
+        version = get_version_from_kwargs(prefix, kwargs)
+        path = get_cache_path(prefix, CacheArtifact.mappings, version=version)
+        @cached_df(
+            path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+        )
+        def _df_getter() -> pd.DataFrame:
+            logger.info("[%s] rebuilding SSSOM", prefix)
+            ontology = get_ontology(prefix, **kwargs)
+            return ontology.get_mappings_df(
+                use_tqdm=check_should_use_tqdm(kwargs),
+                include_subject_labels=True,
+                include_mapping_source_column=include_mapping_source_column,
+            )
+        df = _df_getter()
     if names:
-        columns = [
-            "subject_id",
-            "subject_label",
-            "object_id",
-            "object_label",
-            "predicate_id",
-            "mapping_justification",
-        ]
-    else:
-        columns = [
-            "subject_id",
-            "object_id",
-            "predicate_id",
-            "mapping_justification",
-        ]
-    return pd.DataFrame(rows, columns=columns)
+        from .names import get_name_by_curie
+        df["object_label"] = df["object_id"].map(get_name_by_curie)
+    elif "subject_label" in df.columns:
+        del df["subject_label"]
+    return df

pyobo/cli/__init__.py CHANGED Viewed

File without changes

pyobo/cli/cli.py CHANGED Viewed

@@ -2,23 +2,19 @@
 import logging
 import os
-import sys
+from collections.abc import Iterable
+from functools import lru_cache
 from operator import itemgetter
+import bioregistry
 import click
 import humanize
-from more_click import verbose_option
 from tabulate import tabulate
-from .aws import main as aws_main
 from .database import main as database_main
 from .lookup import lookup
-from ..constants import RAW_DIRECTORY
-from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
-from ..registries import iter_cached_obo
-from ..utils.io import get_writer
-from ..xrefdb.canonicalizer import Canonicalizer, get_priority_curie, remap_file_stream
-from ..xrefdb.priority import DEFAULT_PRIORITY_LIST
+from ..constants import GLOBAL_SKIP, RAW_DIRECTORY
+from ..plugins import has_nomenclature_plugin
 __all__ = ["main"]
@@ -31,36 +27,6 @@ def main():
     """CLI for PyOBO."""
-_ORDERING_TEXT = ", ".join(f"{i}) {x}" for i, x in enumerate(DEFAULT_PRIORITY_LIST, start=1))
-@main.command(help=f"Prioritize a CURIE from ordering: {_ORDERING_TEXT}")
-@click.argument("curie")
-def prioritize(curie: str):
-    """Prioritize a CURIE."""
-    priority_curie = get_priority_curie(curie)
-    click.secho(priority_curie)
-@main.command()
-@click.option("-i", "--file-in", type=click.File("r"), default=sys.stdin)
-@click.option("-o", "--file-out", type=click.File("w"), default=sys.stdout)
-@click.option("--column", type=int, default=0, show_default=True)
-@click.option("--sep", default="\t", show_default=True)
-def recurify(file_in, file_out, column: int, sep: str):
-    """Remap a column in a given file stream."""
-    remap_file_stream(file_in=file_in, file_out=file_out, column=column, sep=sep)
-@main.command()
-@verbose_option
-def cache():
-    """Cache all resources."""
-    for obo in iter_nomenclature_plugins():
-        click.secho(f"Caching {obo.ontology}", bold=True, fg="green")
-        obo.write_default()
 @main.command()
 @click.option("--remove-obo", is_flag=True)
 def clean(remove_obo: bool):
@@ -93,7 +59,7 @@ def clean(remove_obo: bool):
 @main.command()
 def ls():
     """List how big all of the OBO files are."""
-    entries = [(prefix, os.path.getsize(path)) for prefix, path in iter_cached_obo()]
+    entries = [(prefix, os.path.getsize(path)) for prefix, path in _iter_cached_obo()]
     entries = [
         (prefix, humanize.naturalsize(size), "✅" if not has_nomenclature_plugin(prefix) else "❌")
         for prefix, size in sorted(entries, key=itemgetter(1), reverse=True)
@@ -101,19 +67,38 @@ def ls():
     click.echo(tabulate(entries, headers=["Source", "Size", "OBO"]))
-@main.command()
-@verbose_option
-@click.option("-f", "--file", type=click.File("w"))
-def remapping(file):
-    """Make a canonical remapping."""
-    canonicalizer = Canonicalizer.get_default()
-    writer = get_writer(file)
-    writer.writerow(["input", "canonical"])
-    writer.writerows(canonicalizer.iterate_flat_mapping())
+def _iter_cached_obo() -> Iterable[tuple[str, str]]:
+    """Iterate over cached OBO paths."""
+    for prefix in os.listdir(RAW_DIRECTORY):
+        if prefix in GLOBAL_SKIP or _has_no_download(prefix) or bioregistry.is_deprecated(prefix):
+            continue
+        d = RAW_DIRECTORY.joinpath(prefix)
+        if not os.path.isdir(d):
+            continue
+        for x in os.listdir(d):
+            if x.endswith(".obo"):
+                p = os.path.join(d, x)
+                yield prefix, p
+def _has_no_download(prefix: str) -> bool:
+    """Return if the prefix is not available."""
+    prefix_norm = bioregistry.normalize_prefix(prefix)
+    return prefix_norm is not None and prefix_norm in _no_download()
+@lru_cache(maxsize=1)
+def _no_download() -> set[str]:
+    """Get the list of prefixes not available as OBO."""
+    return {
+        prefix
+        for prefix in bioregistry.read_registry()
+        if bioregistry.get_obo_download(prefix) is None
+        and bioregistry.get_owl_download(prefix) is None
+    }
 main.add_command(lookup)
-main.add_command(aws_main)
 main.add_command(database_main)
 if __name__ == "__main__":

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl