pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/properties.py CHANGED
@@ -1,140 +1,159 @@
  """High-level API for properties."""

  import logging
- import os
  from collections.abc import Mapping
- from typing import Optional

  import pandas as pd
-
- from .utils import get_version
+ from tqdm import tqdm
+ from typing_extensions import Unpack
+
+ from .utils import get_version_from_kwargs
+ from ..constants import (
+     GetOntologyKwargs,
+     check_should_cache,
+     check_should_force,
+     check_should_use_tqdm,
+ )
  from ..getters import get_ontology
  from ..identifier_utils import wrap_norm_prefix
- from ..utils.cache import cached_df, cached_mapping, cached_multidict
+ from ..struct import Reference
+ from ..struct.struct_utils import OBOLiteral, ReferenceHint, _ensure_ref
+ from ..utils.cache import cached_df
  from ..utils.io import multidict
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path

  __all__ = [
-     "get_properties_df",
      "get_filtered_properties_df",
      "get_filtered_properties_mapping",
      "get_filtered_properties_multimapping",
-     "get_property",
+     "get_literal_properties",
+     "get_literal_properties_df",
+     "get_object_properties",
+     "get_object_properties_df",
      "get_properties",
+     "get_properties_df",
+     "get_property",
  ]

  logger = logging.getLogger(__name__)


+ def get_object_properties_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+     """Get a dataframe of object property triples."""
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.object_properties, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
+     def _df_getter() -> pd.DataFrame:
+         return get_ontology(prefix, **kwargs).get_object_properties_df(
+             use_tqdm=check_should_use_tqdm(kwargs)
+         )
+
+     return _df_getter()
+
+
+ def get_object_properties(
+     prefix, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, Reference]]:
+     """Get a list of object property triples."""
+     df = get_object_properties_df(prefix, **kwargs)
+     return [
+         (Reference.from_curie(s), Reference.from_curie(p), Reference.from_curie(o))
+         for s, p, o in df.values
+     ]
+
+
+ def get_literal_properties(
+     prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, OBOLiteral]]:
+     """Get a list of literal property triples."""
+     df = get_literal_properties_df(prefix, **kwargs)
+     return [
+         (
+             Reference.from_curie(s),
+             Reference.from_curie(p),
+             OBOLiteral(
+                 value,
+                 Reference.from_curie(datatype),
+                 language if language and pd.notna(language) else None,
+             ),
+         )
+         for s, p, value, datatype, language in tqdm(
+             df.values,
+             desc=f"[{prefix}] parsing properties",
+             unit_scale=True,
+             unit="triple",
+             disable=not check_should_use_tqdm(kwargs),
+         )
+     ]
+
+
+ def get_literal_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+     """Get a dataframe of literal property quads."""
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.literal_properties, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
+     def _df_getter() -> pd.DataFrame:
+         return get_ontology(prefix, **kwargs).get_literal_properties_df(
+             use_tqdm=check_should_use_tqdm(kwargs)
+         )
+
+     return _df_getter()
+
+
  @wrap_norm_prefix
- def get_properties_df(
-     prefix: str, *, force: bool = False, version: Optional[str] = None
- ) -> pd.DataFrame:
+ def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
      """Extract properties.

      :param prefix: the resource to load
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A dataframe with the properties
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
-     def _df_getter() -> pd.DataFrame:
-         if force:
-             logger.info("[%s] forcing reload for properties", prefix)
-         else:
-             logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         df = ontology.get_properties_df()
-         df.dropna(inplace=True)
-         return df
-
-     return _df_getter()
+     df1 = get_literal_properties_df(prefix, **kwargs)
+     df2 = get_object_properties_df(prefix, **kwargs)
+     df = pd.concat([df1[["source", "predicate", "target"]], df2])
+     ll = len(prefix) + 1
+     df[f"{prefix}_id"] = df["source"].map(lambda x: x[ll:])
+     df = df.rename(columns={"predicate": "property", "target": "value"})
+     del df["source"]
+     return df[[f"{prefix}_id", "property", "value"]]


  @wrap_norm_prefix
  def get_filtered_properties_mapping(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> Mapping[str, str]:
      """Extract a single property for each term as a dictionary.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A mapping from identifier to property value
      """
-     df = get_properties_df(prefix=prefix, force=force, version=version)
-     df = df[df["property"] == prop]
-     return dict(df[[f"{prefix}_id", "value"]].values)
-
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_mapping(path=path, header=[f"{prefix}_id", prop], force=force)
-     def _mapping_getter() -> Mapping[str, str]:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-             return dict(df.values)
-
-         logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm)
-
-     return _mapping_getter()
+     df = get_filtered_properties_df(prefix, prop, **kwargs)
+     return dict(df.values)


  @wrap_norm_prefix
  def get_filtered_properties_multimapping(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> Mapping[str, list[str]]:
      """Extract multiple properties for each term as a dictionary.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A mapping from identifier to property values
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
+     df = get_filtered_properties_df(prefix, prop, **kwargs)
+     return multidict(df.values)

-     @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force)
-     def _mapping_getter() -> Mapping[str, list[str]]:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-             return multidict(df.values)

-         logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_multimapping(prop, use_tqdm=use_tqdm)
-
-     return _mapping_getter()
-
-
- def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
+ def get_property(
+     prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> str | None:
      """Extract a single property for the given entity.

      :param prefix: the resource to load
@@ -152,7 +171,12 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[
      return filtered_properties_mapping.get(identifier)


- def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[list[str]]:
+ def get_properties(
+     prefix: str,
+     identifier: str,
+     prop: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> list[str] | None:
      """Extract a set of properties for the given entity.

      :param prefix: the resource to load
@@ -168,39 +192,15 @@ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optiona

  @wrap_norm_prefix
  def get_filtered_properties_df(
-     prefix: str,
-     prop: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
  ) -> pd.DataFrame:
      """Extract a single property for each term.

      :param prefix: the resource to load
      :param prop: the property to extract
-     :param use_tqdm: should a progress bar be shown?
-     :param force: should the resource be re-downloaded, re-parsed, and re-cached?
      :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
      """
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-     all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
-     def _df_getter() -> pd.DataFrame:
-         if os.path.exists(all_properties_path):
-             logger.info("[%s] loading pre-cached properties", prefix)
-             df = pd.read_csv(all_properties_path, sep="\t")
-             logger.info("[%s] filtering pre-cached properties", prefix)
-             return df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-
-         if force:
-             logger.info("[%s] forcing reload for properties", prefix)
-         else:
-             logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_properties_df(prop, use_tqdm=use_tqdm)
-
-     return _df_getter()
+     prop = _ensure_ref(prop, ontology_prefix=prefix)
+     df = get_properties_df(prefix, **kwargs)
+     df = df.loc[df["property"] == prop.curie, [f"{prefix}_id", "value"]]
+     return df
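
For orientation, a minimal usage sketch (not part of the diff) of the reworked properties API, based on the signatures above: explicit force=/version=/use_tqdm= parameters are replaced by the shared GetOntologyKwargs keyword bundle, and object vs. literal properties are now retrieved separately. The "chebi" prefix and the property CURIE are illustrative.

    from pyobo.api.properties import (
        get_filtered_properties_mapping,
        get_literal_properties,
        get_object_properties,
    )

    # Object and literal property triples now have separate getters; both accept
    # the shared GetOntologyKwargs keyword arguments (e.g., version=, force=).
    object_triples = get_object_properties("chebi")    # list of (subject, predicate, object) References
    literal_triples = get_literal_properties("chebi")  # list of (subject, predicate, OBOLiteral)

    # Filtered helpers take a ReferenceHint (e.g., a CURIE string) instead of a raw
    # property string; the CURIE below is purely illustrative.
    xrefs = get_filtered_properties_mapping("chebi", "oboInOwl:hasDbXref")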
pyobo/api/relations.py CHANGED
@@ -1,15 +1,13 @@
  """High-level API for relations."""

  import logging
- import os
  from collections.abc import Mapping
  from functools import lru_cache
- from typing import Optional

- import networkx as nx
  import pandas as pd
+ from typing_extensions import Unpack

- from .utils import get_version
+ from .utils import get_version_from_kwargs
  from ..constants import (
      RELATION_COLUMNS,
      RELATION_ID,
@@ -18,50 +16,60 @@ from ..constants import (
      SOURCE_PREFIX,
      TARGET_ID,
      TARGET_PREFIX,
+     GetOntologyKwargs,
+     check_should_cache,
+     check_should_force,
+     check_should_use_tqdm,
  )
  from ..getters import get_ontology
  from ..identifier_utils import wrap_norm_prefix
- from ..struct import Reference, RelationHint, TypeDef, get_reference_tuple
+ from ..struct.reference import Reference
+ from ..struct.struct_utils import ReferenceHint, _ensure_ref
  from ..utils.cache import cached_df
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

  __all__ = [
-     "get_relations_df",
      "get_filtered_relations_df",
      "get_id_multirelations_mapping",
-     "get_relation_mapping",
      "get_relation",
-     "get_graph",
+     "get_relation_mapping",
+     "get_relations",
+     "get_relations_df",
  ]

- # TODO get_relation, get_relations
-
  logger = logging.getLogger(__name__)


+ @wrap_norm_prefix
+ def get_relations(
+     prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+ ) -> list[tuple[Reference, Reference, Reference]]:
+     """Get relations."""
+     df = get_relations_df(prefix, wide=False, **kwargs)
+     return [
+         (
+             Reference(prefix=prefix, identifier=source_id),
+             Reference(prefix=relation_prefix, identifier=relation_id),
+             Reference(prefix=target_prefix, identifier=target_id),
+         )
+         for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values
+     ]
+
+
  @wrap_norm_prefix
  def get_relations_df(
-     prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     wide: bool = False,
-     strict: bool = True,
-     version: Optional[str] = None,
+     prefix: str, *, wide: bool = False, **kwargs: Unpack[GetOntologyKwargs]
  ) -> pd.DataFrame:
      """Get all relations from the OBO."""
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="relations.tsv", version=version)
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.relations, version=version)

-     @cached_df(path=path, dtype=str, force=force)
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+     )
      def _df_getter() -> pd.DataFrame:
-         if force:
-             logger.info("[%s] forcing reload for relations", prefix)
-         else:
-             logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version, strict=strict)
-         return ontology.get_relations_df(use_tqdm=use_tqdm)
+         ontology = get_ontology(prefix, **kwargs)
+         return ontology.get_relations_df(use_tqdm=check_should_use_tqdm(kwargs))

      rv = _df_getter()

@@ -76,38 +84,29 @@ def get_relations_df(
  @wrap_norm_prefix
  def get_filtered_relations_df(
      prefix: str,
-     relation: RelationHint,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     relation: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> pd.DataFrame:
      """Get all the given relation."""
-     relation_prefix, relation_identifier = relation = get_reference_tuple(relation)
-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(
-         prefix,
-         "relations",
-         name=f"{relation_prefix}:{relation_identifier}.tsv",
-         version=version,
+     relation = _ensure_ref(relation, ontology_prefix=prefix)
+     version = get_version_from_kwargs(prefix, kwargs)
+     all_relations_path = get_cache_path(prefix, CacheArtifact.relations, version=version)
+     if all_relations_path.is_file():
+         logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
+         df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
+         idx = (df[RELATION_PREFIX] == relation.prefix) & (df[RELATION_ID] == relation.identifier)
+         columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
+         return df.loc[idx, columns]
+
+     path = get_relation_cache_path(prefix, relation, version=version)
+
+     @cached_df(
+         path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
      )
-     all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
-
-     @cached_df(path=path, dtype=str, force=force)
      def _df_getter() -> pd.DataFrame:
-         if os.path.exists(all_relations_path):
-             logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
-             df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
-             idx = (df[RELATION_PREFIX] == relation_prefix) & (
-                 df[RELATION_ID] == relation_identifier
-             )
-             columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
-             return df.loc[idx, columns]
-
          logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, version=version)
-         return ontology.get_filtered_relations_df(relation, use_tqdm=use_tqdm)
+         ontology = get_ontology(prefix, **kwargs)
+         return ontology.get_filtered_relations_df(relation, use_tqdm=check_should_use_tqdm(kwargs))

      return _df_getter()

@@ -115,29 +114,24 @@ def get_filtered_relations_df(
  @wrap_norm_prefix
  def get_id_multirelations_mapping(
      prefix: str,
-     typedef: TypeDef,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     typedef: ReferenceHint,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> Mapping[str, list[Reference]]:
      """Get the OBO file and output a synonym dictionary."""
-     if version is None:
-         version = get_version(prefix)
-     ontology = get_ontology(prefix, force=force, version=version)
-     return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)
+     kwargs["version"] = get_version_from_kwargs(prefix, kwargs)
+     ontology = get_ontology(prefix, **kwargs)
+     return ontology.get_id_multirelations_mapping(
+         typedef=typedef, use_tqdm=check_should_use_tqdm(kwargs)
+     )


  @lru_cache
  @wrap_norm_prefix
  def get_relation_mapping(
      prefix: str,
-     relation: RelationHint,
+     relation: ReferenceHint,
      target_prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     version: Optional[str] = None,
+     **kwargs: Unpack[GetOntologyKwargs],
  ) -> Mapping[str, str]:
      """Get relations from identifiers in the source prefix to target prefix with the given relation.

@@ -151,11 +145,9 @@ def get_relation_mapping(
      >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
      >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
      """
-     if version is None:
-         version = get_version(prefix)
-     ontology = get_ontology(prefix, force=force, version=version)
+     ontology = get_ontology(prefix, **kwargs)
      return ontology.get_relation_mapping(
-         relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
+         relation=relation, target_prefix=target_prefix, use_tqdm=check_should_use_tqdm(kwargs)
      )


@@ -163,13 +155,10 @@ def get_relation_mapping(
  def get_relation(
      prefix: str,
      source_identifier: str,
-     relation: RelationHint,
+     relation: ReferenceHint,
      target_prefix: str,
-     *,
-     use_tqdm: bool = False,
-     force: bool = False,
-     **kwargs,
- ) -> Optional[str]:
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> str | None:
      """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.

      .. warning:: Assumes there's only one version of the property for each term.
@@ -187,21 +176,6 @@ def get_relation(
          prefix=prefix,
          relation=relation,
          target_prefix=target_prefix,
-         use_tqdm=use_tqdm,
-         force=force,
          **kwargs,
      )
      return relation_mapping.get(source_identifier)
-
-
- def get_graph(prefix: str, **kwargs) -> nx.DiGraph:
-     """Get the relation graph."""
-     rv = nx.MultiDiGraph()
-     df = get_relations_df(prefix=prefix, **kwargs)
-     for source_id, relation_prefix, relation_id, target_ns, target_id in df.values:
-         rv.add_edge(
-             f"{prefix}:{source_id}",
-             f"{target_ns}:{target_id}",
-             key=f"{relation_prefix}:{relation_id}",
-         )
-     return rv
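
The `get_graph` helper (and this module's networkx import) was removed. If a graph view is still needed, a caller can rebuild it on top of the new keyword-argument `get_relations_df`; a minimal sketch under that assumption, adapted from the deleted function:

    import networkx as nx

    from pyobo.api.relations import get_relations_df


    def build_relation_graph(prefix: str, **kwargs) -> nx.MultiDiGraph:
        """Rebuild the graph formerly returned by pyobo.api.relations.get_graph."""
        rv = nx.MultiDiGraph()
        # wide=False keeps the long format: source_id, relation_prefix, relation_id, target_prefix, target_id
        df = get_relations_df(prefix, wide=False, **kwargs)
        for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values:
            rv.add_edge(
                f"{prefix}:{source_id}",
                f"{target_prefix}:{target_id}",
                key=f"{relation_prefix}:{relation_id}",
            )
        return rv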
pyobo/api/species.py CHANGED
@@ -3,14 +3,17 @@
  import logging
  from collections.abc import Mapping
  from functools import lru_cache
- from typing import Optional
+
+ import curies
+ from typing_extensions import Unpack

  from .alts import get_primary_identifier
- from .utils import get_version
+ from .utils import _get_pi, get_version_from_kwargs
+ from ..constants import GetOntologyKwargs, check_should_force
  from ..getters import NoBuildError, get_ontology
  from ..identifier_utils import wrap_norm_prefix
  from ..utils.cache import cached_mapping
- from ..utils.path import prefix_cache_join
+ from ..utils.path import CacheArtifact, get_cache_path

  __all__ = [
      "get_id_species_mapping",
@@ -20,34 +23,35 @@ __all__ = [
  logger = logging.getLogger(__name__)


- @wrap_norm_prefix
- def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+ def get_species(
+     prefix: str | curies.Reference | curies.ReferenceTuple,
+     identifier: str | None = None,
+     /,
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> str | None:
      """Get the species."""
-     if prefix == "uniprot":
+     t = _get_pi(prefix, identifier)
+
+     if t.prefix == "uniprot":
          raise NotImplementedError

      try:
-         id_species = get_id_species_mapping(prefix, version=version)
+         id_species = get_id_species_mapping(t.prefix, **kwargs)
      except NoBuildError:
-         logger.warning("unable to look up species for prefix %s", prefix)
+         logger.warning("unable to look up species for prefix %s", t.prefix)
          return None

      if not id_species:
-         logger.warning("no results produced for prefix %s", prefix)
+         logger.warning("no results produced for prefix %s", t.prefix)
          return None

-     primary_id = get_primary_identifier(prefix, identifier, version=version)
+     primary_id = get_primary_identifier(t, **kwargs)
      return id_species.get(primary_id)


  @lru_cache
  @wrap_norm_prefix
- def get_id_species_mapping(
-     prefix: str,
-     force: bool = False,
-     strict: bool = True,
-     version: Optional[str] = None,
- ) -> Mapping[str, str]:
+ def get_id_species_mapping(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[str, str]:
      """Get an identifier to species mapping."""
      if prefix == "ncbigene":
          from ..sources.ncbigene import get_ncbigene_id_to_species_mapping
@@ -57,14 +61,13 @@ def get_id_species_mapping(
          logger.info("[%s] done loading species mappings", prefix)
          return rv

-     if version is None:
-         version = get_version(prefix)
-     path = prefix_cache_join(prefix, name="species.tsv", version=version)
+     version = get_version_from_kwargs(prefix, kwargs)
+     path = get_cache_path(prefix, CacheArtifact.species, version=version)

-     @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=force)
+     @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=check_should_force(kwargs))
      def _get_id_species_mapping() -> Mapping[str, str]:
          logger.info("[%s] no cached species found. getting from OBO loader", prefix)
-         ontology = get_ontology(prefix, force=force, strict=strict, version=version)
+         ontology = get_ontology(prefix, **kwargs)
          logger.info("[%s] loading species mappings", prefix)
          return ontology.get_id_species_mapping()
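
A short sketch (not from the diff) of the broadened `get_species` signature: the first positional argument may now be a plain prefix paired with an identifier, or a `curies` reference object. The ncbigene identifier below is purely illustrative.

    from curies import ReferenceTuple

    from pyobo.api.species import get_species

    # Classic (prefix, identifier) positional call
    taxonomy_id = get_species("ncbigene", "6801")

    # The first argument may now also be a curies.Reference or curies.ReferenceTuple
    taxonomy_id_alt = get_species(ReferenceTuple("ncbigene", "6801"))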