pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/typedefs.py
CHANGED
|
@@ -2,15 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from functools import lru_cache
|
|
5
|
-
from typing import Optional
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
7
|
+
from typing_extensions import Unpack
|
|
8
8
|
|
|
9
|
-
from .utils import
|
|
9
|
+
from .utils import get_version_from_kwargs
|
|
10
|
+
from ..constants import GetOntologyKwargs, check_should_cache, check_should_force
|
|
10
11
|
from ..getters import get_ontology
|
|
11
12
|
from ..identifier_utils import wrap_norm_prefix
|
|
12
13
|
from ..utils.cache import cached_df
|
|
13
|
-
from ..utils.path import
|
|
14
|
+
from ..utils.path import CacheArtifact, get_cache_path
|
|
14
15
|
|
|
15
16
|
__all__ = [
|
|
16
17
|
"get_typedef_df",
|
|
@@ -21,18 +22,17 @@ logger = logging.getLogger(__name__)
|
|
|
21
22
|
|
|
22
23
|
@lru_cache
|
|
23
24
|
@wrap_norm_prefix
|
|
24
|
-
def get_typedef_df(
|
|
25
|
-
prefix: str, *, force: bool = False, version: Optional[str] = None
|
|
26
|
-
) -> pd.DataFrame:
|
|
25
|
+
def get_typedef_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
|
|
27
26
|
"""Get an identifier to name mapping for the typedefs in an OBO file."""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)
|
|
27
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
28
|
+
path = get_cache_path(prefix, CacheArtifact.typedefs, version=version)
|
|
31
29
|
|
|
32
|
-
@cached_df(
|
|
30
|
+
@cached_df(
|
|
31
|
+
path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
|
|
32
|
+
)
|
|
33
33
|
def _df_getter() -> pd.DataFrame:
|
|
34
34
|
logger.debug("[%s] no cached typedefs found. getting from OBO loader", prefix)
|
|
35
|
-
ontology = get_ontology(prefix,
|
|
35
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
36
36
|
logger.debug("[%s] loading typedef mappings", prefix)
|
|
37
37
|
return ontology.get_typedef_df()
|
|
38
38
|
|
pyobo/api/utils.py
CHANGED
|
@@ -3,18 +3,23 @@
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
import warnings
|
|
6
7
|
from functools import lru_cache
|
|
7
|
-
from typing import
|
|
8
|
+
from typing import Literal, overload
|
|
8
9
|
|
|
9
10
|
import bioversions
|
|
11
|
+
import curies
|
|
12
|
+
from bioregistry import NormalizedNamableReference as Reference
|
|
13
|
+
from curies import ReferenceTuple
|
|
10
14
|
|
|
15
|
+
from ..constants import GetOntologyKwargs
|
|
11
16
|
from ..utils.path import prefix_directory_join
|
|
12
17
|
|
|
13
18
|
__all__ = [
|
|
14
|
-
"
|
|
19
|
+
"VersionError",
|
|
15
20
|
"get_version",
|
|
16
21
|
"get_version_pins",
|
|
17
|
-
"
|
|
22
|
+
"safe_get_version",
|
|
18
23
|
]
|
|
19
24
|
|
|
20
25
|
logger = logging.getLogger(__name__)
|
|
@@ -24,11 +29,25 @@ class VersionError(ValueError):
|
|
|
24
29
|
"""A catch-all for version getting failure."""
|
|
25
30
|
|
|
26
31
|
|
|
27
|
-
|
|
32
|
+
# docstr-coverage:excused `overload`
|
|
33
|
+
@overload
|
|
34
|
+
def get_version(prefix: str, *, strict: Literal[True] = True) -> str: ...
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# docstr-coverage:excused `overload`
|
|
38
|
+
@overload
|
|
39
|
+
def get_version(prefix: str, *, strict: Literal[False] = False) -> str | None: ...
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_version(prefix: str, *, strict: bool = False) -> str | None:
|
|
28
43
|
"""Get the version for the resource, if available.
|
|
29
44
|
|
|
30
45
|
:param prefix: the resource name
|
|
31
|
-
:
|
|
46
|
+
:param strict: Should an error be raised if no version is available?
|
|
47
|
+
|
|
48
|
+
:returns: The version if available else None
|
|
49
|
+
|
|
50
|
+
:raises VersionError: if the version is not available and strict mode is enabled
|
|
32
51
|
"""
|
|
33
52
|
# Prioritize loaded environment variable PYOBO_VERSION_PINS dictionary
|
|
34
53
|
version = get_version_pins().get(prefix)
|
|
@@ -47,13 +66,27 @@ def get_version(prefix: str) -> Optional[str]:
|
|
|
47
66
|
metadata_json_path = prefix_directory_join(prefix, name="metadata.json", ensure_exists=False)
|
|
48
67
|
if metadata_json_path.exists():
|
|
49
68
|
data = json.loads(metadata_json_path.read_text())
|
|
50
|
-
|
|
69
|
+
version = data["version"]
|
|
70
|
+
if version:
|
|
71
|
+
return version
|
|
72
|
+
|
|
73
|
+
if strict:
|
|
74
|
+
raise ValueError
|
|
51
75
|
|
|
52
76
|
return None
|
|
53
77
|
|
|
54
78
|
|
|
79
|
+
def get_version_from_kwargs(prefix: str, kwargs: GetOntologyKwargs) -> str | None:
|
|
80
|
+
"""Get the version for the resource based on generic keyword arguments."""
|
|
81
|
+
if version := kwargs.get("version"):
|
|
82
|
+
return version
|
|
83
|
+
# it's okay if none gets returned after getting this far, we at least tried
|
|
84
|
+
return get_version(prefix, strict=False)
|
|
85
|
+
|
|
86
|
+
|
|
55
87
|
def safe_get_version(prefix: str) -> str:
|
|
56
88
|
"""Get the version."""
|
|
89
|
+
# FIXME replace with get_version(prefix, strict=True)
|
|
57
90
|
v = get_version(prefix)
|
|
58
91
|
if v is None:
|
|
59
92
|
raise ValueError
|
|
@@ -65,13 +98,12 @@ def get_version_pins() -> dict[str, str]:
|
|
|
65
98
|
"""Retrieve user-defined resource version pins.
|
|
66
99
|
|
|
67
100
|
To set your own resource pins, set your machine's environmental variable
|
|
68
|
-
"PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
the
|
|
73
|
-
|
|
74
|
-
applications that rely on PyOBO.
|
|
101
|
+
"PYOBO_VERSION_PINS" to a JSON string containing string resource prefixes as keys
|
|
102
|
+
and string versions of their respective resource as values. Constraining version
|
|
103
|
+
pins will make PyOBO rely on cached versions of a resource. A user might want to pin
|
|
104
|
+
resource versions that are used by PyOBO due to the fact that PyOBO will download
|
|
105
|
+
the latest version of a resource if it is not pinned. This downloading process can
|
|
106
|
+
lead to a slow-down in downstream applications that rely on PyOBO.
|
|
75
107
|
"""
|
|
76
108
|
version_pins_str = os.getenv("PYOBO_VERSION_PINS")
|
|
77
109
|
if not version_pins_str:
|
|
@@ -102,3 +134,24 @@ def get_version_pins() -> dict[str, str]:
|
|
|
102
134
|
f"name."
|
|
103
135
|
)
|
|
104
136
|
return version_pins
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _get_pi(
|
|
140
|
+
prefix: str | curies.Reference | ReferenceTuple, identifier: str | None = None, /
|
|
141
|
+
) -> Reference:
|
|
142
|
+
if isinstance(prefix, ReferenceTuple | curies.Reference):
|
|
143
|
+
if identifier is not None:
|
|
144
|
+
raise ValueError("unexpected non-none value passed as second positional argument")
|
|
145
|
+
return Reference(prefix=prefix.prefix, identifier=prefix.identifier)
|
|
146
|
+
if isinstance(prefix, str) and identifier is None:
|
|
147
|
+
return Reference.from_curie(prefix)
|
|
148
|
+
if identifier is None:
|
|
149
|
+
raise ValueError(
|
|
150
|
+
"prefix was given as a string, so an identifier was expected to be passed as a string as well"
|
|
151
|
+
)
|
|
152
|
+
warnings.warn(
|
|
153
|
+
"Passing a prefix and identifier as seperate arguments is deprecated. Please pass a curies.Reference or curies.ReferenceTuple in the first positional-only argument instead.",
|
|
154
|
+
DeprecationWarning,
|
|
155
|
+
stacklevel=4, # this is 4 since this is (always?) called from inside a decorator
|
|
156
|
+
)
|
|
157
|
+
return Reference(prefix=prefix, identifier=identifier)
|
pyobo/api/xrefs.py
CHANGED
|
@@ -1,28 +1,36 @@
|
|
|
1
1
|
"""High-level API for synonyms."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
import warnings
|
|
4
5
|
from collections.abc import Mapping
|
|
5
6
|
from functools import lru_cache
|
|
6
|
-
from typing import Optional, Union
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
from .utils import
|
|
13
|
-
from ..constants import
|
|
9
|
+
from curies import ReferenceTuple
|
|
10
|
+
from typing_extensions import Unpack
|
|
11
|
+
|
|
12
|
+
from .utils import get_version_from_kwargs
|
|
13
|
+
from ..constants import (
|
|
14
|
+
TARGET_ID,
|
|
15
|
+
TARGET_PREFIX,
|
|
16
|
+
GetOntologyKwargs,
|
|
17
|
+
check_should_cache,
|
|
18
|
+
check_should_force,
|
|
19
|
+
check_should_use_tqdm,
|
|
20
|
+
)
|
|
14
21
|
from ..getters import get_ontology
|
|
15
22
|
from ..identifier_utils import wrap_norm_prefix
|
|
16
|
-
from ..struct import Obo
|
|
17
|
-
from ..utils.cache import cached_df
|
|
18
|
-
from ..utils.path import
|
|
23
|
+
from ..struct import Obo
|
|
24
|
+
from ..utils.cache import cached_df
|
|
25
|
+
from ..utils.path import CacheArtifact, get_cache_path
|
|
19
26
|
|
|
20
27
|
__all__ = [
|
|
21
|
-
"get_xrefs_df",
|
|
22
28
|
"get_filtered_xrefs",
|
|
29
|
+
"get_mappings_df",
|
|
30
|
+
"get_sssom_df",
|
|
23
31
|
"get_xref",
|
|
24
32
|
"get_xrefs",
|
|
25
|
-
"
|
|
33
|
+
"get_xrefs_df",
|
|
26
34
|
]
|
|
27
35
|
|
|
28
36
|
logger = logging.getLogger(__name__)
|
|
@@ -35,10 +43,10 @@ def get_xref(
|
|
|
35
43
|
new_prefix: str,
|
|
36
44
|
*,
|
|
37
45
|
flip: bool = False,
|
|
38
|
-
|
|
39
|
-
) ->
|
|
46
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
47
|
+
) -> str | None:
|
|
40
48
|
"""Get the xref with the new prefix if a direct path exists."""
|
|
41
|
-
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip,
|
|
49
|
+
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, **kwargs)
|
|
42
50
|
return filtered_xrefs.get(identifier)
|
|
43
51
|
|
|
44
52
|
|
|
@@ -49,32 +57,18 @@ def get_filtered_xrefs(
|
|
|
49
57
|
xref_prefix: str,
|
|
50
58
|
*,
|
|
51
59
|
flip: bool = False,
|
|
52
|
-
|
|
53
|
-
force: bool = False,
|
|
54
|
-
strict: bool = False,
|
|
55
|
-
version: Optional[str] = None,
|
|
60
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
56
61
|
) -> Mapping[str, str]:
|
|
57
62
|
"""Get xrefs to a given target."""
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
logger.info("[%s] loading pre-cached xrefs", prefix)
|
|
68
|
-
df = pd.read_csv(all_xrefs_path, sep="\t", dtype=str)
|
|
69
|
-
logger.info("[%s] filtering pre-cached xrefs", prefix)
|
|
70
|
-
df = df.loc[df[TARGET_PREFIX] == xref_prefix, [f"{prefix}_id", TARGET_ID]]
|
|
71
|
-
return dict(df.values)
|
|
72
|
-
|
|
73
|
-
logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
|
|
74
|
-
ontology = get_ontology(prefix, force=force, strict=strict, version=version)
|
|
75
|
-
return ontology.get_filtered_xrefs_mapping(xref_prefix, use_tqdm=use_tqdm)
|
|
76
|
-
|
|
77
|
-
rv = _get_mapping()
|
|
63
|
+
mappings_df = get_mappings_df(prefix, **kwargs)
|
|
64
|
+
|
|
65
|
+
rv = {}
|
|
66
|
+
for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
|
|
67
|
+
subject_pair = ReferenceTuple.from_curie(subject_curie)
|
|
68
|
+
object_pair = ReferenceTuple.from_curie(object_curie)
|
|
69
|
+
if object_pair.prefix == xref_prefix:
|
|
70
|
+
rv[subject_pair.identifier] = object_pair.identifier
|
|
71
|
+
|
|
78
72
|
if flip:
|
|
79
73
|
return {v: k for k, v in rv.items()}
|
|
80
74
|
return rv
|
|
@@ -84,104 +78,104 @@ get_xrefs = get_filtered_xrefs
|
|
|
84
78
|
|
|
85
79
|
|
|
86
80
|
@wrap_norm_prefix
|
|
87
|
-
def get_xrefs_df(
|
|
88
|
-
prefix: str,
|
|
89
|
-
*,
|
|
90
|
-
use_tqdm: bool = False,
|
|
91
|
-
force: bool = False,
|
|
92
|
-
strict: bool = False,
|
|
93
|
-
version: Optional[str] = None,
|
|
94
|
-
) -> pd.DataFrame:
|
|
81
|
+
def get_xrefs_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
|
|
95
82
|
"""Get all xrefs."""
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
83
|
+
warnings.warn(
|
|
84
|
+
f"use pyobo.get_mappings_df instead of pyobo.get_xrefs_df."
|
|
85
|
+
f"Not using cache artifact path to {CacheArtifact.xrefs}",
|
|
86
|
+
DeprecationWarning,
|
|
87
|
+
stacklevel=2,
|
|
88
|
+
)
|
|
99
89
|
|
|
100
|
-
|
|
101
|
-
def _df_getter() -> pd.DataFrame:
|
|
102
|
-
logger.info("[%s] no cached xrefs found. getting from OBO loader", prefix)
|
|
103
|
-
ontology = get_ontology(prefix, force=force, strict=strict, version=version)
|
|
104
|
-
return ontology.get_xrefs_df(use_tqdm=use_tqdm)
|
|
90
|
+
mappings_df = get_mappings_df(prefix, **kwargs)
|
|
105
91
|
|
|
106
|
-
|
|
92
|
+
rows = []
|
|
93
|
+
for subject_curie, object_curie in mappings_df[["subject_id", "object_id"]].values:
|
|
94
|
+
subject_pair = ReferenceTuple.from_curie(subject_curie)
|
|
95
|
+
object_pair = ReferenceTuple.from_curie(object_curie)
|
|
96
|
+
rows.append((subject_pair.identifier, object_pair.prefix, object_pair.identifier))
|
|
97
|
+
|
|
98
|
+
df = pd.DataFrame(rows, columns=[f"{prefix}_id", TARGET_PREFIX, TARGET_ID])
|
|
99
|
+
df = df.drop_duplicates()
|
|
100
|
+
return df
|
|
107
101
|
|
|
108
102
|
|
|
109
103
|
def get_sssom_df(
|
|
110
|
-
prefix:
|
|
104
|
+
prefix: str | Obo, *, names: bool = True, **kwargs: Unpack[GetOntologyKwargs]
|
|
105
|
+
) -> pd.DataFrame:
|
|
106
|
+
"""Get an SSSOM dataframe, replaced by :func:`get_mappings_df`."""
|
|
107
|
+
warnings.warn("get_sssom_df was renamed to get_mappings_df", DeprecationWarning, stacklevel=2)
|
|
108
|
+
return get_mappings_df(prefix=prefix, names=names, **kwargs)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_mappings_df(
|
|
112
|
+
prefix: str | Obo,
|
|
111
113
|
*,
|
|
112
|
-
predicate_id: str = "oboinowl:hasDbXref",
|
|
113
|
-
justification: str = "sempav:UnspecifiedMatching",
|
|
114
114
|
names: bool = True,
|
|
115
|
-
|
|
115
|
+
include_mapping_source_column: bool = False,
|
|
116
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
116
117
|
) -> pd.DataFrame:
|
|
117
|
-
r"""Get
|
|
118
|
+
r"""Get semantic mappings from a source as an SSSOM dataframe.
|
|
118
119
|
|
|
119
120
|
:param prefix: The ontology to look in for xrefs
|
|
120
|
-
:param predicate_id: The predicate used in the SSSOM document. By default, ontologies
|
|
121
|
-
don't typically ascribe semantics to xrefs so ``oboinowl:hasDbXref`` is used
|
|
122
|
-
:param justification: The justification for the mapping. By default, ontologies
|
|
123
|
-
don't typically ascribe semantics, so this is left with `sempav:UnspecifiedMatching`
|
|
124
121
|
:param names: Add name columns (``subject_label`` and ``object_label``)
|
|
122
|
+
|
|
125
123
|
:returns: A SSSOM-compliant dataframe of xrefs
|
|
126
124
|
|
|
127
125
|
For example, if you want to get UMLS as an SSSOM dataframe, you can do
|
|
128
126
|
|
|
129
|
-
|
|
130
|
-
>>> df = pyobo.get_sssom_df("umls")
|
|
131
|
-
>>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
|
|
127
|
+
.. code-block:: python
|
|
132
128
|
|
|
133
|
-
|
|
134
|
-
names, you can pass ``names=False``
|
|
129
|
+
import pyobo
|
|
135
130
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
>>> df.to_csv("umls.sssom.tsv", sep="\t", index=False)
|
|
131
|
+
df = pyobo.get_mappings_df("umls")
|
|
132
|
+
df.to_csv("umls.sssom.tsv", sep="\t", index=False)
|
|
139
133
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
134
|
+
If you don't want to get all of the many resources required to add names, you can
|
|
135
|
+
pass ``names=False``
|
|
136
|
+
|
|
137
|
+
.. code-block:: python
|
|
138
|
+
|
|
139
|
+
import pyobo
|
|
140
|
+
|
|
141
|
+
df = pyobo.get_mappings_df("umls", names=False)
|
|
142
|
+
df.to_csv("umls.sssom.tsv", sep="\t", index=False)
|
|
143
143
|
|
|
144
|
+
.. note::
|
|
145
|
+
|
|
146
|
+
This assumes the Bioregistry as the prefix map
|
|
147
|
+
"""
|
|
144
148
|
if isinstance(prefix, Obo):
|
|
145
|
-
df = prefix.
|
|
149
|
+
df = prefix.get_mappings_df(
|
|
150
|
+
include_subject_labels=names,
|
|
151
|
+
include_mapping_source_column=include_mapping_source_column,
|
|
152
|
+
use_tqdm=check_should_use_tqdm(kwargs),
|
|
153
|
+
)
|
|
146
154
|
prefix = prefix.ontology
|
|
155
|
+
|
|
147
156
|
else:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
predicate_id,
|
|
165
|
-
justification,
|
|
166
|
-
)
|
|
167
|
-
)
|
|
168
|
-
else:
|
|
169
|
-
rows.append((source.curie, target.curie, predicate_id, justification))
|
|
157
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
158
|
+
path = get_cache_path(prefix, CacheArtifact.mappings, version=version)
|
|
159
|
+
|
|
160
|
+
@cached_df(
|
|
161
|
+
path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
|
|
162
|
+
)
|
|
163
|
+
def _df_getter() -> pd.DataFrame:
|
|
164
|
+
logger.info("[%s] rebuilding SSSOM", prefix)
|
|
165
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
166
|
+
return ontology.get_mappings_df(
|
|
167
|
+
use_tqdm=check_should_use_tqdm(kwargs),
|
|
168
|
+
include_subject_labels=True,
|
|
169
|
+
include_mapping_source_column=include_mapping_source_column,
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
df = _df_getter()
|
|
170
173
|
|
|
171
174
|
if names:
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
]
|
|
180
|
-
else:
|
|
181
|
-
columns = [
|
|
182
|
-
"subject_id",
|
|
183
|
-
"object_id",
|
|
184
|
-
"predicate_id",
|
|
185
|
-
"mapping_justification",
|
|
186
|
-
]
|
|
187
|
-
return pd.DataFrame(rows, columns=columns)
|
|
175
|
+
from .names import get_name_by_curie
|
|
176
|
+
|
|
177
|
+
df["object_label"] = df["object_id"].map(get_name_by_curie)
|
|
178
|
+
elif "subject_label" in df.columns:
|
|
179
|
+
del df["subject_label"]
|
|
180
|
+
|
|
181
|
+
return df
|
pyobo/cli/__init__.py
CHANGED
|
File without changes
|
pyobo/cli/cli.py
CHANGED
|
@@ -2,23 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
import
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from functools import lru_cache
|
|
6
7
|
from operator import itemgetter
|
|
7
8
|
|
|
9
|
+
import bioregistry
|
|
8
10
|
import click
|
|
9
11
|
import humanize
|
|
10
|
-
from more_click import verbose_option
|
|
11
12
|
from tabulate import tabulate
|
|
12
13
|
|
|
13
|
-
from .aws import main as aws_main
|
|
14
14
|
from .database import main as database_main
|
|
15
15
|
from .lookup import lookup
|
|
16
|
-
from ..constants import RAW_DIRECTORY
|
|
17
|
-
from ..plugins import has_nomenclature_plugin
|
|
18
|
-
from ..registries import iter_cached_obo
|
|
19
|
-
from ..utils.io import get_writer
|
|
20
|
-
from ..xrefdb.canonicalizer import Canonicalizer, get_priority_curie, remap_file_stream
|
|
21
|
-
from ..xrefdb.priority import DEFAULT_PRIORITY_LIST
|
|
16
|
+
from ..constants import GLOBAL_SKIP, RAW_DIRECTORY
|
|
17
|
+
from ..plugins import has_nomenclature_plugin
|
|
22
18
|
|
|
23
19
|
__all__ = ["main"]
|
|
24
20
|
|
|
@@ -31,36 +27,6 @@ def main():
|
|
|
31
27
|
"""CLI for PyOBO."""
|
|
32
28
|
|
|
33
29
|
|
|
34
|
-
_ORDERING_TEXT = ", ".join(f"{i}) {x}" for i, x in enumerate(DEFAULT_PRIORITY_LIST, start=1))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@main.command(help=f"Prioritize a CURIE from ordering: {_ORDERING_TEXT}")
|
|
38
|
-
@click.argument("curie")
|
|
39
|
-
def prioritize(curie: str):
|
|
40
|
-
"""Prioritize a CURIE."""
|
|
41
|
-
priority_curie = get_priority_curie(curie)
|
|
42
|
-
click.secho(priority_curie)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@main.command()
|
|
46
|
-
@click.option("-i", "--file-in", type=click.File("r"), default=sys.stdin)
|
|
47
|
-
@click.option("-o", "--file-out", type=click.File("w"), default=sys.stdout)
|
|
48
|
-
@click.option("--column", type=int, default=0, show_default=True)
|
|
49
|
-
@click.option("--sep", default="\t", show_default=True)
|
|
50
|
-
def recurify(file_in, file_out, column: int, sep: str):
|
|
51
|
-
"""Remap a column in a given file stream."""
|
|
52
|
-
remap_file_stream(file_in=file_in, file_out=file_out, column=column, sep=sep)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
@main.command()
|
|
56
|
-
@verbose_option
|
|
57
|
-
def cache():
|
|
58
|
-
"""Cache all resources."""
|
|
59
|
-
for obo in iter_nomenclature_plugins():
|
|
60
|
-
click.secho(f"Caching {obo.ontology}", bold=True, fg="green")
|
|
61
|
-
obo.write_default()
|
|
62
|
-
|
|
63
|
-
|
|
64
30
|
@main.command()
|
|
65
31
|
@click.option("--remove-obo", is_flag=True)
|
|
66
32
|
def clean(remove_obo: bool):
|
|
@@ -93,7 +59,7 @@ def clean(remove_obo: bool):
|
|
|
93
59
|
@main.command()
|
|
94
60
|
def ls():
|
|
95
61
|
"""List how big all of the OBO files are."""
|
|
96
|
-
entries = [(prefix, os.path.getsize(path)) for prefix, path in
|
|
62
|
+
entries = [(prefix, os.path.getsize(path)) for prefix, path in _iter_cached_obo()]
|
|
97
63
|
entries = [
|
|
98
64
|
(prefix, humanize.naturalsize(size), "✅" if not has_nomenclature_plugin(prefix) else "❌")
|
|
99
65
|
for prefix, size in sorted(entries, key=itemgetter(1), reverse=True)
|
|
@@ -101,19 +67,38 @@ def ls():
|
|
|
101
67
|
click.echo(tabulate(entries, headers=["Source", "Size", "OBO"]))
|
|
102
68
|
|
|
103
69
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
70
|
+
def _iter_cached_obo() -> Iterable[tuple[str, str]]:
|
|
71
|
+
"""Iterate over cached OBO paths."""
|
|
72
|
+
for prefix in os.listdir(RAW_DIRECTORY):
|
|
73
|
+
if prefix in GLOBAL_SKIP or _has_no_download(prefix) or bioregistry.is_deprecated(prefix):
|
|
74
|
+
continue
|
|
75
|
+
d = RAW_DIRECTORY.joinpath(prefix)
|
|
76
|
+
if not os.path.isdir(d):
|
|
77
|
+
continue
|
|
78
|
+
for x in os.listdir(d):
|
|
79
|
+
if x.endswith(".obo"):
|
|
80
|
+
p = os.path.join(d, x)
|
|
81
|
+
yield prefix, p
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _has_no_download(prefix: str) -> bool:
|
|
85
|
+
"""Return if the prefix is not available."""
|
|
86
|
+
prefix_norm = bioregistry.normalize_prefix(prefix)
|
|
87
|
+
return prefix_norm is not None and prefix_norm in _no_download()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@lru_cache(maxsize=1)
|
|
91
|
+
def _no_download() -> set[str]:
|
|
92
|
+
"""Get the list of prefixes not available as OBO."""
|
|
93
|
+
return {
|
|
94
|
+
prefix
|
|
95
|
+
for prefix in bioregistry.read_registry()
|
|
96
|
+
if bioregistry.get_obo_download(prefix) is None
|
|
97
|
+
and bioregistry.get_owl_download(prefix) is None
|
|
98
|
+
}
|
|
113
99
|
|
|
114
100
|
|
|
115
101
|
main.add_command(lookup)
|
|
116
|
-
main.add_command(aws_main)
|
|
117
102
|
main.add_command(database_main)
|
|
118
103
|
|
|
119
104
|
if __name__ == "__main__":
|