pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/properties.py
CHANGED
@@ -1,140 +1,163 @@
 """High-level API for properties."""

 import logging
-import os
 from collections.abc import Mapping
-from typing import Optional

 import pandas as pd
-
-from
+from tqdm import tqdm
+from typing_extensions import Unpack
+
+from .utils import get_version_from_kwargs
+from ..constants import (
+    GetOntologyKwargs,
+    check_should_cache,
+    check_should_force,
+    check_should_use_tqdm,
+)
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..
+from ..struct import Reference
+from ..struct.struct_utils import OBOLiteral, ReferenceHint, _ensure_ref
+from ..utils.cache import cached_df
 from ..utils.io import multidict
-from ..utils.path import
+from ..utils.path import CacheArtifact, get_cache_path

 __all__ = [
-    "get_properties_df",
     "get_filtered_properties_df",
     "get_filtered_properties_mapping",
     "get_filtered_properties_multimapping",
-    "
+    "get_literal_properties",
+    "get_literal_properties_df",
+    "get_object_properties",
+    "get_object_properties_df",
     "get_properties",
+    "get_properties_df",
+    "get_property",
 ]

 logger = logging.getLogger(__name__)


+def get_object_properties_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+    """Get a dataframe of object property triples."""
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.object_properties, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
+    def _df_getter() -> pd.DataFrame:
+        return get_ontology(prefix, **kwargs).get_object_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )
+
+    return _df_getter()
+
+
+def get_object_properties(
+    prefix, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, Reference]]:
+    """Get a list of object property triples."""
+    df = get_object_properties_df(prefix, **kwargs)
+    return [
+        (Reference.from_curie(s), Reference.from_curie(p), Reference.from_curie(o))
+        for s, p, o in df.values
+    ]
+
+
+def get_literal_properties(
+    prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, OBOLiteral]]:
+    """Get a list of literal property triples."""
+    df = get_literal_properties_df(prefix, **kwargs)
+    return [
+        (
+            Reference.from_curie(s),
+            Reference.from_curie(p),
+            OBOLiteral(
+                value,
+                Reference.from_curie(datatype),
+                language if language and pd.notna(language) else None,
+            ),
+        )
+        for s, p, value, datatype, language in tqdm(
+            df.values,
+            desc=f"[{prefix}] parsing properties",
+            unit_scale=True,
+            unit="triple",
+            disable=not check_should_use_tqdm(kwargs),
+        )
+    ]
+
+
+def get_literal_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
+    """Get a dataframe of literal property quads."""
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.literal_properties, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
+    def _df_getter() -> pd.DataFrame:
+        return get_ontology(prefix, **kwargs).get_literal_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )
+
+    return _df_getter()
+
+
 @wrap_norm_prefix
-def get_properties_df(
-    prefix: str, *, force: bool = False, version: Optional[str] = None
-) -> pd.DataFrame:
+def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
     """Extract properties.

     :param prefix: the resource to load
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A dataframe with the properties
     """
-
-
-    path = prefix_cache_join(prefix, name="properties.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.properties, version=version)

-    @cached_df(
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
     def _df_getter() -> pd.DataFrame:
-
-
-
-        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        df = ontology.get_properties_df()
-        df.dropna(inplace=True)
-        return df
+        return get_ontology(prefix, **kwargs).get_properties_df(
+            use_tqdm=check_should_use_tqdm(kwargs)
+        )

     return _df_getter()


 @wrap_norm_prefix
 def get_filtered_properties_mapping(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> Mapping[str, str]:
     """Extract a single property for each term as a dictionary.

     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A mapping from identifier to property value
     """
-    df =
-
-    return dict(df[[f"{prefix}_id", "value"]].values)
-
-    if version is None:
-        version = get_version(prefix)
-    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-    @cached_mapping(path=path, header=[f"{prefix}_id", prop], force=force)
-    def _mapping_getter() -> Mapping[str, str]:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-            return dict(df.values)
-
-        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm)
-
-    return _mapping_getter()
+    df = get_filtered_properties_df(prefix, prop, **kwargs)
+    return dict(df.values)


 @wrap_norm_prefix
 def get_filtered_properties_multimapping(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> Mapping[str, list[str]]:
     """Extract multiple properties for each term as a dictionary.

     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A mapping from identifier to property values
     """
-
-
-    path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
-    all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-
-    @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force)
-    def _mapping_getter() -> Mapping[str, list[str]]:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            df = df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-            return multidict(df.values)
+    df = get_filtered_properties_df(prefix, prop, **kwargs)
+    return multidict(df.values)

-        logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_multimapping(prop, use_tqdm=use_tqdm)

-
-
-
-def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
+def get_property(
+    prefix: str, identifier: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
+) -> str | None:
     """Extract a single property for the given entity.

     :param prefix: the resource to load
@@ -152,7 +175,12 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[
     return filtered_properties_mapping.get(identifier)


-def get_properties(
+def get_properties(
+    prefix: str,
+    identifier: str,
+    prop: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> list[str] | None:
     """Extract a set of properties for the given entity.

     :param prefix: the resource to load
@@ -168,39 +196,15 @@ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optiona

 @wrap_norm_prefix
 def get_filtered_properties_df(
-    prefix: str,
-    prop: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    prefix: str, prop: ReferenceHint, **kwargs: Unpack[GetOntologyKwargs]
 ) -> pd.DataFrame:
     """Extract a single property for each term.

     :param prefix: the resource to load
     :param prop: the property to extract
-    :param use_tqdm: should a progress bar be shown?
-    :param force: should the resource be re-downloaded, re-parsed, and re-cached?
     :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
     """
-
-
-
-
-
-    @cached_df(path=path, dtype=str, force=force)
-    def _df_getter() -> pd.DataFrame:
-        if os.path.exists(all_properties_path):
-            logger.info("[%s] loading pre-cached properties", prefix)
-            df = pd.read_csv(all_properties_path, sep="\t")
-            logger.info("[%s] filtering pre-cached properties", prefix)
-            return df.loc[df["property"] == prop, [f"{prefix}_id", "value"]]
-
-        if force:
-            logger.info("[%s] forcing reload for properties", prefix)
-        else:
-            logger.info("[%s] no cached properties found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix, force=force, version=version)
-        return ontology.get_filtered_properties_df(prop, use_tqdm=use_tqdm)
-
-    return _df_getter()
+    prop = _ensure_ref(prop, ontology_prefix=prefix)
+    df = get_properties_df(prefix, **kwargs)
+    df = df.loc[df["property"] == prop.curie, [f"{prefix}_id", "value"]]
+    return df
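In 0.12.0 the properties API routes force, version, and caching behavior through a single **kwargs: Unpack[GetOntologyKwargs] passthrough and adds object- and literal-property getters backed by new cache artifacts. A minimal usage sketch against the signatures above; the "chebi" prefix is only illustrative, and whether these functions are also re-exported from the top-level pyobo module is not shown in this diff:

>>> from pyobo.api.properties import get_literal_properties, get_object_properties
>>> literal_triples = get_literal_properties("chebi")  # list of (subject, predicate, OBOLiteral) triples
>>> object_triples = get_object_properties("chebi")  # list of (subject, predicate, object) Reference triples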
pyobo/api/relations.py
CHANGED
@@ -1,15 +1,13 @@
 """High-level API for relations."""

 import logging
-import os
 from collections.abc import Mapping
 from functools import lru_cache
-from typing import Optional

-import networkx as nx
 import pandas as pd
+from typing_extensions import Unpack

-from .utils import
+from .utils import get_version_from_kwargs
 from ..constants import (
     RELATION_COLUMNS,
     RELATION_ID,
@@ -18,50 +16,60 @@ from ..constants import (
     SOURCE_PREFIX,
     TARGET_ID,
     TARGET_PREFIX,
+    GetOntologyKwargs,
+    check_should_cache,
+    check_should_force,
+    check_should_use_tqdm,
 )
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..struct import Reference
+from ..struct.reference import Reference
+from ..struct.struct_utils import ReferenceHint, _ensure_ref
 from ..utils.cache import cached_df
-from ..utils.path import
+from ..utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

 __all__ = [
-    "get_relations_df",
     "get_filtered_relations_df",
     "get_id_multirelations_mapping",
-    "get_relation_mapping",
     "get_relation",
-    "
+    "get_relation_mapping",
+    "get_relations",
+    "get_relations_df",
 ]

-# TODO get_relation, get_relations
-
 logger = logging.getLogger(__name__)


+@wrap_norm_prefix
+def get_relations(
+    prefix: str, **kwargs: Unpack[GetOntologyKwargs]
+) -> list[tuple[Reference, Reference, Reference]]:
+    """Get relations."""
+    df = get_relations_df(prefix, wide=False, **kwargs)
+    return [
+        (
+            Reference(prefix=prefix, identifier=source_id),
+            Reference(prefix=relation_prefix, identifier=relation_id),
+            Reference(prefix=target_prefix, identifier=target_id),
+        )
+        for source_id, relation_prefix, relation_id, target_prefix, target_id in df.values
+    ]
+
+
 @wrap_norm_prefix
 def get_relations_df(
-    prefix: str,
-    *,
-    use_tqdm: bool = False,
-    force: bool = False,
-    wide: bool = False,
-    strict: bool = True,
-    version: Optional[str] = None,
+    prefix: str, *, wide: bool = False, **kwargs: Unpack[GetOntologyKwargs]
 ) -> pd.DataFrame:
     """Get all relations from the OBO."""
-
-
-    path = prefix_cache_join(prefix, name="relations.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.relations, version=version)

-    @cached_df(
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
+    )
     def _df_getter() -> pd.DataFrame:
-
-
-        else:
-            logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-            ontology = get_ontology(prefix, force=force, version=version, strict=strict)
-            return ontology.get_relations_df(use_tqdm=use_tqdm)
+        ontology = get_ontology(prefix, **kwargs)
+        return ontology.get_relations_df(use_tqdm=check_should_use_tqdm(kwargs))

     rv = _df_getter()

@@ -76,38 +84,29 @@ def get_relations_df(
 @wrap_norm_prefix
 def get_filtered_relations_df(
     prefix: str,
-    relation:
-
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    relation: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> pd.DataFrame:
     """Get all the given relation."""
-
-
-
-
-        prefix,
-        "
-
-
+    relation = _ensure_ref(relation, ontology_prefix=prefix)
+    version = get_version_from_kwargs(prefix, kwargs)
+    all_relations_path = get_cache_path(prefix, CacheArtifact.relations, version=version)
+    if all_relations_path.is_file():
+        logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
+        df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
+        idx = (df[RELATION_PREFIX] == relation.prefix) & (df[RELATION_ID] == relation.identifier)
+        columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
+        return df.loc[idx, columns]
+
+    path = get_relation_cache_path(prefix, relation, version=version)
+
+    @cached_df(
+        path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
     )
-    all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
-
-    @cached_df(path=path, dtype=str, force=force)
     def _df_getter() -> pd.DataFrame:
-        if os.path.exists(all_relations_path):
-            logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
-            df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
-            idx = (df[RELATION_PREFIX] == relation_prefix) & (
-                df[RELATION_ID] == relation_identifier
-            )
-            columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
-            return df.loc[idx, columns]
-
         logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix,
-        return ontology.get_filtered_relations_df(relation, use_tqdm=
+        ontology = get_ontology(prefix, **kwargs)
+        return ontology.get_filtered_relations_df(relation, use_tqdm=check_should_use_tqdm(kwargs))

     return _df_getter()

@@ -115,29 +114,24 @@ def get_filtered_relations_df(
 @wrap_norm_prefix
 def get_id_multirelations_mapping(
     prefix: str,
-    typedef:
-
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    typedef: ReferenceHint,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> Mapping[str, list[Reference]]:
     """Get the OBO file and output a synonym dictionary."""
-
-
-    ontology
-
+    kwargs["version"] = get_version_from_kwargs(prefix, kwargs)
+    ontology = get_ontology(prefix, **kwargs)
+    return ontology.get_id_multirelations_mapping(
+        typedef=typedef, use_tqdm=check_should_use_tqdm(kwargs)
+    )


 @lru_cache
 @wrap_norm_prefix
 def get_relation_mapping(
     prefix: str,
-    relation:
+    relation: ReferenceHint,
     target_prefix: str,
-
-    use_tqdm: bool = False,
-    force: bool = False,
-    version: Optional[str] = None,
+    **kwargs: Unpack[GetOntologyKwargs],
 ) -> Mapping[str, str]:
     """Get relations from identifiers in the source prefix to target prefix with the given relation.

@@ -151,11 +145,9 @@ def get_relation_mapping(
     >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
     >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
     """
-
-    version = get_version(prefix)
-    ontology = get_ontology(prefix, force=force, version=version)
+    ontology = get_ontology(prefix, **kwargs)
     return ontology.get_relation_mapping(
-        relation=relation, target_prefix=target_prefix, use_tqdm=
+        relation=relation, target_prefix=target_prefix, use_tqdm=check_should_use_tqdm(kwargs)
     )


@@ -163,13 +155,10 @@ def get_relation_mapping(
 def get_relation(
     prefix: str,
     source_identifier: str,
-    relation:
+    relation: ReferenceHint,
     target_prefix: str,
-
-
-    force: bool = False,
-    **kwargs,
-) -> Optional[str]:
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.

     .. warning:: Assumes there's only one version of the property for each term.
@@ -187,21 +176,6 @@ def get_relation(
         prefix=prefix,
         relation=relation,
         target_prefix=target_prefix,
-        use_tqdm=use_tqdm,
-        force=force,
         **kwargs,
     )
     return relation_mapping.get(source_identifier)
-
-
-def get_graph(prefix: str, **kwargs) -> nx.DiGraph:
-    """Get the relation graph."""
-    rv = nx.MultiDiGraph()
-    df = get_relations_df(prefix=prefix, **kwargs)
-    for source_id, relation_prefix, relation_id, target_ns, target_id in df.values:
-        rv.add_edge(
-            f"{prefix}:{source_id}",
-            f"{target_ns}:{target_id}",
-            key=f"{relation_prefix}:{relation_id}",
-        )
-    return rv
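The relations API drops get_graph (and with it the networkx dependency in this module) and gains get_relations, which expands the long-format relations dataframe into Reference triples. A short sketch based on the signatures above; "go" is an illustrative prefix, and the orthology lookup is the example already given in the module's docstring:

>>> from pyobo.api.relations import get_relations
>>> triples = get_relations("go")  # list of (source, relation, target) Reference triples
>>> import pyobo
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")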
pyobo/api/species.py
CHANGED
@@ -3,14 +3,17 @@
 import logging
 from collections.abc import Mapping
 from functools import lru_cache
-
+
+import curies
+from typing_extensions import Unpack

 from .alts import get_primary_identifier
-from .utils import
+from .utils import _get_pi, get_version_from_kwargs
+from ..constants import GetOntologyKwargs, check_should_force
 from ..getters import NoBuildError, get_ontology
 from ..identifier_utils import wrap_norm_prefix
 from ..utils.cache import cached_mapping
-from ..utils.path import
+from ..utils.path import CacheArtifact, get_cache_path

 __all__ = [
     "get_id_species_mapping",
@@ -20,34 +23,35 @@ __all__ = [
 logger = logging.getLogger(__name__)


-
-
+def get_species(
+    prefix: str | curies.Reference | curies.ReferenceTuple,
+    identifier: str | None = None,
+    /,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> str | None:
     """Get the species."""
-
+    t = _get_pi(prefix, identifier)
+
+    if t.prefix == "uniprot":
         raise NotImplementedError

     try:
-        id_species = get_id_species_mapping(prefix,
+        id_species = get_id_species_mapping(t.prefix, **kwargs)
     except NoBuildError:
-        logger.warning("unable to look up species for prefix %s", prefix)
+        logger.warning("unable to look up species for prefix %s", t.prefix)
         return None

     if not id_species:
-        logger.warning("no results produced for prefix %s", prefix)
+        logger.warning("no results produced for prefix %s", t.prefix)
         return None

-    primary_id = get_primary_identifier(
+    primary_id = get_primary_identifier(t, **kwargs)
     return id_species.get(primary_id)


 @lru_cache
 @wrap_norm_prefix
-def get_id_species_mapping(
-    prefix: str,
-    force: bool = False,
-    strict: bool = True,
-    version: Optional[str] = None,
-) -> Mapping[str, str]:
+def get_id_species_mapping(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[str, str]:
     """Get an identifier to species mapping."""
     if prefix == "ncbigene":
         from ..sources.ncbigene import get_ncbigene_id_to_species_mapping
@@ -57,14 +61,13 @@ def get_id_species_mapping(
     logger.info("[%s] done loading species mappings", prefix)
     return rv

-
-
-    path = prefix_cache_join(prefix, name="species.tsv", version=version)
+    version = get_version_from_kwargs(prefix, kwargs)
+    path = get_cache_path(prefix, CacheArtifact.species, version=version)

-    @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=
+    @cached_mapping(path=path, header=[f"{prefix}_id", "species"], force=check_should_force(kwargs))
     def _get_id_species_mapping() -> Mapping[str, str]:
         logger.info("[%s] no cached species found. getting from OBO loader", prefix)
-        ontology = get_ontology(prefix,
+        ontology = get_ontology(prefix, **kwargs)
         logger.info("[%s] loading species mappings", prefix)
         return ontology.get_id_species_mapping()
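get_species now takes either a prefix and identifier as positional-only arguments or a single curies.Reference/curies.ReferenceTuple, normalized through the internal _get_pi helper. A sketch of both call styles; HGNC:6893 (MAPT) is an illustrative identifier, and the result, when a species cache is available, is the mapped species identifier:

>>> from pyobo.api.species import get_species
>>> import curies
>>> get_species("hgnc", "6893")  # prefix and identifier, positional-only
>>> get_species(curies.Reference(prefix="hgnc", identifier="6893"))  # a single reference also works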