pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/api/names.py
CHANGED
|
@@ -4,43 +4,54 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
import subprocess
|
|
7
|
-
from collections.abc import Mapping
|
|
7
|
+
from collections.abc import Callable, Mapping
|
|
8
8
|
from functools import lru_cache
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import Any, TypeVar
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
import curies
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import ssslm
|
|
14
|
+
from pystow.cache import Cached
|
|
15
|
+
from ssslm import LiteralMapping
|
|
16
|
+
from typing_extensions import Unpack
|
|
12
17
|
|
|
13
18
|
from .alts import get_primary_identifier
|
|
14
|
-
from .utils import
|
|
19
|
+
from .utils import _get_pi, get_version_from_kwargs
|
|
20
|
+
from ..constants import (
|
|
21
|
+
GetOntologyKwargs,
|
|
22
|
+
check_should_cache,
|
|
23
|
+
check_should_force,
|
|
24
|
+
)
|
|
15
25
|
from ..getters import NoBuildError, get_ontology
|
|
16
|
-
from ..identifier_utils import
|
|
17
|
-
from ..
|
|
18
|
-
from ..utils.
|
|
26
|
+
from ..identifier_utils import wrap_norm_prefix
|
|
27
|
+
from ..struct import Reference
|
|
28
|
+
from ..utils.cache import cached_collection, cached_df, cached_mapping
|
|
29
|
+
from ..utils.io import multidict
|
|
30
|
+
from ..utils.path import CacheArtifact, get_cache_path
|
|
19
31
|
|
|
20
32
|
__all__ = [
|
|
21
|
-
"get_name",
|
|
22
|
-
"get_name_by_curie",
|
|
23
|
-
"get_ids",
|
|
24
|
-
"get_id_name_mapping",
|
|
25
|
-
"get_name_id_mapping",
|
|
26
33
|
"get_definition",
|
|
27
34
|
"get_id_definition_mapping",
|
|
28
|
-
"
|
|
35
|
+
"get_id_name_mapping",
|
|
29
36
|
"get_id_synonyms_mapping",
|
|
37
|
+
"get_ids",
|
|
38
|
+
"get_literal_mappings",
|
|
39
|
+
"get_literal_mappings_df",
|
|
40
|
+
"get_name",
|
|
41
|
+
"get_name_by_curie",
|
|
42
|
+
"get_name_id_mapping",
|
|
30
43
|
"get_obsolete",
|
|
44
|
+
"get_obsolete_references",
|
|
45
|
+
"get_references",
|
|
46
|
+
"get_synonyms",
|
|
31
47
|
]
|
|
32
48
|
|
|
33
49
|
logger = logging.getLogger(__name__)
|
|
34
50
|
|
|
35
51
|
|
|
36
|
-
def get_name_by_curie(curie: str,
|
|
52
|
+
def get_name_by_curie(curie: str, **kwargs: Any) -> str | None:
|
|
37
53
|
"""Get the name for a CURIE, if possible."""
|
|
38
|
-
|
|
39
|
-
version = get_version(curie.split(":")[0])
|
|
40
|
-
prefix, identifier = normalize_curie(curie)
|
|
41
|
-
if prefix and identifier:
|
|
42
|
-
return get_name(prefix, identifier, version=version)
|
|
43
|
-
return None
|
|
54
|
+
return get_name(curie, **kwargs)
|
|
44
55
|
|
|
45
56
|
|
|
46
57
|
X = TypeVar("X")
|
|
@@ -50,108 +61,140 @@ NO_BUILD_LOGGED: set = set()
|
|
|
50
61
|
|
|
51
62
|
|
|
52
63
|
def _help_get(
|
|
53
|
-
f: Callable[[str], Mapping[str, X]],
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
force: bool = False,
|
|
57
|
-
strict: bool = False,
|
|
58
|
-
version: str | None = None,
|
|
64
|
+
f: Callable[[str, Unpack[GetOntologyKwargs]], Mapping[str, X]],
|
|
65
|
+
reference: Reference,
|
|
66
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
59
67
|
) -> X | None:
|
|
60
68
|
"""Get the result for an entity based on a mapping maker function ``f``."""
|
|
61
69
|
try:
|
|
62
|
-
mapping = f(prefix,
|
|
70
|
+
mapping = f(reference.prefix, **kwargs) # type:ignore
|
|
63
71
|
except NoBuildError:
|
|
64
|
-
if prefix not in NO_BUILD_PREFIXES:
|
|
65
|
-
logger.warning("[%s] unable to look up results with %s", prefix, f)
|
|
66
|
-
NO_BUILD_PREFIXES.add(prefix)
|
|
72
|
+
if reference.prefix not in NO_BUILD_PREFIXES:
|
|
73
|
+
logger.warning("[%s] unable to look up results with %s", reference.prefix, f)
|
|
74
|
+
NO_BUILD_PREFIXES.add(reference.prefix)
|
|
67
75
|
return None
|
|
68
76
|
except ValueError as e:
|
|
69
|
-
if prefix not in NO_BUILD_PREFIXES:
|
|
70
|
-
logger.warning(
|
|
71
|
-
|
|
77
|
+
if reference.prefix not in NO_BUILD_PREFIXES:
|
|
78
|
+
logger.warning(
|
|
79
|
+
"[%s] value error while looking up results with %s: %s", reference.prefix, f, e
|
|
80
|
+
)
|
|
81
|
+
NO_BUILD_PREFIXES.add(reference.prefix)
|
|
72
82
|
return None
|
|
73
83
|
|
|
74
84
|
if not mapping:
|
|
75
|
-
if prefix not in NO_BUILD_PREFIXES:
|
|
76
|
-
logger.warning("[%s] no results produced with %s", prefix, f)
|
|
77
|
-
NO_BUILD_PREFIXES.add(prefix)
|
|
85
|
+
if reference.prefix not in NO_BUILD_PREFIXES:
|
|
86
|
+
logger.warning("[%s] no results produced with %s", reference.prefix, f)
|
|
87
|
+
NO_BUILD_PREFIXES.add(reference.prefix)
|
|
78
88
|
return None
|
|
79
89
|
|
|
80
|
-
primary_id = get_primary_identifier(
|
|
90
|
+
primary_id = get_primary_identifier(reference, **kwargs)
|
|
81
91
|
return mapping.get(primary_id)
|
|
82
92
|
|
|
83
93
|
|
|
84
|
-
@wrap_norm_prefix
|
|
85
94
|
def get_name(
|
|
86
|
-
prefix: str | Reference | ReferenceTuple,
|
|
95
|
+
prefix: str | curies.Reference | curies.ReferenceTuple,
|
|
87
96
|
identifier: str | None = None,
|
|
88
97
|
/,
|
|
89
|
-
|
|
90
|
-
version: str | None = None,
|
|
98
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
91
99
|
) -> str | None:
|
|
92
100
|
"""Get the name for an entity."""
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
return _help_get(get_id_name_mapping, prefix, identifier, version=version) # type:ignore
|
|
101
|
+
reference = _get_pi(prefix, identifier)
|
|
102
|
+
return _help_get(get_id_name_mapping, reference, **kwargs)
|
|
96
103
|
|
|
97
104
|
|
|
98
105
|
@lru_cache
|
|
99
106
|
@wrap_norm_prefix
|
|
100
|
-
def get_ids(
|
|
101
|
-
prefix: str, *, force: bool = False, strict: bool = False, version: str | None = None
|
|
102
|
-
) -> set[str]:
|
|
107
|
+
def get_ids(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[str]:
|
|
103
108
|
"""Get the set of identifiers for this prefix."""
|
|
104
109
|
if prefix == "ncbigene":
|
|
105
|
-
from ..sources.ncbigene import get_ncbigene_ids
|
|
110
|
+
from ..sources.ncbi.ncbigene import get_ncbigene_ids
|
|
106
111
|
|
|
107
112
|
logger.info("[%s] loading name mappings", prefix)
|
|
108
113
|
rv = get_ncbigene_ids()
|
|
109
114
|
logger.info("[%s] done loading name mappings", prefix)
|
|
110
115
|
return rv
|
|
111
116
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
117
|
+
return {
|
|
118
|
+
reference.identifier
|
|
119
|
+
for reference in get_references(prefix, **kwargs)
|
|
120
|
+
if reference.prefix == prefix
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class CachedReferences(Cached[list[Reference]]):
|
|
125
|
+
"""Make a function lazily cache its return value as file."""
|
|
126
|
+
|
|
127
|
+
def load(self) -> list[Reference]:
|
|
128
|
+
"""Load data from the cache as a list of strings.
|
|
129
|
+
|
|
130
|
+
:returns: A list of strings loaded from the cache
|
|
131
|
+
"""
|
|
132
|
+
with open(self.path) as file:
|
|
133
|
+
return [Reference.from_curie(line.strip()) for line in file]
|
|
126
134
|
|
|
127
|
-
|
|
135
|
+
def dump(self, references: list[Reference]) -> None:
|
|
136
|
+
"""Dump data to the cache as a list of strings.
|
|
137
|
+
|
|
138
|
+
:param references: The list of strings to dump
|
|
139
|
+
"""
|
|
140
|
+
with open(self.path, "w") as file:
|
|
141
|
+
for reference in references:
|
|
142
|
+
print(reference.curie, file=file)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@wrap_norm_prefix
|
|
146
|
+
def get_references(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[Reference]:
|
|
147
|
+
"""Get the set of identifiers for this prefix."""
|
|
148
|
+
if prefix == "ncbigene":
|
|
149
|
+
from ..sources.ncbi.ncbigene import get_ncbigene_ids
|
|
150
|
+
|
|
151
|
+
logger.info("[%s] loading identifiers ", prefix)
|
|
152
|
+
rv = {Reference(prefix="ncbigene", identifier=i) for i in get_ncbigene_ids()}
|
|
153
|
+
logger.info("[%s] done loading identifiers", prefix)
|
|
154
|
+
return rv
|
|
155
|
+
|
|
156
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
157
|
+
# TODO pre-cache these!
|
|
158
|
+
path = get_cache_path(prefix, CacheArtifact.references, version=version)
|
|
159
|
+
|
|
160
|
+
@CachedReferences(
|
|
161
|
+
path=path,
|
|
162
|
+
force=check_should_force(kwargs),
|
|
163
|
+
cache=check_should_cache(kwargs),
|
|
164
|
+
)
|
|
165
|
+
def _get_references() -> list[Reference]:
|
|
166
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
167
|
+
return sorted(ontology.iterate_references())
|
|
168
|
+
|
|
169
|
+
return set(_get_references())
|
|
128
170
|
|
|
129
171
|
|
|
130
172
|
@lru_cache
|
|
131
173
|
@wrap_norm_prefix
|
|
132
174
|
def get_id_name_mapping(
|
|
133
|
-
prefix: str,
|
|
175
|
+
prefix: str,
|
|
176
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
134
177
|
) -> Mapping[str, str]:
|
|
135
178
|
"""Get an identifier to name mapping for the OBO file."""
|
|
136
179
|
if prefix == "ncbigene":
|
|
137
|
-
from ..sources.ncbigene import get_ncbigene_id_to_name_mapping
|
|
180
|
+
from ..sources.ncbi.ncbigene import get_ncbigene_id_to_name_mapping
|
|
138
181
|
|
|
139
|
-
logger.info("[%s] loading
|
|
182
|
+
logger.info("[%s] loading identifiers", prefix)
|
|
140
183
|
rv = get_ncbigene_id_to_name_mapping()
|
|
141
|
-
logger.info("[%s] done loading
|
|
184
|
+
logger.info("[%s] done loading identifiers", prefix)
|
|
142
185
|
return rv
|
|
143
186
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
path = prefix_cache_join(prefix, name="names.tsv", version=version)
|
|
187
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
188
|
+
path = get_cache_path(prefix, CacheArtifact.names, version=version)
|
|
147
189
|
|
|
148
|
-
@cached_mapping(
|
|
190
|
+
@cached_mapping(
|
|
191
|
+
path=path,
|
|
192
|
+
header=[f"{prefix}_id", "name"],
|
|
193
|
+
force=check_should_force(kwargs),
|
|
194
|
+
cache=check_should_cache(kwargs),
|
|
195
|
+
)
|
|
149
196
|
def _get_id_name_mapping() -> Mapping[str, str]:
|
|
150
|
-
|
|
151
|
-
logger.debug("[%s v%s] forcing reload for names", prefix, version)
|
|
152
|
-
else:
|
|
153
|
-
logger.debug("[%s v%s] no cached names found. getting from OBO loader", prefix, version)
|
|
154
|
-
ontology = get_ontology(prefix, force=force, strict=strict, version=version)
|
|
197
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
155
198
|
return ontology.get_id_name_mapping()
|
|
156
199
|
|
|
157
200
|
try:
|
|
@@ -167,89 +210,126 @@ def get_id_name_mapping(
|
|
|
167
210
|
@lru_cache
|
|
168
211
|
@wrap_norm_prefix
|
|
169
212
|
def get_name_id_mapping(
|
|
170
|
-
prefix: str,
|
|
213
|
+
prefix: str,
|
|
214
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
171
215
|
) -> Mapping[str, str]:
|
|
172
216
|
"""Get a name to identifier mapping for the OBO file."""
|
|
173
|
-
id_name = get_id_name_mapping(prefix
|
|
217
|
+
id_name = get_id_name_mapping(prefix, **kwargs)
|
|
174
218
|
return {v: k for k, v in id_name.items()}
|
|
175
219
|
|
|
176
220
|
|
|
177
|
-
@wrap_norm_prefix
|
|
178
221
|
def get_definition(
|
|
179
|
-
prefix: str
|
|
222
|
+
prefix: str | curies.Reference | curies.ReferenceTuple,
|
|
223
|
+
identifier: str | None = None,
|
|
224
|
+
/,
|
|
225
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
180
226
|
) -> str | None:
|
|
181
227
|
"""Get the definition for an entity."""
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
return _help_get(get_id_definition_mapping, prefix, identifier, version=version)
|
|
228
|
+
reference = _get_pi(prefix, identifier)
|
|
229
|
+
return _help_get(get_id_definition_mapping, reference, **kwargs)
|
|
185
230
|
|
|
186
231
|
|
|
187
232
|
def get_id_definition_mapping(
|
|
188
|
-
prefix: str,
|
|
189
|
-
*,
|
|
190
|
-
force: bool = False,
|
|
191
|
-
strict: bool = False,
|
|
192
|
-
version: str | None = None,
|
|
233
|
+
prefix: str, **kwargs: Unpack[GetOntologyKwargs]
|
|
193
234
|
) -> Mapping[str, str]:
|
|
194
235
|
"""Get a mapping of descriptions."""
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
236
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
237
|
+
path = get_cache_path(prefix, CacheArtifact.definitions, version=version)
|
|
238
|
+
|
|
239
|
+
@cached_mapping(
|
|
240
|
+
path=path,
|
|
241
|
+
header=[f"{prefix}_id", "definition"],
|
|
242
|
+
force=check_should_force(kwargs),
|
|
243
|
+
cache=check_should_cache(kwargs),
|
|
244
|
+
)
|
|
200
245
|
def _get_mapping() -> Mapping[str, str]:
|
|
201
246
|
logger.info(
|
|
202
247
|
"[%s v%s] no cached descriptions found. getting from OBO loader", prefix, version
|
|
203
248
|
)
|
|
204
|
-
ontology = get_ontology(prefix,
|
|
249
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
205
250
|
return ontology.get_id_definition_mapping()
|
|
206
251
|
|
|
207
252
|
return _get_mapping()
|
|
208
253
|
|
|
209
254
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
*,
|
|
213
|
-
force: bool = False,
|
|
214
|
-
strict: bool = False,
|
|
215
|
-
version: str | None = None,
|
|
216
|
-
) -> set[str]:
|
|
255
|
+
@wrap_norm_prefix
|
|
256
|
+
def get_obsolete(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[str]:
|
|
217
257
|
"""Get the set of obsolete local unique identifiers."""
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
path =
|
|
221
|
-
|
|
222
|
-
@cached_collection(
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
258
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
259
|
+
# TODO pre-cache these!
|
|
260
|
+
path = get_cache_path(prefix, CacheArtifact.obsoletes, version=version)
|
|
261
|
+
|
|
262
|
+
@cached_collection(
|
|
263
|
+
path=path,
|
|
264
|
+
force=check_should_force(kwargs),
|
|
265
|
+
cache=check_should_cache(kwargs),
|
|
266
|
+
)
|
|
267
|
+
def _get_obsolete() -> list[str]:
|
|
268
|
+
ontology = get_ontology(prefix, **kwargs)
|
|
269
|
+
return sorted(ontology.get_obsolete())
|
|
226
270
|
|
|
227
271
|
return set(_get_obsolete())
|
|
228
272
|
|
|
229
273
|
|
|
230
274
|
@wrap_norm_prefix
|
|
231
|
-
def
|
|
275
|
+
def get_obsolete_references(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[Reference]:
|
|
276
|
+
"""Get the set of obsolete references."""
|
|
277
|
+
return {
|
|
278
|
+
Reference(prefix=prefix, identifier=identifier)
|
|
279
|
+
for identifier in get_obsolete(prefix, **kwargs)
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def get_synonyms(
|
|
284
|
+
prefix: str | curies.Reference | curies.ReferenceTuple,
|
|
285
|
+
identifier: str | None = None,
|
|
286
|
+
/,
|
|
287
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
288
|
+
) -> list[str] | None:
|
|
232
289
|
"""Get the synonyms for an entity."""
|
|
233
|
-
|
|
290
|
+
reference = _get_pi(prefix, identifier)
|
|
291
|
+
return _help_get(get_id_synonyms_mapping, reference, **kwargs)
|
|
234
292
|
|
|
235
293
|
|
|
236
294
|
@wrap_norm_prefix
|
|
237
295
|
def get_id_synonyms_mapping(
|
|
238
|
-
prefix: str,
|
|
239
|
-
*,
|
|
240
|
-
force: bool = False,
|
|
241
|
-
strict: bool = False,
|
|
242
|
-
version: str | None = None,
|
|
296
|
+
prefix: str, **kwargs: Unpack[GetOntologyKwargs]
|
|
243
297
|
) -> Mapping[str, list[str]]:
|
|
244
298
|
"""Get the OBO file and output a synonym dictionary."""
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
299
|
+
df = get_literal_mappings_df(prefix=prefix, **kwargs)
|
|
300
|
+
prefix_with_colon = f"{prefix}:"
|
|
301
|
+
prefix_with_colon_len = len(prefix_with_colon)
|
|
302
|
+
# keep only literal mappings with the right prefix
|
|
303
|
+
df = df[df["curie"].str.startswith(prefix_with_colon)]
|
|
304
|
+
return multidict(
|
|
305
|
+
(curie[prefix_with_colon_len:], text) for curie, text in df[["curie", "text"]].values
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def get_literal_mappings(
|
|
310
|
+
prefix: str, *, skip_obsolete: bool = False, **kwargs: Unpack[GetOntologyKwargs]
|
|
311
|
+
) -> list[LiteralMapping]:
|
|
312
|
+
"""Get literal mappings."""
|
|
313
|
+
df = get_literal_mappings_df(prefix=prefix, **kwargs)
|
|
314
|
+
rv = ssslm.df_to_literal_mappings(df, reference_cls=Reference)
|
|
315
|
+
if skip_obsolete:
|
|
316
|
+
obsoletes = get_obsolete_references(prefix, **kwargs)
|
|
317
|
+
rv = [lm for lm in rv if lm.reference not in obsoletes]
|
|
318
|
+
return rv
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def get_literal_mappings_df(
|
|
322
|
+
prefix: str,
|
|
323
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
324
|
+
) -> pd.DataFrame:
|
|
325
|
+
"""Get a literal mappings dataframe."""
|
|
326
|
+
version = get_version_from_kwargs(prefix, kwargs)
|
|
327
|
+
path = get_cache_path(prefix, CacheArtifact.literal_mappings, version=version)
|
|
328
|
+
|
|
329
|
+
@cached_df(
|
|
330
|
+
path=path, dtype=str, force=check_should_force(kwargs), cache=check_should_cache(kwargs)
|
|
331
|
+
)
|
|
332
|
+
def _df_getter() -> pd.DataFrame:
|
|
333
|
+
return get_ontology(prefix, **kwargs).get_literal_mappings_df()
|
|
334
|
+
|
|
335
|
+
return _df_getter()
|