pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/gilda_utils.py
CHANGED
|
@@ -3,63 +3,67 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
|
|
7
|
-
from
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Iterable, Sequence
|
|
8
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
8
9
|
|
|
9
10
|
import bioregistry
|
|
10
|
-
import
|
|
11
|
-
import
|
|
12
|
-
from gilda.grounder import Grounder
|
|
13
|
-
from gilda.process import normalize
|
|
14
|
-
from gilda.term import filter_out_duplicates
|
|
11
|
+
import ssslm
|
|
12
|
+
from ssslm import GildaGrounder, literal_mappings_to_gilda
|
|
15
13
|
from tqdm.auto import tqdm
|
|
14
|
+
from typing_extensions import Unpack
|
|
16
15
|
|
|
17
|
-
from pyobo import (
|
|
18
|
-
get_descendants,
|
|
16
|
+
from pyobo.api import (
|
|
19
17
|
get_id_name_mapping,
|
|
20
|
-
get_id_species_mapping,
|
|
21
|
-
get_id_synonyms_mapping,
|
|
22
18
|
get_ids,
|
|
23
|
-
|
|
19
|
+
get_literal_mappings,
|
|
20
|
+
get_literal_mappings_subset,
|
|
24
21
|
)
|
|
25
|
-
from pyobo.
|
|
26
|
-
from pyobo.
|
|
22
|
+
from pyobo.constants import GetOntologyKwargs
|
|
23
|
+
from pyobo.struct.reference import Reference
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
import gilda
|
|
27
27
|
|
|
28
28
|
__all__ = [
|
|
29
|
-
"iter_gilda_prediction_tuples",
|
|
30
29
|
"get_grounder",
|
|
31
|
-
"
|
|
30
|
+
"iter_gilda_prediction_tuples",
|
|
32
31
|
]
|
|
33
32
|
|
|
34
33
|
logger = logging.getLogger(__name__)
|
|
35
34
|
|
|
36
35
|
|
|
36
|
+
# TODO the only place this is used is in Biomappings -
|
|
37
|
+
# might be better to directly move it there
|
|
37
38
|
def iter_gilda_prediction_tuples(
|
|
38
39
|
prefix: str,
|
|
39
40
|
relation: str = "skos:exactMatch",
|
|
40
41
|
*,
|
|
41
|
-
grounder: Grounder | None = None,
|
|
42
|
+
grounder: gilda.Grounder | None = None,
|
|
42
43
|
identifiers_are_names: bool = False,
|
|
43
44
|
strict: bool = False,
|
|
44
45
|
) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
|
|
45
46
|
"""Iterate over prediction tuples for a given prefix."""
|
|
46
47
|
if grounder is None:
|
|
48
|
+
import gilda.api
|
|
49
|
+
|
|
47
50
|
grounder = gilda.api.grounder
|
|
51
|
+
grounder_ = GildaGrounder(grounder)
|
|
48
52
|
id_name_mapping = get_id_name_mapping(prefix, strict=strict)
|
|
49
53
|
it = tqdm(
|
|
50
54
|
id_name_mapping.items(), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="name"
|
|
51
55
|
)
|
|
52
56
|
for identifier, name in it:
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
norm_identifier = _normalize_identifier(prefix, identifier)
|
|
58
|
+
for scored_match in grounder_.get_matches(name):
|
|
55
59
|
yield (
|
|
56
60
|
prefix,
|
|
57
|
-
|
|
61
|
+
norm_identifier,
|
|
58
62
|
name,
|
|
59
63
|
relation,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
64
|
+
scored_match.prefix,
|
|
65
|
+
_normalize_identifier(scored_match.prefix, scored_match.identifier),
|
|
66
|
+
name,
|
|
63
67
|
"semapv:LexicalMatching",
|
|
64
68
|
round(scored_match.score, 3),
|
|
65
69
|
)
|
|
@@ -67,22 +71,22 @@ def iter_gilda_prediction_tuples(
|
|
|
67
71
|
if identifiers_are_names:
|
|
68
72
|
it = tqdm(get_ids(prefix), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="id")
|
|
69
73
|
for identifier in it:
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
norm_identifier = _normalize_identifier(prefix, identifier)
|
|
75
|
+
for scored_match in grounder_.get_matches(identifier):
|
|
72
76
|
yield (
|
|
73
77
|
prefix,
|
|
74
|
-
|
|
78
|
+
norm_identifier,
|
|
75
79
|
identifier,
|
|
76
80
|
relation,
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
81
|
+
scored_match.prefix,
|
|
82
|
+
_normalize_identifier(scored_match.prefix, scored_match.identifier),
|
|
83
|
+
identifier,
|
|
80
84
|
"semapv:LexicalMatching",
|
|
81
85
|
scored_match.score,
|
|
82
86
|
)
|
|
83
87
|
|
|
84
88
|
|
|
85
|
-
def
|
|
89
|
+
def _normalize_identifier(prefix: str, identifier: str) -> str:
|
|
86
90
|
"""Normalize the identifier."""
|
|
87
91
|
resource = bioregistry.get_resource(prefix)
|
|
88
92
|
if resource is None:
|
|
@@ -90,183 +94,58 @@ def normalize_identifier(prefix: str, identifier: str) -> str:
|
|
|
90
94
|
return resource.miriam_standardize_identifier(identifier) or identifier
|
|
91
95
|
|
|
92
96
|
|
|
93
|
-
def
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
progress: bool = True,
|
|
102
|
-
) -> Grounder:
|
|
103
|
-
"""Get a Gilda grounder for the given prefix(es)."""
|
|
104
|
-
unnamed = set() if unnamed is None else set(unnamed)
|
|
105
|
-
if isinstance(prefixes, str):
|
|
106
|
-
prefixes = [prefixes]
|
|
107
|
-
else:
|
|
108
|
-
prefixes = list(prefixes)
|
|
109
|
-
if versions is None:
|
|
110
|
-
versions = [None] * len(prefixes)
|
|
111
|
-
elif isinstance(versions, str):
|
|
112
|
-
versions = [versions]
|
|
113
|
-
elif isinstance(versions, dict):
|
|
114
|
-
versions = [versions.get(prefix) for prefix in prefixes]
|
|
115
|
-
else:
|
|
116
|
-
versions = list(versions)
|
|
117
|
-
if len(prefixes) != len(versions):
|
|
118
|
-
raise ValueError
|
|
119
|
-
|
|
120
|
-
terms: list[gilda.term.Term] = []
|
|
121
|
-
for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
|
|
122
|
-
try:
|
|
123
|
-
p_terms = list(
|
|
124
|
-
get_gilda_terms(
|
|
125
|
-
prefix,
|
|
126
|
-
identifiers_are_names=prefix in unnamed,
|
|
127
|
-
version=version,
|
|
128
|
-
strict=strict,
|
|
129
|
-
skip_obsolete=skip_obsolete,
|
|
130
|
-
progress=progress,
|
|
131
|
-
)
|
|
132
|
-
)
|
|
133
|
-
except (NoBuildError, CalledProcessError):
|
|
134
|
-
continue
|
|
135
|
-
else:
|
|
136
|
-
terms.extend(p_terms)
|
|
137
|
-
terms = filter_out_duplicates(terms)
|
|
138
|
-
terms_dict = multidict((term.norm_text, term) for term in terms)
|
|
139
|
-
if grounder_cls is None:
|
|
140
|
-
return Grounder(terms_dict)
|
|
141
|
-
else:
|
|
142
|
-
return grounder_cls(terms_dict)
|
|
97
|
+
def normalize_identifier(prefix: str, identifier: str) -> str:
    """Normalize the identifier (deprecated public entry point).

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`_normalize_identifier`.

    :param prefix: The prefix the identifier belongs to
    :param identifier: The local identifier to normalize
    :returns: Whatever :func:`_normalize_identifier` returns for the pair
    """
    deprecation_message = (
        "normalization to MIRIAM is deprecated, please update to using Bioregistry standard identifiers"
    )
    # stacklevel=2 makes the warning point at the code calling this function
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return _normalize_identifier(prefix, identifier)
|
|
143
105
|
|
|
144
106
|
|
|
145
|
-
def
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
identifier: str,
|
|
150
|
-
name: str,
|
|
151
|
-
status: str,
|
|
152
|
-
organism: str | None = None,
|
|
153
|
-
) -> gilda.term.Term | None:
|
|
154
|
-
try:
|
|
155
|
-
term = gilda.term.Term(
|
|
156
|
-
norm_text=normalize(text),
|
|
157
|
-
text=text,
|
|
158
|
-
db=prefix,
|
|
159
|
-
id=identifier,
|
|
160
|
-
entry_name=name,
|
|
161
|
-
status=status,
|
|
162
|
-
source=prefix,
|
|
163
|
-
organism=organism,
|
|
164
|
-
)
|
|
165
|
-
except ValueError:
|
|
166
|
-
return None
|
|
167
|
-
return term
|
|
107
|
+
def get_grounder(*args: Any, **kwargs: Any) -> gilda.Grounder:
    """Get a Gilda grounder (deprecated, use :func:`pyobo.ner.get_grounder`).

    All positional and keyword arguments are forwarded unchanged to
    :func:`pyobo.ner.get_grounder`.

    :returns: The object stored in the ``_grounder`` attribute of the wrapper
        returned by :func:`pyobo.ner.get_grounder`
    """
    warnings.warn("use pyobo.ner.get_grounder", DeprecationWarning, stacklevel=2)
    # imported lazily inside the function, as in the original
    import pyobo.ner

    # Call the function from the module that was just imported (and that the
    # deprecation message recommends) instead of relying on a top-level
    # ``pyobo.get_grounder`` re-export, which is not guaranteed to exist and
    # would recurse forever if it pointed back at this wrapper.
    grounder = cast(ssslm.ner.GildaGrounder, pyobo.ner.get_grounder(*args, **kwargs))
    return grounder._grounder
|
|
169
114
|
|
|
170
|
-
def get_gilda_terms(
|
|
171
|
-
prefix: str,
|
|
172
|
-
*,
|
|
173
|
-
identifiers_are_names: bool = False,
|
|
174
|
-
version: str | None = None,
|
|
175
|
-
strict: bool = True,
|
|
176
|
-
skip_obsolete: bool = False,
|
|
177
|
-
progress: bool = True,
|
|
178
|
-
) -> Iterable[gilda.term.Term]:
|
|
179
|
-
"""Get gilda terms for the given namespace."""
|
|
180
|
-
id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
|
|
181
|
-
id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
|
|
182
|
-
obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
|
|
183
115
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
116
|
+
def get_gilda_terms(prefix: str, *, skip_obsolete: bool = False, **kwargs) -> Iterable[gilda.Term]:
    """Yield Gilda terms for the given prefix (deprecated).

    This is a generator, so the deprecation warning is only issued once
    iteration starts.

    :param prefix: The prefix whose literal mappings are retrieved
    :param skip_obsolete: Forwarded to :func:`get_literal_mappings`
    :param kwargs: Remaining keyword arguments forwarded to :func:`get_literal_mappings`
    :yields: The terms produced by ``literal_mappings_to_gilda``
    """
    deprecation_message = "use pyobo.get_literal_mappings() directly and convert to gilda yourself"
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    literal_mappings = get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
    yield from literal_mappings_to_gilda(literal_mappings)
|
|
191
|
-
for identifier, name in it:
|
|
192
|
-
if identifier in obsoletes:
|
|
193
|
-
continue
|
|
194
|
-
term = _fast_term(
|
|
195
|
-
text=name,
|
|
196
|
-
prefix=prefix,
|
|
197
|
-
identifier=identifier,
|
|
198
|
-
name=name,
|
|
199
|
-
status="name",
|
|
200
|
-
organism=id_to_species.get(identifier),
|
|
201
|
-
)
|
|
202
|
-
if term is not None:
|
|
203
|
-
yield term
|
|
204
|
-
|
|
205
|
-
id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
|
|
206
|
-
if id_to_synonyms:
|
|
207
|
-
it = tqdm(
|
|
208
|
-
id_to_synonyms.items(),
|
|
209
|
-
desc=f"[{prefix}] mapping",
|
|
210
|
-
unit_scale=True,
|
|
211
|
-
unit="synonym",
|
|
212
|
-
disable=not progress,
|
|
213
|
-
)
|
|
214
|
-
for identifier, synonyms in it:
|
|
215
|
-
if identifier in obsoletes:
|
|
216
|
-
continue
|
|
217
|
-
name = id_to_name[identifier]
|
|
218
|
-
for synonym in synonyms:
|
|
219
|
-
if not synonym:
|
|
220
|
-
continue
|
|
221
|
-
term = _fast_term(
|
|
222
|
-
text=synonym,
|
|
223
|
-
prefix=prefix,
|
|
224
|
-
identifier=identifier,
|
|
225
|
-
name=name,
|
|
226
|
-
status="synonym",
|
|
227
|
-
organism=id_to_species.get(identifier),
|
|
228
|
-
)
|
|
229
|
-
if term is not None:
|
|
230
|
-
yield term
|
|
231
|
-
|
|
232
|
-
if identifiers_are_names:
|
|
233
|
-
it = tqdm(
|
|
234
|
-
get_ids(prefix),
|
|
235
|
-
desc=f"[{prefix}] mapping",
|
|
236
|
-
unit_scale=True,
|
|
237
|
-
unit="id",
|
|
238
|
-
disable=not progress,
|
|
239
|
-
)
|
|
240
|
-
for identifier in it:
|
|
241
|
-
if identifier in obsoletes:
|
|
242
|
-
continue
|
|
243
|
-
term = _fast_term(
|
|
244
|
-
text=identifier,
|
|
245
|
-
prefix=prefix,
|
|
246
|
-
identifier=identifier,
|
|
247
|
-
name=identifier,
|
|
248
|
-
status="name",
|
|
249
|
-
organism=id_to_species.get(identifier),
|
|
250
|
-
)
|
|
251
|
-
if term is not None:
|
|
252
|
-
yield term
|
|
253
126
|
|
|
254
127
|
|
|
255
128
|
def get_gilda_term_subset(
    source: str,
    ancestors: str | Sequence[str],
    *,
    skip_obsolete: bool = False,
    **kwargs: Unpack[GetOntologyKwargs],
) -> Iterable[gilda.Term]:
    """Yield Gilda terms for a subset of a resource (deprecated).

    This is a generator, so the deprecation warning is only issued once
    iteration starts.

    :param source: The prefix passed as first argument to :func:`get_literal_mappings_subset`
    :param ancestors: One CURIE string or a sequence of CURIE strings; each is
        parsed with ``Reference.from_curie``
    :param skip_obsolete: Forwarded to :func:`get_literal_mappings_subset`
    :param kwargs: Remaining keyword arguments forwarded to :func:`get_literal_mappings_subset`
    :yields: The terms produced by ``literal_mappings_to_gilda``
    """
    deprecation_message = (
        "use pyobo.get_literal_mappings_subset() directly and convert to gilda yourself"
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    # a lone string is a single CURIE, not a sequence of characters
    ancestor_curies = [ancestors] if isinstance(ancestors, str) else ancestors
    ancestor_references = [Reference.from_curie(curie) for curie in ancestor_curies]

    literal_mappings = get_literal_mappings_subset(
        source,
        ancestors=ancestor_references,
        skip_obsolete=skip_obsolete,
        **kwargs,
    )
    yield from literal_mappings_to_gilda(literal_mappings)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Extract registry information."""
|
|
2
|
+
|
|
3
|
+
from .api import (
|
|
4
|
+
BlacklistedError,
|
|
5
|
+
DefaultCoercionError,
|
|
6
|
+
EmptyStringError,
|
|
7
|
+
NotCURIEError,
|
|
8
|
+
ParseError,
|
|
9
|
+
ParseValidationError,
|
|
10
|
+
UnparsableIRIError,
|
|
11
|
+
UnregisteredPrefixError,
|
|
12
|
+
_is_valid_identifier,
|
|
13
|
+
_parse_str_or_curie_or_uri_helper,
|
|
14
|
+
standardize_ec,
|
|
15
|
+
wrap_norm_prefix,
|
|
16
|
+
)
|
|
17
|
+
from .preprocessing import (
|
|
18
|
+
remap_full,
|
|
19
|
+
remap_prefix,
|
|
20
|
+
str_is_blacklisted,
|
|
21
|
+
)
|
|
22
|
+
from .relations import ground_relation
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"BlacklistedError",
|
|
26
|
+
"DefaultCoercionError",
|
|
27
|
+
"EmptyStringError",
|
|
28
|
+
"NotCURIEError",
|
|
29
|
+
"ParseError",
|
|
30
|
+
"ParseValidationError",
|
|
31
|
+
"UnparsableIRIError",
|
|
32
|
+
"UnregisteredPrefixError",
|
|
33
|
+
"_is_valid_identifier",
|
|
34
|
+
"_parse_str_or_curie_or_uri_helper",
|
|
35
|
+
"ground_relation",
|
|
36
|
+
"remap_full",
|
|
37
|
+
"remap_prefix",
|
|
38
|
+
"standardize_ec",
|
|
39
|
+
"str_is_blacklisted",
|
|
40
|
+
"wrap_norm_prefix",
|
|
41
|
+
]
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Utilities for handling prefixes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from functools import wraps
|
|
7
|
+
from typing import Annotated, ClassVar
|
|
8
|
+
|
|
9
|
+
import bioregistry
|
|
10
|
+
import click
|
|
11
|
+
from bioregistry import NormalizedNamableReference as Reference
|
|
12
|
+
from bioregistry.constants import FailureReturnType
|
|
13
|
+
from curies import ReferenceTuple
|
|
14
|
+
from pydantic import ValidationError
|
|
15
|
+
from typing_extensions import Doc
|
|
16
|
+
|
|
17
|
+
from .preprocessing import remap_full, remap_prefix, str_is_blacklisted
|
|
18
|
+
from .relations import ground_relation
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"BlacklistedError",
|
|
22
|
+
"DefaultCoercionError",
|
|
23
|
+
"EmptyStringError",
|
|
24
|
+
"NotCURIEError",
|
|
25
|
+
"ParseError",
|
|
26
|
+
"ParseValidationError",
|
|
27
|
+
"UnparsableIRIError",
|
|
28
|
+
"UnregisteredPrefixError",
|
|
29
|
+
"_parse_str_or_curie_or_uri_helper",
|
|
30
|
+
"standardize_ec",
|
|
31
|
+
"wrap_norm_prefix",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BlacklistedError(ValueError):
    """A sentinel marking a string as blacklisted.

    It carries no data of its own; it is a plain :class:`ValueError` subclass.
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
Line = Annotated[str | None, Doc("""The OBO line where the parsing happened""")]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ParseError(BaseException):
    """Base class for errors produced while parsing a string, CURIE, or URI.

    Concrete subclasses provide the ``message`` class attribute, which is the
    human-readable reason rendered by :meth:`__str__`.
    """

    # NOTE(review): this derives from BaseException, so ``except Exception``
    # handlers will not catch it - confirm this is intentional.

    # Reason text, defined by each concrete subclass
    message: ClassVar[str]

    def __init__(
        self,
        curie: str,
        *,
        context: str | None,
        ontology_prefix: str | None = None,
        node: Reference | None = None,
        predicate: Reference | None = None,
        line: Line = None,
    ) -> None:
        """Initialize the error.

        :param curie: The text that failed to parse
        :param context: Optional description of where the text was found
        :param ontology_prefix: Optional prefix of the ontology being processed
        :param node: Optional reference to the entity being processed
        :param predicate: Optional reference to the predicate being processed
        :param line: Optional full line in which the text appeared
        """
        self.line = line
        self.predicate = predicate
        self.node = node
        self.ontology_prefix = ontology_prefix
        self.context = context
        self.curie = curie

    def __str__(self) -> str:
        # Location tag: the node (and predicate, when given) takes precedence
        # over the ontology prefix.
        if self.node:
            if self.predicate:
                location = f"[{self.node.curie} - {self.predicate.curie}] "
            else:
                location = f"[{self.node.curie}] "
        elif self.ontology_prefix:
            location = f"[{self.ontology_prefix}] "
        else:
            location = ""

        pieces = [location, f"{self.message} {click.style(self.curie, fg='cyan')}"]
        if self.context:
            pieces.append(f" in {self.context}")
        # Skip the line when it would just repeat the offending text
        if self.line and self.line != self.curie:
            pieces.append(f" in {click.style(self.line, fg='yellow')}")
        return "".join(pieces)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class ParseValidationError(ParseError):
    """Raised when a parsed reference fails Pydantic validation."""

    message = "failed Pydantic validation"

    def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
        """Initialize the error.

        :param args: Positional arguments passed on to :class:`ParseError`
        :param exc: The Pydantic validation error that triggered this error,
            kept on the instance as ``exc``
        :param kwargs: Keyword arguments passed on to :class:`ParseError`
        """
        super().__init__(*args, **kwargs)
        # keep the underlying exception available for inspection
        self.exc = exc
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class UnregisteredPrefixError(ParseError):
    """Raised when the prefix of a CURIE is not registered."""

    message = "unregistered prefix in"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class UnparsableIRIError(ParseError):
    """Raised on an IRI that can't be parsed."""

    message = "couldn't parse IRI"
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class EmptyStringError(ParseError):
    """Raised on an empty string."""

    message = "is empty"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class NotCURIEError(ParseError):
    """Raised when a piece of text can't be parsed as a CURIE."""

    message = "not a CURIE"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class DefaultCoercionError(ParseError):
    """Raised when a piece of text can't be coerced into a default reference."""

    message = "can't be coerced into a default reference"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _is_uri(s: str) -> bool:
|
|
126
|
+
return s.startswith("http:") or s.startswith("https:")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _preclean_uri(s: str) -> str:
|
|
130
|
+
s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
|
|
131
|
+
s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
|
|
132
|
+
s = s.strip().removeprefix("url:").removeprefix("uri:")
|
|
133
|
+
s = s.removeprefix("URL:").removeprefix("URI:")
|
|
134
|
+
s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
|
|
135
|
+
s = s.replace("http\\:", "http:")
|
|
136
|
+
s = s.replace("https\\:", "https:")
|
|
137
|
+
s = s.rstrip("/")
|
|
138
|
+
return s
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _parse_str_or_curie_or_uri_helper(
|
|
142
|
+
str_or_curie_or_uri: str,
|
|
143
|
+
*,
|
|
144
|
+
ontology_prefix: str | None = None,
|
|
145
|
+
node: Reference | None = None,
|
|
146
|
+
predicate: Reference | None = None,
|
|
147
|
+
upgrade: bool = True,
|
|
148
|
+
line: str | None = None,
|
|
149
|
+
name: str | None = None,
|
|
150
|
+
context: str | None = None,
|
|
151
|
+
) -> Reference | ParseError | BlacklistedError:
|
|
152
|
+
"""Parse a string that looks like a CURIE.
|
|
153
|
+
|
|
154
|
+
:param str_or_curie_or_uri: A compact uniform resource identifier (CURIE)
|
|
155
|
+
:param ontology_prefix: The ontology in which the CURIE appears
|
|
156
|
+
|
|
157
|
+
:returns: A reference if parsing succeeds; otherwise a :class:`ParseError` or :class:`BlacklistedError` instance describing the failure, which is returned rather than raised
|
|
158
|
+
|
|
159
|
+
- Normalizes the namespace
|
|
160
|
+
- Checks against a blacklist for the entire curie, for the namespace, and for
|
|
161
|
+
suffixes.
|
|
162
|
+
"""
|
|
163
|
+
str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
|
|
164
|
+
if not str_or_curie_or_uri:
|
|
165
|
+
return EmptyStringError(
|
|
166
|
+
str_or_curie_or_uri,
|
|
167
|
+
ontology_prefix=ontology_prefix,
|
|
168
|
+
node=node,
|
|
169
|
+
predicate=predicate,
|
|
170
|
+
line=line,
|
|
171
|
+
context=context,
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
if upgrade:
|
|
175
|
+
# Remap the curie with the full list
|
|
176
|
+
if r1 := remap_full(str_or_curie_or_uri, ontology_prefix=ontology_prefix):
|
|
177
|
+
return r1
|
|
178
|
+
|
|
179
|
+
# Remap node's prefix (if necessary)
|
|
180
|
+
str_or_curie_or_uri = remap_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
|
|
181
|
+
|
|
182
|
+
if r2 := ground_relation(str_or_curie_or_uri):
|
|
183
|
+
return r2
|
|
184
|
+
|
|
185
|
+
if str_is_blacklisted(str_or_curie_or_uri, ontology_prefix=ontology_prefix):
|
|
186
|
+
return BlacklistedError()
|
|
187
|
+
|
|
188
|
+
if _is_uri(str_or_curie_or_uri):
|
|
189
|
+
rt = bioregistry.parse_iri(
|
|
190
|
+
str_or_curie_or_uri, on_failure_return_type=FailureReturnType.single
|
|
191
|
+
)
|
|
192
|
+
if rt is None:
|
|
193
|
+
return UnparsableIRIError(
|
|
194
|
+
str_or_curie_or_uri,
|
|
195
|
+
ontology_prefix=ontology_prefix,
|
|
196
|
+
node=node,
|
|
197
|
+
predicate=predicate,
|
|
198
|
+
line=line,
|
|
199
|
+
context=context,
|
|
200
|
+
)
|
|
201
|
+
try:
|
|
202
|
+
rv = Reference.model_validate(
|
|
203
|
+
{"prefix": rt.prefix, "identifier": rt.identifier, "name": name}
|
|
204
|
+
)
|
|
205
|
+
except ValidationError as exc:
|
|
206
|
+
return ParseValidationError(
|
|
207
|
+
str_or_curie_or_uri,
|
|
208
|
+
ontology_prefix=ontology_prefix,
|
|
209
|
+
node=node,
|
|
210
|
+
predicate=predicate,
|
|
211
|
+
line=line,
|
|
212
|
+
context=context,
|
|
213
|
+
exc=exc,
|
|
214
|
+
)
|
|
215
|
+
else:
|
|
216
|
+
return rv
|
|
217
|
+
|
|
218
|
+
prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
|
|
219
|
+
if not delimiter:
|
|
220
|
+
return NotCURIEError(
|
|
221
|
+
str_or_curie_or_uri,
|
|
222
|
+
ontology_prefix=ontology_prefix,
|
|
223
|
+
node=node,
|
|
224
|
+
predicate=predicate,
|
|
225
|
+
line=line,
|
|
226
|
+
context=context,
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
norm_node_prefix = bioregistry.normalize_prefix(prefix)
|
|
230
|
+
if not norm_node_prefix:
|
|
231
|
+
return UnregisteredPrefixError(
|
|
232
|
+
str_or_curie_or_uri,
|
|
233
|
+
ontology_prefix=ontology_prefix,
|
|
234
|
+
node=node,
|
|
235
|
+
predicate=predicate,
|
|
236
|
+
line=line,
|
|
237
|
+
context=context,
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
|
|
241
|
+
try:
|
|
242
|
+
rv = Reference.model_validate(
|
|
243
|
+
{"prefix": norm_node_prefix, "identifier": identifier, "name": name}
|
|
244
|
+
)
|
|
245
|
+
except ValidationError as exc:
|
|
246
|
+
return ParseValidationError(
|
|
247
|
+
str_or_curie_or_uri,
|
|
248
|
+
ontology_prefix=ontology_prefix,
|
|
249
|
+
node=node,
|
|
250
|
+
predicate=predicate,
|
|
251
|
+
line=line,
|
|
252
|
+
exc=exc,
|
|
253
|
+
context=context,
|
|
254
|
+
)
|
|
255
|
+
else:
|
|
256
|
+
return rv
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def wrap_norm_prefix(f):
    """Decorate a function so the prefix in its first argument is normalized.

    The first positional argument of the wrapped function may be a prefix
    string, a ``Reference``, or a ``ReferenceTuple``. Its prefix is normalized
    with :func:`bioregistry.normalize_prefix` before ``f`` is called; all other
    arguments are passed through unchanged.

    :param f: The function to wrap
    :returns: The wrapped function
    :raises ValueError: (from the wrapper) if the prefix can't be normalized
    :raises TypeError: (from the wrapper) if the first argument is none of the
        supported types
    """

    def _normalize(raw_prefix: str) -> str:
        """Return the normalized prefix or raise if it is not recognized."""
        norm_prefix = bioregistry.normalize_prefix(raw_prefix)
        if norm_prefix is None:
            raise ValueError(f"Invalid prefix: {raw_prefix}")
        return norm_prefix

    @wraps(f)
    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
        if isinstance(prefix, str):
            prefix = _normalize(prefix)
        elif isinstance(prefix, Reference):
            # only the prefix and identifier are carried over to the new reference
            prefix = Reference(prefix=_normalize(prefix.prefix), identifier=prefix.identifier)
        elif isinstance(prefix, ReferenceTuple):
            prefix = ReferenceTuple(_normalize(prefix.prefix), prefix.identifier)
        else:
            raise TypeError(
                f"prefix must be a str, Reference, or ReferenceTuple, got {type(prefix).__name__}"
            )
        return f(prefix, *args, **kwargs)

    return _wrapped
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def standardize_ec(ec: str) -> str:
    """Standardize an EC code identifier by removing all trailing dashes and dots.

    Surrounding whitespace and any embedded spaces are removed first.

    :param ec: The EC code, e.g. ``1.1.1.-``
    :returns: The code without trailing ``-`` and ``.`` characters, e.g. ``1.1.1``
    """
    ec = ec.strip().replace(" ", "")
    # rstrip with a character set removes the whole trailing run of dashes and
    # dots in one pass, however many alternating groups there are
    return ec.rstrip("-.")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _is_valid_identifier(curie_or_uri: str) -> bool:
|
|
295
|
+
# TODO this needs more careful implementation
|
|
296
|
+
return bool(curie_or_uri.strip()) and " " not in curie_or_uri
|