pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/gilda_utils.py
CHANGED
|
@@ -2,271 +2,73 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
from collections.abc import Iterable
|
|
7
|
-
from
|
|
5
|
+
import warnings
|
|
6
|
+
from collections.abc import Iterable, Sequence
|
|
7
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
8
8
|
|
|
9
|
-
import
|
|
10
|
-
import
|
|
11
|
-
import
|
|
12
|
-
from gilda.grounder import Grounder
|
|
13
|
-
from gilda.process import normalize
|
|
14
|
-
from gilda.term import filter_out_duplicates
|
|
15
|
-
from tqdm.auto import tqdm
|
|
9
|
+
import ssslm
|
|
10
|
+
from ssslm import literal_mappings_to_gilda
|
|
11
|
+
from typing_extensions import Unpack
|
|
16
12
|
|
|
17
|
-
from pyobo import (
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
get_id_species_mapping,
|
|
21
|
-
get_id_synonyms_mapping,
|
|
22
|
-
get_ids,
|
|
23
|
-
get_obsolete,
|
|
13
|
+
from pyobo.api import (
|
|
14
|
+
get_literal_mappings,
|
|
15
|
+
get_literal_mappings_subset,
|
|
24
16
|
)
|
|
25
|
-
from pyobo.
|
|
26
|
-
from pyobo.
|
|
17
|
+
from pyobo.constants import GetOntologyKwargs
|
|
18
|
+
from pyobo.struct.reference import Reference
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import gilda
|
|
27
22
|
|
|
28
23
|
__all__ = [
|
|
29
|
-
"
|
|
30
|
-
"get_grounder",
|
|
24
|
+
"get_gilda_term_subset",
|
|
31
25
|
"get_gilda_terms",
|
|
26
|
+
"get_grounder",
|
|
32
27
|
]
|
|
33
28
|
|
|
34
|
-
logger = logging.getLogger(__name__)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def iter_gilda_prediction_tuples(
|
|
38
|
-
prefix: str,
|
|
39
|
-
relation: str = "skos:exactMatch",
|
|
40
|
-
*,
|
|
41
|
-
grounder: Grounder | None = None,
|
|
42
|
-
identifiers_are_names: bool = False,
|
|
43
|
-
strict: bool = False,
|
|
44
|
-
) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
|
|
45
|
-
"""Iterate over prediction tuples for a given prefix."""
|
|
46
|
-
if grounder is None:
|
|
47
|
-
grounder = gilda.api.grounder
|
|
48
|
-
id_name_mapping = get_id_name_mapping(prefix, strict=strict)
|
|
49
|
-
it = tqdm(
|
|
50
|
-
id_name_mapping.items(), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="name"
|
|
51
|
-
)
|
|
52
|
-
for identifier, name in it:
|
|
53
|
-
for scored_match in grounder.ground(name):
|
|
54
|
-
target_prefix = scored_match.term.db.lower()
|
|
55
|
-
yield (
|
|
56
|
-
prefix,
|
|
57
|
-
normalize_identifier(prefix, identifier),
|
|
58
|
-
name,
|
|
59
|
-
relation,
|
|
60
|
-
target_prefix,
|
|
61
|
-
normalize_identifier(target_prefix, scored_match.term.id),
|
|
62
|
-
scored_match.term.entry_name,
|
|
63
|
-
"semapv:LexicalMatching",
|
|
64
|
-
round(scored_match.score, 3),
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
if identifiers_are_names:
|
|
68
|
-
it = tqdm(get_ids(prefix), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="id")
|
|
69
|
-
for identifier in it:
|
|
70
|
-
for scored_match in grounder.ground(identifier):
|
|
71
|
-
target_prefix = scored_match.term.db.lower()
|
|
72
|
-
yield (
|
|
73
|
-
prefix,
|
|
74
|
-
normalize_identifier(prefix, identifier),
|
|
75
|
-
identifier,
|
|
76
|
-
relation,
|
|
77
|
-
target_prefix,
|
|
78
|
-
normalize_identifier(target_prefix, scored_match.term.id),
|
|
79
|
-
scored_match.term.entry_name,
|
|
80
|
-
"semapv:LexicalMatching",
|
|
81
|
-
scored_match.score,
|
|
82
|
-
)
|
|
83
29
|
|
|
30
|
+
def get_grounder(*args: Any, **kwargs: Any) -> gilda.Grounder:
|
|
31
|
+
"""Get a grounder."""
|
|
32
|
+
warnings.warn("use pyobo.ner.get_grounder", DeprecationWarning, stacklevel=2)
|
|
33
|
+
import pyobo.ner
|
|
84
34
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
resource = bioregistry.get_resource(prefix)
|
|
88
|
-
if resource is None:
|
|
89
|
-
raise KeyError
|
|
90
|
-
return resource.miriam_standardize_identifier(identifier) or identifier
|
|
35
|
+
grounder = cast(ssslm.ner.GildaGrounder, pyobo.get_grounder(*args, **kwargs))
|
|
36
|
+
return grounder._grounder
|
|
91
37
|
|
|
92
38
|
|
|
93
|
-
def
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
)
|
|
103
|
-
"""Get a Gilda grounder for the given prefix(es)."""
|
|
104
|
-
unnamed = set() if unnamed is None else set(unnamed)
|
|
105
|
-
if isinstance(prefixes, str):
|
|
106
|
-
prefixes = [prefixes]
|
|
107
|
-
else:
|
|
108
|
-
prefixes = list(prefixes)
|
|
109
|
-
if versions is None:
|
|
110
|
-
versions = [None] * len(prefixes)
|
|
111
|
-
elif isinstance(versions, str):
|
|
112
|
-
versions = [versions]
|
|
113
|
-
elif isinstance(versions, dict):
|
|
114
|
-
versions = [versions.get(prefix) for prefix in prefixes]
|
|
115
|
-
else:
|
|
116
|
-
versions = list(versions)
|
|
117
|
-
if len(prefixes) != len(versions):
|
|
118
|
-
raise ValueError
|
|
119
|
-
|
|
120
|
-
terms: list[gilda.term.Term] = []
|
|
121
|
-
for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
|
|
122
|
-
try:
|
|
123
|
-
p_terms = list(
|
|
124
|
-
get_gilda_terms(
|
|
125
|
-
prefix,
|
|
126
|
-
identifiers_are_names=prefix in unnamed,
|
|
127
|
-
version=version,
|
|
128
|
-
strict=strict,
|
|
129
|
-
skip_obsolete=skip_obsolete,
|
|
130
|
-
progress=progress,
|
|
131
|
-
)
|
|
132
|
-
)
|
|
133
|
-
except (NoBuildError, CalledProcessError):
|
|
134
|
-
continue
|
|
135
|
-
else:
|
|
136
|
-
terms.extend(p_terms)
|
|
137
|
-
terms = filter_out_duplicates(terms)
|
|
138
|
-
terms_dict = multidict((term.norm_text, term) for term in terms)
|
|
139
|
-
if grounder_cls is None:
|
|
140
|
-
return Grounder(terms_dict)
|
|
141
|
-
else:
|
|
142
|
-
return grounder_cls(terms_dict)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def _fast_term(
|
|
146
|
-
*,
|
|
147
|
-
text: str,
|
|
148
|
-
prefix: str,
|
|
149
|
-
identifier: str,
|
|
150
|
-
name: str,
|
|
151
|
-
status: str,
|
|
152
|
-
organism: str | None = None,
|
|
153
|
-
) -> gilda.term.Term | None:
|
|
154
|
-
try:
|
|
155
|
-
term = gilda.term.Term(
|
|
156
|
-
norm_text=normalize(text),
|
|
157
|
-
text=text,
|
|
158
|
-
db=prefix,
|
|
159
|
-
id=identifier,
|
|
160
|
-
entry_name=name,
|
|
161
|
-
status=status,
|
|
162
|
-
source=prefix,
|
|
163
|
-
organism=organism,
|
|
164
|
-
)
|
|
165
|
-
except ValueError:
|
|
166
|
-
return None
|
|
167
|
-
return term
|
|
39
|
+
def get_gilda_terms(prefix: str, *, skip_obsolete: bool = False, **kwargs) -> Iterable[gilda.Term]:
    """Get gilda terms (deprecated).

    :param prefix: The prefix passed on to :func:`get_literal_mappings`
    :param skip_obsolete: Passed on to :func:`get_literal_mappings`
    :param kwargs: Extra keyword arguments passed on to :func:`get_literal_mappings`

    :returns: A lazy iterator over the terms produced by
        :func:`literal_mappings_to_gilda`
    """
    # Warn at call time. If this function itself contained ``yield``, the
    # warning would be deferred until the first item is requested and
    # ``stacklevel=2`` would no longer point at the caller's line.
    warnings.warn(
        "use pyobo.get_literal_mappings() directly and convert to gilda yourself",
        DeprecationWarning,
        stacklevel=2,
    )

    def _iterate() -> Iterable[gilda.Term]:
        # the literal mappings are still only loaded once iteration starts
        yield from literal_mappings_to_gilda(
            get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
        )

    return _iterate()
|
|
168
49
|
|
|
169
50
|
|
|
170
|
-
def
|
|
171
|
-
|
|
51
|
+
def get_gilda_term_subset(
|
|
52
|
+
source: str,
|
|
53
|
+
ancestors: str | Sequence[str],
|
|
172
54
|
*,
|
|
173
|
-
identifiers_are_names: bool = False,
|
|
174
|
-
version: str | None = None,
|
|
175
|
-
strict: bool = True,
|
|
176
55
|
skip_obsolete: bool = False,
|
|
177
|
-
|
|
178
|
-
) -> Iterable[gilda.
|
|
179
|
-
"""Get
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
it = tqdm(
|
|
185
|
-
id_to_name.items(),
|
|
186
|
-
desc=f"[{prefix}] mapping",
|
|
187
|
-
unit_scale=True,
|
|
188
|
-
unit="name",
|
|
189
|
-
disable=not progress,
|
|
56
|
+
**kwargs: Unpack[GetOntologyKwargs],
|
|
57
|
+
) -> Iterable[gilda.Term]:
|
|
58
|
+
"""Get a subset of terms."""
|
|
59
|
+
warnings.warn(
|
|
60
|
+
"use pyobo.get_literal_mappings_subset() directly and convert to gilda yourself",
|
|
61
|
+
DeprecationWarning,
|
|
62
|
+
stacklevel=2,
|
|
190
63
|
)
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
organism=id_to_species.get(identifier),
|
|
201
|
-
)
|
|
202
|
-
if term is not None:
|
|
203
|
-
yield term
|
|
204
|
-
|
|
205
|
-
id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
|
|
206
|
-
if id_to_synonyms:
|
|
207
|
-
it = tqdm(
|
|
208
|
-
id_to_synonyms.items(),
|
|
209
|
-
desc=f"[{prefix}] mapping",
|
|
210
|
-
unit_scale=True,
|
|
211
|
-
unit="synonym",
|
|
212
|
-
disable=not progress,
|
|
213
|
-
)
|
|
214
|
-
for identifier, synonyms in it:
|
|
215
|
-
if identifier in obsoletes:
|
|
216
|
-
continue
|
|
217
|
-
name = id_to_name[identifier]
|
|
218
|
-
for synonym in synonyms:
|
|
219
|
-
if not synonym:
|
|
220
|
-
continue
|
|
221
|
-
term = _fast_term(
|
|
222
|
-
text=synonym,
|
|
223
|
-
prefix=prefix,
|
|
224
|
-
identifier=identifier,
|
|
225
|
-
name=name,
|
|
226
|
-
status="synonym",
|
|
227
|
-
organism=id_to_species.get(identifier),
|
|
228
|
-
)
|
|
229
|
-
if term is not None:
|
|
230
|
-
yield term
|
|
231
|
-
|
|
232
|
-
if identifiers_are_names:
|
|
233
|
-
it = tqdm(
|
|
234
|
-
get_ids(prefix),
|
|
235
|
-
desc=f"[{prefix}] mapping",
|
|
236
|
-
unit_scale=True,
|
|
237
|
-
unit="id",
|
|
238
|
-
disable=not progress,
|
|
64
|
+
if isinstance(ancestors, str):
|
|
65
|
+
ancestors = [ancestors]
|
|
66
|
+
|
|
67
|
+
yield from literal_mappings_to_gilda(
|
|
68
|
+
get_literal_mappings_subset(
|
|
69
|
+
source,
|
|
70
|
+
ancestors=[Reference.from_curie(a) for a in ancestors],
|
|
71
|
+
skip_obsolete=skip_obsolete,
|
|
72
|
+
**kwargs,
|
|
239
73
|
)
|
|
240
|
-
|
|
241
|
-
if identifier in obsoletes:
|
|
242
|
-
continue
|
|
243
|
-
term = _fast_term(
|
|
244
|
-
text=identifier,
|
|
245
|
-
prefix=prefix,
|
|
246
|
-
identifier=identifier,
|
|
247
|
-
name=identifier,
|
|
248
|
-
status="name",
|
|
249
|
-
organism=id_to_species.get(identifier),
|
|
250
|
-
)
|
|
251
|
-
if term is not None:
|
|
252
|
-
yield term
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
def get_gilda_term_subset(
|
|
256
|
-
source: str, ancestors: str | list[str], **kwargs
|
|
257
|
-
) -> Iterable[gilda.term.Term]:
|
|
258
|
-
"""Get a subset of terms."""
|
|
259
|
-
subset = {
|
|
260
|
-
descendant
|
|
261
|
-
for parent_curie in _ensure_list(ancestors)
|
|
262
|
-
for descendant in get_descendants(*parent_curie.split(":")) or []
|
|
263
|
-
}
|
|
264
|
-
for term in get_gilda_terms(source, **kwargs):
|
|
265
|
-
if bioregistry.curie_to_str(term.db, term.id) in subset:
|
|
266
|
-
yield term
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def _ensure_list(s: str | list[str]) -> list[str]:
|
|
270
|
-
if isinstance(s, str):
|
|
271
|
-
return [s]
|
|
272
|
-
return s
|
|
74
|
+
)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Extract registry information."""
|
|
2
|
+
|
|
3
|
+
from .api import (
|
|
4
|
+
DefaultCoercionError,
|
|
5
|
+
EmptyStringError,
|
|
6
|
+
NotCURIEError,
|
|
7
|
+
ParseError,
|
|
8
|
+
ParseValidationError,
|
|
9
|
+
UnparsableIRIError,
|
|
10
|
+
UnregisteredPrefixError,
|
|
11
|
+
_is_valid_identifier,
|
|
12
|
+
_parse_str_or_curie_or_uri_helper,
|
|
13
|
+
standardize_ec,
|
|
14
|
+
wrap_norm_prefix,
|
|
15
|
+
)
|
|
16
|
+
from .preprocessing import get_rules
|
|
17
|
+
from .relations import ground_relation
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"DefaultCoercionError",
|
|
21
|
+
"EmptyStringError",
|
|
22
|
+
"NotCURIEError",
|
|
23
|
+
"ParseError",
|
|
24
|
+
"ParseValidationError",
|
|
25
|
+
"UnparsableIRIError",
|
|
26
|
+
"UnregisteredPrefixError",
|
|
27
|
+
"_is_valid_identifier",
|
|
28
|
+
"_parse_str_or_curie_or_uri_helper",
|
|
29
|
+
"get_rules",
|
|
30
|
+
"ground_relation",
|
|
31
|
+
"standardize_ec",
|
|
32
|
+
"wrap_norm_prefix",
|
|
33
|
+
]
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""Utilities for handling prefixes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from functools import lru_cache, wraps
|
|
7
|
+
from typing import Annotated, ClassVar
|
|
8
|
+
|
|
9
|
+
import bioregistry
|
|
10
|
+
import click
|
|
11
|
+
from bioregistry import NormalizedNamableReference as Reference
|
|
12
|
+
from bioregistry.constants import FailureReturnType
|
|
13
|
+
from curies import ReferenceTuple
|
|
14
|
+
from curies.preprocessing import BlocklistError, PreprocessingConverter
|
|
15
|
+
from pydantic import ValidationError
|
|
16
|
+
from typing_extensions import Doc
|
|
17
|
+
|
|
18
|
+
from .preprocessing import get_rules
|
|
19
|
+
from .relations import ground_relation
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"DefaultCoercionError",
|
|
23
|
+
"EmptyStringError",
|
|
24
|
+
"NotCURIEError",
|
|
25
|
+
"ParseError",
|
|
26
|
+
"ParseValidationError",
|
|
27
|
+
"UnparsableIRIError",
|
|
28
|
+
"UnregisteredPrefixError",
|
|
29
|
+
"_parse_str_or_curie_or_uri_helper",
|
|
30
|
+
"standardize_ec",
|
|
31
|
+
"wrap_norm_prefix",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Line = Annotated[str | None, Doc("""The OBO line where the parsing happened""")]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ParseError(BaseException):
    """Base class for failures encountered while parsing a CURIE or URI.

    Concrete subclasses set :attr:`message`. Rendering an instance with
    :func:`str` combines that message with the offending text and whichever
    location details (node, predicate, ontology prefix, context, line) were given.
    """

    # NOTE(review): this derives from BaseException rather than Exception, so a
    # plain ``except Exception`` will not catch it -- confirm that is intended.

    # Short description of the failure; only declared here, set by subclasses.
    message: ClassVar[str]

    def __init__(
        self,
        curie: str,
        *,
        context: str | None,
        ontology_prefix: str | None = None,
        node: Reference | None = None,
        predicate: Reference | None = None,
        line: Line = None,
    ) -> None:
        """Record the text that failed to parse and where it was encountered."""
        self.line = line
        self.predicate = predicate
        self.node = node
        self.ontology_prefix = ontology_prefix
        self.context = context
        self.curie = curie

    def __str__(self) -> str:
        """Render the error as a single (terminal-styled) line of text."""
        # The bracketed location tag prefers the node (optionally with its
        # predicate) and only falls back to the ontology prefix.
        if self.node and self.predicate:
            location = f"[{self.node.curie} - {self.predicate.curie}] "
        elif self.node:
            location = f"[{self.node.curie}] "
        elif self.ontology_prefix:
            location = f"[{self.ontology_prefix}] "
        else:
            location = ""

        pieces = [location, f"{self.message} {click.style(self.curie, fg='cyan')}"]
        if self.context:
            pieces.append(f" in {self.context}")
        # don't repeat the line when it is exactly the text that failed to parse
        if self.line and self.line != self.curie:
            pieces.append(f" in {click.style(self.line, fg='yellow')}")
        return "".join(pieces)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ParseValidationError(ParseError):
    """Used when a parsed prefix/identifier pair fails Pydantic validation."""

    message = "failed Pydantic validation"

    def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
        """Keep the underlying validation error and defer the rest to the base class."""
        self.exc = exc
        super().__init__(*args, **kwargs)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class UnregisteredPrefixError(ParseError):
    """Used when the prefix of a CURIE can't be normalized to a registered prefix."""

    message = "unregistered prefix in"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class UnparsableIRIError(ParseError):
    """Used when an IRI can't be parsed into a prefix and identifier."""

    message = "couldn't parse IRI"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class EmptyStringError(ParseError):
    """Used when the text to parse is an empty string."""

    message = "is empty"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class NotCURIEError(ParseError):
    """Used when a piece of text can't be parsed as a CURIE."""

    message = "not a CURIE"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class DefaultCoercionError(ParseError):
    """Used when a piece of text can't be coerced into a default reference."""

    message = "can't be coerced into a default reference"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _is_uri(s: str) -> bool:
|
|
122
|
+
return s.startswith("http:") or s.startswith("https:")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _preclean_uri(s: str) -> str:
|
|
126
|
+
s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
|
|
127
|
+
s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
|
|
128
|
+
s = s.strip().removeprefix("url:").removeprefix("uri:")
|
|
129
|
+
s = s.removeprefix("URL:").removeprefix("URI:")
|
|
130
|
+
s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
|
|
131
|
+
s = s.replace("http\\:", "http:")
|
|
132
|
+
s = s.replace("https\\:", "https:")
|
|
133
|
+
s = s.rstrip("/")
|
|
134
|
+
return s
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@lru_cache(1)
def _get_converter() -> PreprocessingConverter:
    """Build the preprocessing converter; the result is cached after the first call."""
    options = {
        "converter": bioregistry.manager.converter,
        "rules": get_rules(),
        "preclean": _preclean_uri,
    }
    return PreprocessingConverter(**options)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_str_or_curie_or_uri_helper(
|
|
147
|
+
str_or_curie_or_uri: str,
|
|
148
|
+
*,
|
|
149
|
+
ontology_prefix: str | None = None,
|
|
150
|
+
node: Reference | None = None,
|
|
151
|
+
predicate: Reference | None = None,
|
|
152
|
+
upgrade: bool = True,
|
|
153
|
+
line: str | None = None,
|
|
154
|
+
name: str | None = None,
|
|
155
|
+
context: str | None = None,
|
|
156
|
+
) -> Reference | ParseError | BlocklistError:
|
|
157
|
+
"""Parse a string that looks like a CURIE or an http(s) URI.
|
|
158
|
+
|
|
159
|
+
:param str_or_curie_or_uri: The text to parse: a compact uniform resource identifier (CURIE) or an http(s) URI
|
|
160
|
+
:param ontology_prefix: The ontology in which the CURIE appears
|
|
161
|
+
|
|
162
|
+
:returns: A reference if parsing succeeds; otherwise a ParseError subclass or BlocklistError instance, which is returned rather than raised
|
|
163
|
+
|
|
164
|
+
- Normalizes the namespace
|
|
165
|
+
- Checks against a blacklist for the entire curie, for the namespace, and for
|
|
166
|
+
suffixes.
|
|
167
|
+
"""
|
|
168
|
+
str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
|
|
169
|
+
if not str_or_curie_or_uri:
|
|
170
|
+
return EmptyStringError(
|
|
171
|
+
str_or_curie_or_uri,
|
|
172
|
+
ontology_prefix=ontology_prefix,
|
|
173
|
+
node=node,
|
|
174
|
+
predicate=predicate,
|
|
175
|
+
line=line,
|
|
176
|
+
context=context,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
rules = get_rules()
|
|
180
|
+
|
|
181
|
+
if upgrade:
|
|
182
|
+
# Remap the curie with the full list
|
|
183
|
+
if r1 := rules.remap_full(
|
|
184
|
+
str_or_curie_or_uri, reference_cls=Reference, context=ontology_prefix
|
|
185
|
+
):
|
|
186
|
+
return r1
|
|
187
|
+
|
|
188
|
+
# Remap node's prefix (if necessary)
|
|
189
|
+
str_or_curie_or_uri = rules.remap_prefix(str_or_curie_or_uri, context=ontology_prefix)
|
|
190
|
+
|
|
191
|
+
if r2 := ground_relation(str_or_curie_or_uri):
|
|
192
|
+
return r2
|
|
193
|
+
|
|
194
|
+
if rules.str_is_blocked(str_or_curie_or_uri, context=ontology_prefix):
|
|
195
|
+
return BlocklistError()
|
|
196
|
+
|
|
197
|
+
if _is_uri(str_or_curie_or_uri):
|
|
198
|
+
rt = bioregistry.parse_iri(
|
|
199
|
+
str_or_curie_or_uri, on_failure_return_type=FailureReturnType.single
|
|
200
|
+
)
|
|
201
|
+
if rt is None:
|
|
202
|
+
return UnparsableIRIError(
|
|
203
|
+
str_or_curie_or_uri,
|
|
204
|
+
ontology_prefix=ontology_prefix,
|
|
205
|
+
node=node,
|
|
206
|
+
predicate=predicate,
|
|
207
|
+
line=line,
|
|
208
|
+
context=context,
|
|
209
|
+
)
|
|
210
|
+
try:
|
|
211
|
+
rv = Reference.model_validate(
|
|
212
|
+
{"prefix": rt.prefix, "identifier": rt.identifier, "name": name}
|
|
213
|
+
)
|
|
214
|
+
except ValidationError as exc:
|
|
215
|
+
return ParseValidationError(
|
|
216
|
+
str_or_curie_or_uri,
|
|
217
|
+
ontology_prefix=ontology_prefix,
|
|
218
|
+
node=node,
|
|
219
|
+
predicate=predicate,
|
|
220
|
+
line=line,
|
|
221
|
+
context=context,
|
|
222
|
+
exc=exc,
|
|
223
|
+
)
|
|
224
|
+
else:
|
|
225
|
+
return rv
|
|
226
|
+
|
|
227
|
+
prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
|
|
228
|
+
if not delimiter:
|
|
229
|
+
return NotCURIEError(
|
|
230
|
+
str_or_curie_or_uri,
|
|
231
|
+
ontology_prefix=ontology_prefix,
|
|
232
|
+
node=node,
|
|
233
|
+
predicate=predicate,
|
|
234
|
+
line=line,
|
|
235
|
+
context=context,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
norm_node_prefix = bioregistry.normalize_prefix(prefix)
|
|
239
|
+
if not norm_node_prefix:
|
|
240
|
+
return UnregisteredPrefixError(
|
|
241
|
+
str_or_curie_or_uri,
|
|
242
|
+
ontology_prefix=ontology_prefix,
|
|
243
|
+
node=node,
|
|
244
|
+
predicate=predicate,
|
|
245
|
+
line=line,
|
|
246
|
+
context=context,
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
|
|
250
|
+
try:
|
|
251
|
+
rv = Reference.model_validate(
|
|
252
|
+
{"prefix": norm_node_prefix, "identifier": identifier, "name": name}
|
|
253
|
+
)
|
|
254
|
+
except ValidationError as exc:
|
|
255
|
+
return ParseValidationError(
|
|
256
|
+
str_or_curie_or_uri,
|
|
257
|
+
ontology_prefix=ontology_prefix,
|
|
258
|
+
node=node,
|
|
259
|
+
predicate=predicate,
|
|
260
|
+
line=line,
|
|
261
|
+
exc=exc,
|
|
262
|
+
context=context,
|
|
263
|
+
)
|
|
264
|
+
else:
|
|
265
|
+
return rv
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def wrap_norm_prefix(f):
    """Decorate a function so the prefix in its first argument is normalized.

    The first positional argument may be a prefix string, a ``Reference``, or a
    ``ReferenceTuple``. Its prefix is normalized with
    :func:`bioregistry.normalize_prefix` and the wrapped function is called with
    a value of the same kind carrying the normalized prefix.

    :param f: A function whose first positional argument is a prefix or reference

    :returns: The wrapped function

    :raises ValueError: (from the wrapper) if the prefix can't be normalized
    :raises TypeError: (from the wrapper) if the first argument is none of the
        supported types
    """

    def _normalize(raw_prefix: str) -> str:
        # shared by all three branches: normalize or fail loudly
        norm_prefix = bioregistry.normalize_prefix(raw_prefix)
        if norm_prefix is None:
            raise ValueError(f"Invalid prefix: {raw_prefix}")
        return norm_prefix

    @wraps(f)
    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
        if isinstance(prefix, str):
            prefix = _normalize(prefix)
        elif isinstance(prefix, Reference):
            # NOTE(review): only prefix and identifier are carried over to the
            # rebuilt reference, as in the original implementation.
            prefix = Reference(prefix=_normalize(prefix.prefix), identifier=prefix.identifier)
        elif isinstance(prefix, ReferenceTuple):
            prefix = ReferenceTuple(_normalize(prefix.prefix), prefix.identifier)
        else:
            raise TypeError(
                f"expected a str, Reference, or ReferenceTuple, got {type(prefix).__name__}"
            )
        return f(prefix, *args, **kwargs)

    return _wrapped
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def standardize_ec(ec: str) -> str:
    """Standardize an EC code identifier by removing all trailing dashes and dots.

    Surrounding whitespace and any space characters inside the code are removed
    first.

    :param ec: An EC code, possibly with placeholder levels such as ``1.1.-.-``

    :returns: The code without spaces and without any trailing ``-`` or ``.``

    >>> standardize_ec("1.1.-.-")
    '1.1'
    """
    # A single rstrip over both characters removes every trailing dash/dot,
    # however many placeholder levels there are.
    return ec.strip().replace(" ", "").rstrip("-.")
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _is_valid_identifier(curie_or_uri: str) -> bool:
|
|
304
|
+
# TODO this needs more careful implementation
|
|
305
|
+
return bool(curie_or_uri.strip()) and " " not in curie_or_uri
|