pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/gilda_utils.py CHANGED
@@ -2,271 +2,73 @@
 
 from __future__ import annotations
 
-import logging
-from collections.abc import Iterable
-from subprocess import CalledProcessError
+import warnings
+from collections.abc import Iterable, Sequence
+from typing import TYPE_CHECKING, Any, cast
 
-import bioregistry
-import gilda.api
-import gilda.term
-from gilda.grounder import Grounder
-from gilda.process import normalize
-from gilda.term import filter_out_duplicates
-from tqdm.auto import tqdm
+import ssslm
+from ssslm import literal_mappings_to_gilda
+from typing_extensions import Unpack
 
-from pyobo import (
-    get_descendants,
-    get_id_name_mapping,
-    get_id_species_mapping,
-    get_id_synonyms_mapping,
-    get_ids,
-    get_obsolete,
+from pyobo.api import (
+    get_literal_mappings,
+    get_literal_mappings_subset,
 )
-from pyobo.getters import NoBuildError
-from pyobo.utils.io import multidict
+from pyobo.constants import GetOntologyKwargs
+from pyobo.struct.reference import Reference
+
+if TYPE_CHECKING:
+    import gilda
 
 __all__ = [
-    "iter_gilda_prediction_tuples",
-    "get_grounder",
+    "get_gilda_term_subset",
     "get_gilda_terms",
+    "get_grounder",
 ]
 
-logger = logging.getLogger(__name__)
-
-
-def iter_gilda_prediction_tuples(
-    prefix: str,
-    relation: str = "skos:exactMatch",
-    *,
-    grounder: Grounder | None = None,
-    identifiers_are_names: bool = False,
-    strict: bool = False,
-) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
-    """Iterate over prediction tuples for a given prefix."""
-    if grounder is None:
-        grounder = gilda.api.grounder
-    id_name_mapping = get_id_name_mapping(prefix, strict=strict)
-    it = tqdm(
-        id_name_mapping.items(), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="name"
-    )
-    for identifier, name in it:
-        for scored_match in grounder.ground(name):
-            target_prefix = scored_match.term.db.lower()
-            yield (
-                prefix,
-                normalize_identifier(prefix, identifier),
-                name,
-                relation,
-                target_prefix,
-                normalize_identifier(target_prefix, scored_match.term.id),
-                scored_match.term.entry_name,
-                "semapv:LexicalMatching",
-                round(scored_match.score, 3),
-            )
-
-    if identifiers_are_names:
-        it = tqdm(get_ids(prefix), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="id")
-        for identifier in it:
-            for scored_match in grounder.ground(identifier):
-                target_prefix = scored_match.term.db.lower()
-                yield (
-                    prefix,
-                    normalize_identifier(prefix, identifier),
-                    identifier,
-                    relation,
-                    target_prefix,
-                    normalize_identifier(target_prefix, scored_match.term.id),
-                    scored_match.term.entry_name,
-                    "semapv:LexicalMatching",
-                    scored_match.score,
-                )
 
+def get_grounder(*args: Any, **kwargs: Any) -> gilda.Grounder:
+    """Get a grounder."""
+    warnings.warn("use pyobo.ner.get_grounder", DeprecationWarning, stacklevel=2)
+    import pyobo.ner
 
-def normalize_identifier(prefix: str, identifier: str) -> str:
-    """Normalize the identifier."""
-    resource = bioregistry.get_resource(prefix)
-    if resource is None:
-        raise KeyError
-    return resource.miriam_standardize_identifier(identifier) or identifier
+    grounder = cast(ssslm.ner.GildaGrounder, pyobo.get_grounder(*args, **kwargs))
+    return grounder._grounder
 
 
-def get_grounder(
-    prefixes: str | Iterable[str],
-    *,
-    unnamed: Iterable[str] | None = None,
-    grounder_cls: type[Grounder] | None = None,
-    versions: None | str | Iterable[str | None] | dict[str, str] = None,
-    strict: bool = True,
-    skip_obsolete: bool = False,
-    progress: bool = True,
-) -> Grounder:
-    """Get a Gilda grounder for the given prefix(es)."""
-    unnamed = set() if unnamed is None else set(unnamed)
-    if isinstance(prefixes, str):
-        prefixes = [prefixes]
-    else:
-        prefixes = list(prefixes)
-    if versions is None:
-        versions = [None] * len(prefixes)
-    elif isinstance(versions, str):
-        versions = [versions]
-    elif isinstance(versions, dict):
-        versions = [versions.get(prefix) for prefix in prefixes]
-    else:
-        versions = list(versions)
-    if len(prefixes) != len(versions):
-        raise ValueError
-
-    terms: list[gilda.term.Term] = []
-    for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
-        try:
-            p_terms = list(
-                get_gilda_terms(
-                    prefix,
-                    identifiers_are_names=prefix in unnamed,
-                    version=version,
-                    strict=strict,
-                    skip_obsolete=skip_obsolete,
-                    progress=progress,
-                )
-            )
-        except (NoBuildError, CalledProcessError):
-            continue
-        else:
-            terms.extend(p_terms)
-    terms = filter_out_duplicates(terms)
-    terms_dict = multidict((term.norm_text, term) for term in terms)
-    if grounder_cls is None:
-        return Grounder(terms_dict)
-    else:
-        return grounder_cls(terms_dict)
-
-
-def _fast_term(
-    *,
-    text: str,
-    prefix: str,
-    identifier: str,
-    name: str,
-    status: str,
-    organism: str | None = None,
-) -> gilda.term.Term | None:
-    try:
-        term = gilda.term.Term(
-            norm_text=normalize(text),
-            text=text,
-            db=prefix,
-            id=identifier,
-            entry_name=name,
-            status=status,
-            source=prefix,
-            organism=organism,
-        )
-    except ValueError:
-        return None
-    return term
+def get_gilda_terms(prefix: str, *, skip_obsolete: bool = False, **kwargs) -> Iterable[gilda.Term]:
+    """Get gilda terms."""
+    warnings.warn(
+        "use pyobo.get_literal_mappings() directly and convert to gilda yourself",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    yield from literal_mappings_to_gilda(
+        get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
+    )
 
 
-def get_gilda_terms(
-    prefix: str,
+def get_gilda_term_subset(
+    source: str,
+    ancestors: str | Sequence[str],
     *,
-    identifiers_are_names: bool = False,
-    version: str | None = None,
-    strict: bool = True,
     skip_obsolete: bool = False,
-    progress: bool = True,
-) -> Iterable[gilda.term.Term]:
-    """Get gilda terms for the given namespace."""
-    id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
-    id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
-    obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
-
-    it = tqdm(
-        id_to_name.items(),
-        desc=f"[{prefix}] mapping",
-        unit_scale=True,
-        unit="name",
-        disable=not progress,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> Iterable[gilda.Term]:
+    """Get a subset of terms."""
+    warnings.warn(
+        "use pyobo.get_literal_mappings_subset() directly and convert to gilda yourself",
+        DeprecationWarning,
+        stacklevel=2,
     )
-    for identifier, name in it:
-        if identifier in obsoletes:
-            continue
-        term = _fast_term(
-            text=name,
-            prefix=prefix,
-            identifier=identifier,
-            name=name,
-            status="name",
-            organism=id_to_species.get(identifier),
-        )
-        if term is not None:
-            yield term
-
-    id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
-    if id_to_synonyms:
-        it = tqdm(
-            id_to_synonyms.items(),
-            desc=f"[{prefix}] mapping",
-            unit_scale=True,
-            unit="synonym",
-            disable=not progress,
-        )
-        for identifier, synonyms in it:
-            if identifier in obsoletes:
-                continue
-            name = id_to_name[identifier]
-            for synonym in synonyms:
-                if not synonym:
-                    continue
-                term = _fast_term(
-                    text=synonym,
-                    prefix=prefix,
-                    identifier=identifier,
-                    name=name,
-                    status="synonym",
-                    organism=id_to_species.get(identifier),
-                )
-                if term is not None:
-                    yield term
-
-    if identifiers_are_names:
-        it = tqdm(
-            get_ids(prefix),
-            desc=f"[{prefix}] mapping",
-            unit_scale=True,
-            unit="id",
-            disable=not progress,
+    if isinstance(ancestors, str):
+        ancestors = [ancestors]
+
+    yield from literal_mappings_to_gilda(
+        get_literal_mappings_subset(
+            source,
+            ancestors=[Reference.from_curie(a) for a in ancestors],
+            skip_obsolete=skip_obsolete,
+            **kwargs,
         )
-        for identifier in it:
-            if identifier in obsoletes:
-                continue
-            term = _fast_term(
-                text=identifier,
-                prefix=prefix,
-                identifier=identifier,
-                name=identifier,
-                status="name",
-                organism=id_to_species.get(identifier),
-            )
-            if term is not None:
-                yield term
-
-
-def get_gilda_term_subset(
-    source: str, ancestors: str | list[str], **kwargs
-) -> Iterable[gilda.term.Term]:
-    """Get a subset of terms."""
-    subset = {
-        descendant
-        for parent_curie in _ensure_list(ancestors)
-        for descendant in get_descendants(*parent_curie.split(":")) or []
-    }
-    for term in get_gilda_terms(source, **kwargs):
-        if bioregistry.curie_to_str(term.db, term.id) in subset:
-            yield term
-
-
-def _ensure_list(s: str | list[str]) -> list[str]:
-    if isinstance(s, str):
-        return [s]
-    return s
+    )
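
The deprecation warnings above spell out the migration path for this module. A minimal migration sketch, assuming the ssslm helper behaves as it is used in the new code; the "mesh" prefix is only an illustrative example:

    # Before (0.11.2): terms = list(pyobo.gilda_utils.get_gilda_terms("mesh"))
    # After (0.12.1): fetch literal mappings and convert them to gilda terms yourself.
    from ssslm import literal_mappings_to_gilda

    from pyobo.api import get_literal_mappings

    terms = list(literal_mappings_to_gilda(get_literal_mappings("mesh", skip_obsolete=True)))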
pyobo/identifier_utils/__init__.py ADDED
@@ -0,0 +1,33 @@
+"""Extract registry information."""
+
+from .api import (
+    DefaultCoercionError,
+    EmptyStringError,
+    NotCURIEError,
+    ParseError,
+    ParseValidationError,
+    UnparsableIRIError,
+    UnregisteredPrefixError,
+    _is_valid_identifier,
+    _parse_str_or_curie_or_uri_helper,
+    standardize_ec,
+    wrap_norm_prefix,
+)
+from .preprocessing import get_rules
+from .relations import ground_relation
+
+__all__ = [
+    "DefaultCoercionError",
+    "EmptyStringError",
+    "NotCURIEError",
+    "ParseError",
+    "ParseValidationError",
+    "UnparsableIRIError",
+    "UnregisteredPrefixError",
+    "_is_valid_identifier",
+    "_parse_str_or_curie_or_uri_helper",
+    "get_rules",
+    "ground_relation",
+    "standardize_ec",
+    "wrap_norm_prefix",
+]
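
For orientation: the file list above shows the flat module pyobo/identifier_utils.py being removed (entry 204) in favor of this new subpackage, and the __all__ above suggests the helpers stay importable from the same dotted path. A short import sketch (assumed usage, not taken from the package's own documentation):

    # These names are re-exported by the new subpackage per the __all__ above.
    from pyobo.identifier_utils import ParseError, ground_relation, standardize_ec, wrap_norm_prefix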
pyobo/identifier_utils/api.py ADDED
@@ -0,0 +1,305 @@
+"""Utilities for handling prefixes."""
+
+from __future__ import annotations
+
+import logging
+from functools import lru_cache, wraps
+from typing import Annotated, ClassVar
+
+import bioregistry
+import click
+from bioregistry import NormalizedNamableReference as Reference
+from bioregistry.constants import FailureReturnType
+from curies import ReferenceTuple
+from curies.preprocessing import BlocklistError, PreprocessingConverter
+from pydantic import ValidationError
+from typing_extensions import Doc
+
+from .preprocessing import get_rules
+from .relations import ground_relation
+
+__all__ = [
+    "DefaultCoercionError",
+    "EmptyStringError",
+    "NotCURIEError",
+    "ParseError",
+    "ParseValidationError",
+    "UnparsableIRIError",
+    "UnregisteredPrefixError",
+    "_parse_str_or_curie_or_uri_helper",
+    "standardize_ec",
+    "wrap_norm_prefix",
+]
+
+logger = logging.getLogger(__name__)
+
+
+Line = Annotated[str | None, Doc("""The OBO line where the parsing happened""")]
+
+
+class ParseError(BaseException):
+    """Raised on a missing prefix."""
+
+    message: ClassVar[str]
+
+    def __init__(
+        self,
+        curie: str,
+        *,
+        context: str | None,
+        ontology_prefix: str | None = None,
+        node: Reference | None = None,
+        predicate: Reference | None = None,
+        line: Line = None,
+    ) -> None:
+        """Initialize the error."""
+        self.curie = curie
+        self.context = context
+        self.ontology_prefix = ontology_prefix
+        self.node = node
+        self.predicate = predicate
+        self.line = line
+
+    def __str__(self) -> str:
+        s = ""
+        if self.node:
+            if self.predicate:
+                s += f"[{self.node.curie} - {self.predicate.curie}] "
+            else:
+                s += f"[{self.node.curie}] "
+        elif self.ontology_prefix:
+            s += f"[{self.ontology_prefix}] "
+        s += f"{self.message} {click.style(self.curie, fg='cyan')}"
+        if self.context:
+            s += f" in {self.context}"
+        if self.line and self.line != self.curie:
+            s += f" in {click.style(self.line, fg='yellow')}"
+        return s
+
+
+class ParseValidationError(ParseError):
+    """Raised on a validation error."""
+
+    message = "failed Pydantic validation"
+
+    def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
+        """Initialize the error."""
+        super().__init__(*args, **kwargs)
+        self.exc = exc
+
+
+class UnregisteredPrefixError(ParseError):
+    """Raised on a missing prefix."""
+
+    message = "unregistered prefix in"
+
+
+class UnparsableIRIError(ParseError):
+    """Raised on a an unparsable IRI."""
+
+    message = "couldn't parse IRI"
+
+
+class EmptyStringError(ParseError):
+    """Raised on a an empty string."""
+
+    message = "is empty"
+
+
+class NotCURIEError(ParseError):
+    """Raised on a text that can't be parsed as a CURIE."""
+
+    message = "not a CURIE"
+
+
+class DefaultCoercionError(ParseError):
+    """Raised on a text that can't be coerced into a default reference."""
+
+    message = "can't be coerced into a default reference"
+
+
+def _is_uri(s: str) -> bool:
+    return s.startswith("http:") or s.startswith("https:")
+
+
+def _preclean_uri(s: str) -> str:
+    s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
+    s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
+    s = s.strip().removeprefix("url:").removeprefix("uri:")
+    s = s.removeprefix("URL:").removeprefix("URI:")
+    s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
+    s = s.replace("http\\:", "http:")
+    s = s.replace("https\\:", "https:")
+    s = s.rstrip("/")
+    return s
+
+
+@lru_cache(1)
+def _get_converter() -> PreprocessingConverter:
+    return PreprocessingConverter(
+        converter=bioregistry.manager.converter,
+        rules=get_rules(),
+        preclean=_preclean_uri,
+    )
+
+
+def _parse_str_or_curie_or_uri_helper(
+    str_or_curie_or_uri: str,
+    *,
+    ontology_prefix: str | None = None,
+    node: Reference | None = None,
+    predicate: Reference | None = None,
+    upgrade: bool = True,
+    line: str | None = None,
+    name: str | None = None,
+    context: str | None = None,
+) -> Reference | ParseError | BlocklistError:
+    """Parse a string that looks like a CURIE.
+
+    :param str_or_curie_or_uri: A compact uniform resource identifier (CURIE)
+    :param ontology_prefix: The ontology in which the CURIE appears
+
+    :returns: A parse tuple or a tuple of None, None if not able to parse and not strict
+
+    - Normalizes the namespace
+    - Checks against a blacklist for the entire curie, for the namespace, and for
+      suffixes.
+    """
+    str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
+    if not str_or_curie_or_uri:
+        return EmptyStringError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+
+    rules = get_rules()
+
+    if upgrade:
+        # Remap the curie with the full list
+        if r1 := rules.remap_full(
+            str_or_curie_or_uri, reference_cls=Reference, context=ontology_prefix
+        ):
+            return r1
+
+        # Remap node's prefix (if necessary)
+        str_or_curie_or_uri = rules.remap_prefix(str_or_curie_or_uri, context=ontology_prefix)
+
+        if r2 := ground_relation(str_or_curie_or_uri):
+            return r2
+
+    if rules.str_is_blocked(str_or_curie_or_uri, context=ontology_prefix):
+        return BlocklistError()
+
+    if _is_uri(str_or_curie_or_uri):
+        rt = bioregistry.parse_iri(
+            str_or_curie_or_uri, on_failure_return_type=FailureReturnType.single
+        )
+        if rt is None:
+            return UnparsableIRIError(
+                str_or_curie_or_uri,
+                ontology_prefix=ontology_prefix,
+                node=node,
+                predicate=predicate,
+                line=line,
+                context=context,
+            )
+        try:
+            rv = Reference.model_validate(
+                {"prefix": rt.prefix, "identifier": rt.identifier, "name": name}
+            )
+        except ValidationError as exc:
+            return ParseValidationError(
+                str_or_curie_or_uri,
+                ontology_prefix=ontology_prefix,
+                node=node,
+                predicate=predicate,
+                line=line,
+                context=context,
+                exc=exc,
+            )
+        else:
+            return rv
+
+    prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
+    if not delimiter:
+        return NotCURIEError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+
+    norm_node_prefix = bioregistry.normalize_prefix(prefix)
+    if not norm_node_prefix:
+        return UnregisteredPrefixError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+
+    identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
+    try:
+        rv = Reference.model_validate(
+            {"prefix": norm_node_prefix, "identifier": identifier, "name": name}
+        )
+    except ValidationError as exc:
+        return ParseValidationError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            exc=exc,
+            context=context,
+        )
+    else:
+        return rv
+
+
+def wrap_norm_prefix(f):
+    """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
+
+    @wraps(f)
+    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
+        if isinstance(prefix, str):
+            norm_prefix = bioregistry.normalize_prefix(prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix}")
+            prefix = norm_prefix
+        elif isinstance(prefix, Reference):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
+        elif isinstance(prefix, ReferenceTuple):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = ReferenceTuple(norm_prefix, prefix.identifier)
+        else:
+            raise TypeError
+        return f(prefix, *args, **kwargs)
+
+    return _wrapped
+
+
+def standardize_ec(ec: str) -> str:
+    """Standardize an EC code identifier by removing all trailing dashes and dots."""
+    ec = ec.strip().replace(" ", "")
+    for _ in range(4):
+        ec = ec.rstrip("-").rstrip(".")
+    return ec
+
+
+def _is_valid_identifier(curie_or_uri: str) -> bool:
+    # TODO this needs more careful implementation
+    return bool(curie_or_uri.strip()) and " " not in curie_or_uri
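
Two of the helpers above lend themselves to quick worked examples. A sketch of expected behavior based on the code as written (assumed usage, not taken from the package's own documentation):

    from pyobo.identifier_utils import standardize_ec, wrap_norm_prefix

    # standardize_ec strips up to four trailing "-"/"." groups, so a partial EC code
    # like "1.1.1.-" collapses to "1.1.1" and "1.-.-.-" collapses to "1".
    assert standardize_ec("1.1.1.-") == "1.1.1"
    assert standardize_ec("1.-.-.-") == "1"

    # wrap_norm_prefix normalizes the first argument through bioregistry before the
    # wrapped function runs; an unknown prefix raises ValueError.
    @wrap_norm_prefix
    def echo(prefix: str) -> str:
        return prefix

    assert echo("CHEBI") == "chebi"  # assumes bioregistry normalizes "CHEBI" to "chebi"

Note that _parse_str_or_curie_or_uri_helper returns ParseError subclasses (or a BlocklistError) rather than raising them, per its return annotation, so callers are expected to branch on the type of the returned value.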