pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/gilda_utils.py CHANGED
@@ -3,63 +3,67 @@
  from __future__ import annotations

  import logging
- from collections.abc import Iterable
- from subprocess import CalledProcessError
+ import warnings
+ from collections.abc import Iterable, Sequence
+ from typing import TYPE_CHECKING, Any, cast

  import bioregistry
- import gilda.api
- import gilda.term
- from gilda.grounder import Grounder
- from gilda.process import normalize
- from gilda.term import filter_out_duplicates
+ import ssslm
+ from ssslm import GildaGrounder, literal_mappings_to_gilda
  from tqdm.auto import tqdm
+ from typing_extensions import Unpack

- from pyobo import (
-     get_descendants,
+ from pyobo.api import (
      get_id_name_mapping,
-     get_id_species_mapping,
-     get_id_synonyms_mapping,
      get_ids,
-     get_obsolete,
+     get_literal_mappings,
+     get_literal_mappings_subset,
  )
- from pyobo.getters import NoBuildError
- from pyobo.utils.io import multidict
+ from pyobo.constants import GetOntologyKwargs
+ from pyobo.struct.reference import Reference
+
+ if TYPE_CHECKING:
+     import gilda

  __all__ = [
-     "iter_gilda_prediction_tuples",
      "get_grounder",
-     "get_gilda_terms",
+     "iter_gilda_prediction_tuples",
  ]

  logger = logging.getLogger(__name__)


+ # TODO the only place this is used is in Biomappings -
+ # might be better to directly move it there
  def iter_gilda_prediction_tuples(
      prefix: str,
      relation: str = "skos:exactMatch",
      *,
-     grounder: Grounder | None = None,
+     grounder: gilda.Grounder | None = None,
      identifiers_are_names: bool = False,
      strict: bool = False,
  ) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
      """Iterate over prediction tuples for a given prefix."""
      if grounder is None:
+         import gilda.api
+
          grounder = gilda.api.grounder
+     grounder_ = GildaGrounder(grounder)
      id_name_mapping = get_id_name_mapping(prefix, strict=strict)
      it = tqdm(
          id_name_mapping.items(), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="name"
      )
      for identifier, name in it:
-         for scored_match in grounder.ground(name):
-             target_prefix = scored_match.term.db.lower()
+         norm_identifier = _normalize_identifier(prefix, identifier)
+         for scored_match in grounder_.get_matches(name):
              yield (
                  prefix,
-                 normalize_identifier(prefix, identifier),
+                 norm_identifier,
                  name,
                  relation,
-                 target_prefix,
-                 normalize_identifier(target_prefix, scored_match.term.id),
-                 scored_match.term.entry_name,
+                 scored_match.prefix,
+                 _normalize_identifier(scored_match.prefix, scored_match.identifier),
+                 name,
                  "semapv:LexicalMatching",
                  round(scored_match.score, 3),
              )
@@ -67,22 +71,22 @@ def iter_gilda_prediction_tuples(
      if identifiers_are_names:
          it = tqdm(get_ids(prefix), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="id")
          for identifier in it:
-             for scored_match in grounder.ground(identifier):
-                 target_prefix = scored_match.term.db.lower()
+             norm_identifier = _normalize_identifier(prefix, identifier)
+             for scored_match in grounder_.get_matches(identifier):
                  yield (
                      prefix,
-                     normalize_identifier(prefix, identifier),
+                     norm_identifier,
                      identifier,
                      relation,
-                     target_prefix,
-                     normalize_identifier(target_prefix, scored_match.term.id),
-                     scored_match.term.entry_name,
+                     scored_match.prefix,
+                     _normalize_identifier(scored_match.prefix, scored_match.identifier),
+                     identifier,
                      "semapv:LexicalMatching",
                      scored_match.score,
                  )


- def normalize_identifier(prefix: str, identifier: str) -> str:
+ def _normalize_identifier(prefix: str, identifier: str) -> str:
      """Normalize the identifier."""
      resource = bioregistry.get_resource(prefix)
      if resource is None:
@@ -90,183 +94,58 @@ def normalize_identifier(prefix: str, identifier: str) -> str:
      return resource.miriam_standardize_identifier(identifier) or identifier


- def get_grounder(
-     prefixes: str | Iterable[str],
-     *,
-     unnamed: Iterable[str] | None = None,
-     grounder_cls: type[Grounder] | None = None,
-     versions: None | str | Iterable[str | None] | dict[str, str] = None,
-     strict: bool = True,
-     skip_obsolete: bool = False,
-     progress: bool = True,
- ) -> Grounder:
-     """Get a Gilda grounder for the given prefix(es)."""
-     unnamed = set() if unnamed is None else set(unnamed)
-     if isinstance(prefixes, str):
-         prefixes = [prefixes]
-     else:
-         prefixes = list(prefixes)
-     if versions is None:
-         versions = [None] * len(prefixes)
-     elif isinstance(versions, str):
-         versions = [versions]
-     elif isinstance(versions, dict):
-         versions = [versions.get(prefix) for prefix in prefixes]
-     else:
-         versions = list(versions)
-     if len(prefixes) != len(versions):
-         raise ValueError
-
-     terms: list[gilda.term.Term] = []
-     for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
-         try:
-             p_terms = list(
-                 get_gilda_terms(
-                     prefix,
-                     identifiers_are_names=prefix in unnamed,
-                     version=version,
-                     strict=strict,
-                     skip_obsolete=skip_obsolete,
-                     progress=progress,
-                 )
-             )
-         except (NoBuildError, CalledProcessError):
-             continue
-         else:
-             terms.extend(p_terms)
-     terms = filter_out_duplicates(terms)
-     terms_dict = multidict((term.norm_text, term) for term in terms)
-     if grounder_cls is None:
-         return Grounder(terms_dict)
-     else:
-         return grounder_cls(terms_dict)
+ def normalize_identifier(prefix: str, identifier: str) -> str:
+     """Normalize the identifier."""
+     warnings.warn(
+         "normalization to MIRIAM is deprecated, please update to using Bioregistry standard identifiers",
+         DeprecationWarning,
+         stacklevel=2,
+     )
+     return _normalize_identifier(prefix, identifier)


- def _fast_term(
-     *,
-     text: str,
-     prefix: str,
-     identifier: str,
-     name: str,
-     status: str,
-     organism: str | None = None,
- ) -> gilda.term.Term | None:
-     try:
-         term = gilda.term.Term(
-             norm_text=normalize(text),
-             text=text,
-             db=prefix,
-             id=identifier,
-             entry_name=name,
-             status=status,
-             source=prefix,
-             organism=organism,
-         )
-     except ValueError:
-         return None
-     return term
+ def get_grounder(*args: Any, **kwargs: Any) -> gilda.Grounder:
+     """Get a grounder."""
+     warnings.warn("use pyobo.ner.get_grounder", DeprecationWarning, stacklevel=2)
+     import pyobo.ner

+     grounder = cast(ssslm.ner.GildaGrounder, pyobo.get_grounder(*args, **kwargs))
+     return grounder._grounder

- def get_gilda_terms(
-     prefix: str,
-     *,
-     identifiers_are_names: bool = False,
-     version: str | None = None,
-     strict: bool = True,
-     skip_obsolete: bool = False,
-     progress: bool = True,
- ) -> Iterable[gilda.term.Term]:
-     """Get gilda terms for the given namespace."""
-     id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
-     id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
-     obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()

-     it = tqdm(
-         id_to_name.items(),
-         desc=f"[{prefix}] mapping",
-         unit_scale=True,
-         unit="name",
-         disable=not progress,
+ def get_gilda_terms(prefix: str, *, skip_obsolete: bool = False, **kwargs) -> Iterable[gilda.Term]:
+     """Get gilda terms."""
+     warnings.warn(
+         "use pyobo.get_literal_mappings() directly and convert to gilda yourself",
+         DeprecationWarning,
+         stacklevel=2,
+     )
+     yield from literal_mappings_to_gilda(
+         get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
      )
-     for identifier, name in it:
-         if identifier in obsoletes:
-             continue
-         term = _fast_term(
-             text=name,
-             prefix=prefix,
-             identifier=identifier,
-             name=name,
-             status="name",
-             organism=id_to_species.get(identifier),
-         )
-         if term is not None:
-             yield term
-
-     id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
-     if id_to_synonyms:
-         it = tqdm(
-             id_to_synonyms.items(),
-             desc=f"[{prefix}] mapping",
-             unit_scale=True,
-             unit="synonym",
-             disable=not progress,
-         )
-         for identifier, synonyms in it:
-             if identifier in obsoletes:
-                 continue
-             name = id_to_name[identifier]
-             for synonym in synonyms:
-                 if not synonym:
-                     continue
-                 term = _fast_term(
-                     text=synonym,
-                     prefix=prefix,
-                     identifier=identifier,
-                     name=name,
-                     status="synonym",
-                     organism=id_to_species.get(identifier),
-                 )
-                 if term is not None:
-                     yield term
-
-     if identifiers_are_names:
-         it = tqdm(
-             get_ids(prefix),
-             desc=f"[{prefix}] mapping",
-             unit_scale=True,
-             unit="id",
-             disable=not progress,
-         )
-         for identifier in it:
-             if identifier in obsoletes:
-                 continue
-             term = _fast_term(
-                 text=identifier,
-                 prefix=prefix,
-                 identifier=identifier,
-                 name=identifier,
-                 status="name",
-                 organism=id_to_species.get(identifier),
-             )
-             if term is not None:
-                 yield term


  def get_gilda_term_subset(
-     source: str, ancestors: str | list[str], **kwargs
- ) -> Iterable[gilda.term.Term]:
+     source: str,
+     ancestors: str | Sequence[str],
+     *,
+     skip_obsolete: bool = False,
+     **kwargs: Unpack[GetOntologyKwargs],
+ ) -> Iterable[gilda.Term]:
      """Get a subset of terms."""
-     subset = {
-         descendant
-         for parent_curie in _ensure_list(ancestors)
-         for descendant in get_descendants(*parent_curie.split(":")) or []
-     }
-     for term in get_gilda_terms(source, **kwargs):
-         if bioregistry.curie_to_str(term.db, term.id) in subset:
-             yield term
-
-
- def _ensure_list(s: str | list[str]) -> list[str]:
-     if isinstance(s, str):
-         return [s]
-     return s
+     warnings.warn(
+         "use pyobo.get_literal_mappings_subset() directly and convert to gilda yourself",
+         DeprecationWarning,
+         stacklevel=2,
+     )
+     if isinstance(ancestors, str):
+         ancestors = [ancestors]
+
+     yield from literal_mappings_to_gilda(
+         get_literal_mappings_subset(
+             source,
+             ancestors=[Reference.from_curie(a) for a in ancestors],
+             skip_obsolete=skip_obsolete,
+             **kwargs,
+         )
+     )
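The hunks above replace the bespoke gilda term and grounder builders with thin deprecation shims over the new literal-mapping API. A minimal migration sketch for downstream users, assuming pyobo 0.12.0 with ssslm and gilda installed; "chebi" is an arbitrary example prefix and exact keyword arguments may differ from what a given caller needs:

import pyobo
import pyobo.ner
from ssslm import literal_mappings_to_gilda

# previously pyobo.gilda_utils.get_gilda_terms("chebi"), now only a DeprecationWarning shim
mappings = pyobo.get_literal_mappings("chebi", skip_obsolete=True)

# convert to gilda terms only where gilda interoperability is still required
gilda_terms = list(literal_mappings_to_gilda(mappings))

# previously pyobo.gilda_utils.get_grounder("chebi"); the replacement returns an ssslm grounder
grounder = pyobo.ner.get_grounder("chebi")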
pyobo/identifier_utils/__init__.py ADDED
@@ -0,0 +1,41 @@
+ """Extract registry information."""
+
+ from .api import (
+     BlacklistedError,
+     DefaultCoercionError,
+     EmptyStringError,
+     NotCURIEError,
+     ParseError,
+     ParseValidationError,
+     UnparsableIRIError,
+     UnregisteredPrefixError,
+     _is_valid_identifier,
+     _parse_str_or_curie_or_uri_helper,
+     standardize_ec,
+     wrap_norm_prefix,
+ )
+ from .preprocessing import (
+     remap_full,
+     remap_prefix,
+     str_is_blacklisted,
+ )
+ from .relations import ground_relation
+
+ __all__ = [
+     "BlacklistedError",
+     "DefaultCoercionError",
+     "EmptyStringError",
+     "NotCURIEError",
+     "ParseError",
+     "ParseValidationError",
+     "UnparsableIRIError",
+     "UnregisteredPrefixError",
+     "_is_valid_identifier",
+     "_parse_str_or_curie_or_uri_helper",
+     "ground_relation",
+     "remap_full",
+     "remap_prefix",
+     "standardize_ec",
+     "str_is_blacklisted",
+     "wrap_norm_prefix",
+ ]
pyobo/identifier_utils/api.py ADDED
@@ -0,0 +1,296 @@
+ """Utilities for handling prefixes."""
+
+ from __future__ import annotations
+
+ import logging
+ from functools import wraps
+ from typing import Annotated, ClassVar
+
+ import bioregistry
+ import click
+ from bioregistry import NormalizedNamableReference as Reference
+ from bioregistry.constants import FailureReturnType
+ from curies import ReferenceTuple
+ from pydantic import ValidationError
+ from typing_extensions import Doc
+
+ from .preprocessing import remap_full, remap_prefix, str_is_blacklisted
+ from .relations import ground_relation
+
+ __all__ = [
+     "BlacklistedError",
+     "DefaultCoercionError",
+     "EmptyStringError",
+     "NotCURIEError",
+     "ParseError",
+     "ParseValidationError",
+     "UnparsableIRIError",
+     "UnregisteredPrefixError",
+     "_parse_str_or_curie_or_uri_helper",
+     "standardize_ec",
+     "wrap_norm_prefix",
+ ]
+
+ logger = logging.getLogger(__name__)
+
+
+ class BlacklistedError(ValueError):
+     """A sentinel for blacklisted strings."""
+
+
+ Line = Annotated[str | None, Doc("""The OBO line where the parsing happened""")]
+
+
+ class ParseError(BaseException):
+     """Raised on a missing prefix."""
+
+     message: ClassVar[str]
+
+     def __init__(
+         self,
+         curie: str,
+         *,
+         context: str | None,
+         ontology_prefix: str | None = None,
+         node: Reference | None = None,
+         predicate: Reference | None = None,
+         line: Line = None,
+     ) -> None:
+         """Initialize the error."""
+         self.curie = curie
+         self.context = context
+         self.ontology_prefix = ontology_prefix
+         self.node = node
+         self.predicate = predicate
+         self.line = line
+
+     def __str__(self) -> str:
+         s = ""
+         if self.node:
+             if self.predicate:
+                 s += f"[{self.node.curie} - {self.predicate.curie}] "
+             else:
+                 s += f"[{self.node.curie}] "
+         elif self.ontology_prefix:
+             s += f"[{self.ontology_prefix}] "
+         s += f"{self.message} {click.style(self.curie, fg='cyan')}"
+         if self.context:
+             s += f" in {self.context}"
+         if self.line and self.line != self.curie:
+             s += f" in {click.style(self.line, fg='yellow')}"
+         return s
+
+
+ class ParseValidationError(ParseError):
+     """Raised on a validation error."""
+
+     message = "failed Pydantic validation"
+
+     def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
+         """Initialize the error."""
+         super().__init__(*args, **kwargs)
+         self.exc = exc
+
+
+ class UnregisteredPrefixError(ParseError):
+     """Raised on a missing prefix."""
+
+     message = "unregistered prefix in"
+
+
+ class UnparsableIRIError(ParseError):
+     """Raised on an unparsable IRI."""
+
+     message = "couldn't parse IRI"
+
+
+ class EmptyStringError(ParseError):
+     """Raised on an empty string."""
+
+     message = "is empty"
+
+
+ class NotCURIEError(ParseError):
+     """Raised on a text that can't be parsed as a CURIE."""
+
+     message = "not a CURIE"
+
+
+ class DefaultCoercionError(ParseError):
+     """Raised on a text that can't be coerced into a default reference."""
+
+     message = "can't be coerced into a default reference"
+
+
+ def _is_uri(s: str) -> bool:
+     return s.startswith("http:") or s.startswith("https:")
+
+
+ def _preclean_uri(s: str) -> str:
+     s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
+     s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
+     s = s.strip().removeprefix("url:").removeprefix("uri:")
+     s = s.removeprefix("URL:").removeprefix("URI:")
+     s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
+     s = s.replace("http\\:", "http:")
+     s = s.replace("https\\:", "https:")
+     s = s.rstrip("/")
+     return s
+
+
+ def _parse_str_or_curie_or_uri_helper(
+     str_or_curie_or_uri: str,
+     *,
+     ontology_prefix: str | None = None,
+     node: Reference | None = None,
+     predicate: Reference | None = None,
+     upgrade: bool = True,
+     line: str | None = None,
+     name: str | None = None,
+     context: str | None = None,
+ ) -> Reference | ParseError | BlacklistedError:
+     """Parse a string that looks like a CURIE.
+
+     :param str_or_curie_or_uri: A compact uniform resource identifier (CURIE)
+     :param ontology_prefix: The ontology in which the CURIE appears
+
+     :returns: A parse tuple or a tuple of None, None if not able to parse and not strict
+
+     - Normalizes the namespace
+     - Checks against a blacklist for the entire curie, for the namespace, and for
+       suffixes.
+     """
+     str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
+     if not str_or_curie_or_uri:
+         return EmptyStringError(
+             str_or_curie_or_uri,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=predicate,
+             line=line,
+             context=context,
+         )
+
+     if upgrade:
+         # Remap the curie with the full list
+         if r1 := remap_full(str_or_curie_or_uri, ontology_prefix=ontology_prefix):
+             return r1
+
+         # Remap node's prefix (if necessary)
+         str_or_curie_or_uri = remap_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
+
+         if r2 := ground_relation(str_or_curie_or_uri):
+             return r2
+
+     if str_is_blacklisted(str_or_curie_or_uri, ontology_prefix=ontology_prefix):
+         return BlacklistedError()
+
+     if _is_uri(str_or_curie_or_uri):
+         rt = bioregistry.parse_iri(
+             str_or_curie_or_uri, on_failure_return_type=FailureReturnType.single
+         )
+         if rt is None:
+             return UnparsableIRIError(
+                 str_or_curie_or_uri,
+                 ontology_prefix=ontology_prefix,
+                 node=node,
+                 predicate=predicate,
+                 line=line,
+                 context=context,
+             )
+         try:
+             rv = Reference.model_validate(
+                 {"prefix": rt.prefix, "identifier": rt.identifier, "name": name}
+             )
+         except ValidationError as exc:
+             return ParseValidationError(
+                 str_or_curie_or_uri,
+                 ontology_prefix=ontology_prefix,
+                 node=node,
+                 predicate=predicate,
+                 line=line,
+                 context=context,
+                 exc=exc,
+             )
+         else:
+             return rv
+
+     prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
+     if not delimiter:
+         return NotCURIEError(
+             str_or_curie_or_uri,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=predicate,
+             line=line,
+             context=context,
+         )
+
+     norm_node_prefix = bioregistry.normalize_prefix(prefix)
+     if not norm_node_prefix:
+         return UnregisteredPrefixError(
+             str_or_curie_or_uri,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=predicate,
+             line=line,
+             context=context,
+         )
+
+     identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
+     try:
+         rv = Reference.model_validate(
+             {"prefix": norm_node_prefix, "identifier": identifier, "name": name}
+         )
+     except ValidationError as exc:
+         return ParseValidationError(
+             str_or_curie_or_uri,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=predicate,
+             line=line,
+             exc=exc,
+             context=context,
+         )
+     else:
+         return rv
+
+
+ def wrap_norm_prefix(f):
+     """Decorate a function that takes in a prefix to auto-normalize, or return None if it can't be normalized."""
+
+     @wraps(f)
+     def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
+         if isinstance(prefix, str):
+             norm_prefix = bioregistry.normalize_prefix(prefix)
+             if norm_prefix is None:
+                 raise ValueError(f"Invalid prefix: {prefix}")
+             prefix = norm_prefix
+         elif isinstance(prefix, Reference):
+             norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+             if norm_prefix is None:
+                 raise ValueError(f"Invalid prefix: {prefix.prefix}")
+             prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
+         elif isinstance(prefix, ReferenceTuple):
+             norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+             if norm_prefix is None:
+                 raise ValueError(f"Invalid prefix: {prefix.prefix}")
+             prefix = ReferenceTuple(norm_prefix, prefix.identifier)
+         else:
+             raise TypeError
+         return f(prefix, *args, **kwargs)
+
+     return _wrapped
+
+
+ def standardize_ec(ec: str) -> str:
+     """Standardize an EC code identifier by removing all trailing dashes and dots."""
+     ec = ec.strip().replace(" ", "")
+     for _ in range(4):
+         ec = ec.rstrip("-").rstrip(".")
+     return ec
+
+
+ def _is_valid_identifier(curie_or_uri: str) -> bool:
+     # TODO this needs more careful implementation
+     return bool(curie_or_uri.strip()) and " " not in curie_or_uri
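A small usage sketch for the helpers added above; the decorated function and example values are illustrative, not taken from this diff, and prefix normalization behavior comes from the Bioregistry:

from pyobo.identifier_utils import standardize_ec, wrap_norm_prefix

# trailing dots and dashes on partial EC codes are stripped
assert standardize_ec("1.1.1.-") == "1.1.1"

@wrap_norm_prefix
def describe(prefix: str) -> str:
    # by the time the body runs, the prefix has been normalized via the Bioregistry
    return f"normalized prefix: {prefix}"

assert describe("CHEBI") == "normalized prefix: chebi"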