pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/normalizer.py DELETED
@@ -1,232 +0,0 @@
1
- """Use synonyms from OBO to normalize names."""
2
-
3
- import logging
4
- from abc import ABC, abstractmethod
5
- from collections.abc import Iterable, Mapping
6
- from dataclasses import dataclass
7
- from functools import lru_cache
8
- from typing import Optional, Union
9
-
10
- import bioregistry
11
-
12
- from .api import names
13
- from .utils.io import multisetdict
14
-
15
- __all__ = [
16
- "ground",
17
- "Normalizer",
18
- "OboNormalizer",
19
- "MultiNormalizer",
20
- "NormalizationResult",
21
- ]
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
- NormalizationSuccess = tuple[str, str, str]
26
- NormalizationFailure = tuple[None, None, str]
27
- NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
28
-
29
-
30
- class Normalizer(ABC):
31
- """A normalizer."""
32
-
33
- id_to_name: dict[str, str]
34
- id_to_synonyms: dict[str, list[str]]
35
-
36
- #: A mapping from all synonyms to the set of identifiers that they point to.
37
- #: In a perfect world, each would only be a single element.
38
- synonym_to_identifiers_mapping: dict[str, set[str]]
39
- #: A mapping from normalized names to the actual ones that they came from
40
- norm_name_to_name: dict[str, set[str]]
41
-
42
- def __init__(
43
- self,
44
- id_to_name: dict[str, str],
45
- id_to_synonyms: dict[str, list[str]],
46
- remove_prefix: Optional[str] = None,
47
- ) -> None:
48
- """Initialize the normalizer.
49
-
50
- :param id_to_name: An identifier to name dictionary.
51
- :param id_to_synonyms: An identifier to list of synonyms dictionary.
52
- :param remove_prefix: A prefix to be removed from the identifiers. Useful for nomenclatures like ChEBI.
53
- """
54
- self.id_to_name = id_to_name
55
- self.id_to_synonyms = id_to_synonyms
56
- self.synonym_to_identifiers_mapping = multisetdict(
57
- self._iterate_synonyms_to_identifiers(
58
- id_to_name=self.id_to_name,
59
- id_to_synonyms=self.id_to_synonyms,
60
- remove_prefix=remove_prefix,
61
- )
62
- )
63
- self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
64
-
65
- @classmethod
66
- def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
67
- return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
68
-
69
- @staticmethod
70
- def _normalize_text(text: str) -> str:
71
- text = text.strip().strip('"').strip("'").lower()
72
- text = normalize_dashes(text)
73
- text = text.replace("-", "") # remove all dashes
74
- text = text.replace(" ", "") # remove all spaces
75
- return text
76
-
77
- @staticmethod
78
- def _iterate_synonyms_to_identifiers(
79
- *,
80
- id_to_name: Mapping[str, str],
81
- id_to_synonyms: Mapping[str, Iterable[str]],
82
- remove_prefix: Optional[str] = None,
83
- ) -> Iterable[tuple[str, str]]:
84
- if remove_prefix is not None:
85
- remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
86
-
87
- # Add name
88
- for identifier, name in id_to_name.items():
89
- if remove_prefix and identifier.lower().startswith(remove_prefix):
90
- identifier = identifier[len(remove_prefix) :]
91
-
92
- yield name, identifier
93
-
94
- # Add synonyms
95
- for identifier, synonyms in id_to_synonyms.items():
96
- if remove_prefix and identifier.lower().startswith(remove_prefix):
97
- identifier = identifier[len(remove_prefix) :]
98
-
99
- for synonym in synonyms:
100
- # it might overwrite but this is probably always due to alternate ids
101
- yield synonym, identifier
102
-
103
- def get_names(self, query: str) -> list[str]:
104
- """Get all names to which the query text maps."""
105
- norm_text = self._normalize_text(query)
106
- return list(self.norm_name_to_name.get(norm_text, []))
107
-
108
- @abstractmethod
109
- def normalize(self, query: str) -> NormalizationResult:
110
- """Try and normalize a name to a identifier and canonical name."""
111
- raise NotImplementedError
112
-
113
-
114
- @lru_cache
115
- def get_normalizer(prefix: str) -> Normalizer:
116
- """Get an OBO normalizer."""
117
- norm_prefix = bioregistry.normalize_prefix(prefix)
118
- if norm_prefix is None:
119
- raise ValueError(f"unhandled prefix: {prefix}")
120
- logger.info("getting obo normalizer for %s", norm_prefix)
121
- normalizer = OboNormalizer(norm_prefix)
122
- logger.debug(
123
- "normalizer for %s with %s name lookups",
124
- normalizer.prefix,
125
- len(normalizer.norm_name_to_name),
126
- )
127
- return normalizer
128
-
129
-
130
- def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult:
131
- """Normalize a string given the prefix's labels and synonyms.
132
-
133
- :param prefix: If a string, only grounds against that namespace. If a list, will try grounding
134
- against all in that order
135
- :param query: The string to try grounding
136
- """
137
- if isinstance(prefix, str):
138
- normalizer = get_normalizer(prefix)
139
- return normalizer.normalize(query)
140
- else:
141
- for p in prefix:
142
- norm_prefix, identifier, name = ground(p, query)
143
- if norm_prefix and identifier and name:
144
- return norm_prefix, identifier, name
145
- return None, None, query
146
-
147
-
148
- class OboNormalizer(Normalizer):
149
- """A utility for normalizing by names."""
150
-
151
- def __init__(self, prefix: str) -> None:
152
- """Initialize the normalizer by an ontology's Bioregistry prefix."""
153
- self.prefix = prefix
154
- self._len_prefix = len(prefix)
155
- id_to_name = names.get_id_name_mapping(prefix)
156
- id_to_synonyms = names.get_id_synonyms_mapping(prefix)
157
- super().__init__(
158
- id_to_name=dict(id_to_name),
159
- id_to_synonyms=dict(id_to_synonyms),
160
- remove_prefix=prefix,
161
- )
162
-
163
- def __repr__(self) -> str:
164
- return f'OboNormalizer(prefix="{self.prefix}")'
165
-
166
- def normalize(self, query: str) -> NormalizationResult:
167
- """Try and normalize a name to a identifier and canonical name."""
168
- names = self.get_names(query)
169
- if not names:
170
- return None, None, query
171
-
172
- for name in names:
173
- identifiers = self.synonym_to_identifiers_mapping[name]
174
- for identifier in identifiers:
175
- if identifier in self.id_to_name:
176
- return self.prefix, identifier, self.id_to_name[identifier]
177
- logger.warning(f"Could not find valid identifier for {name} from {identifiers}")
178
-
179
- # maybe it happens that one can't be found?
180
- logger.warning(f"was able to look up name {query}->{names} but not find fresh identifier")
181
- return None, None, query
182
-
183
-
184
- @dataclass
185
- class MultiNormalizer:
186
- """Multiple normalizers together.
187
-
188
- If you're looking for taxa of exotic plants, you might use:
189
-
190
- >>> from pyobo.normalizer import MultiNormalizer
191
- >>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
192
- >>> normalizer.normalize("Homo sapiens")
193
- ('ncbitaxon', '9606', 'Homo sapiens')
194
- >>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
195
- ('itis', '507501', 'Abies bifolia')
196
- >>> normalizer.normalize("vulcan") # nice try, nerds
197
- (None, None, None)
198
- """
199
-
200
- #: The normalizers for each prefix
201
- normalizers: list[Normalizer]
202
-
203
- @staticmethod
204
- def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
205
- """Instantiate normalizers based on the given prefixes, in preferred order.."""
206
- return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
207
-
208
- def normalize(self, query: str) -> NormalizationResult:
209
- """Try and normalize a canonical name using multiple normalizers."""
210
- for normalizer in self.normalizers:
211
- prefix, identifier, name = normalizer.normalize(query)
212
- if prefix and identifier and name: # all not empty
213
- return prefix, identifier, name
214
- return None, None, query
215
-
216
-
217
- # See: https://en.wikipedia.org/wiki/Dash
218
- FIGURE_DASH = b"\xe2\x80\x92".decode("utf-8")
219
- EN_DASH = b"\xe2\x80\x93".decode("utf-8")
220
- EM_DASH = b"\xe2\x80\x94".decode("utf-8")
221
- HORIZONAL_BAR = b"\xe2\x80\x95".decode("utf-8")
222
- NORMAL_DASH = "-"
223
-
224
-
225
- def normalize_dashes(s: str) -> str:
226
- """Normalize dashes in a string."""
227
- return (
228
- s.replace(FIGURE_DASH, NORMAL_DASH)
229
- .replace(EN_DASH, NORMAL_DASH)
230
- .replace(EM_DASH, NORMAL_DASH)
231
- .replace(HORIZONAL_BAR, NORMAL_DASH)
232
- )
pyobo/registries/__init__.py DELETED
@@ -1,16 +0,0 @@
1
- """Extract registry information."""
2
-
3
- from .metaregistry import ( # noqa: F401
4
- curie_has_blacklisted_prefix,
5
- curie_has_blacklisted_suffix,
6
- curie_is_blacklisted,
7
- get_remappings_full,
8
- get_remappings_prefix,
9
- get_xrefs_blacklist,
10
- get_xrefs_prefix_blacklist,
11
- get_xrefs_suffix_blacklist,
12
- has_no_download,
13
- iter_cached_obo,
14
- remap_full,
15
- remap_prefix,
16
- )