pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
@@ -1,507 +0,0 @@
1
- {
2
- "blacklists": {
3
- "full": [
4
- "ClinGen:AminoacidopathyGeneCurationPanel",
5
- "KEGG",
6
- "NIST",
7
- "ICD9CM_2005:<new dbxref>",
8
- "XX:<new dbxref>",
9
- "XX:<new xref>",
10
- "STRUCTURE_Formula",
11
- "United:Kingdom",
12
- "United:States",
13
- "Harvard-Oxford:Atlas",
14
- "South:Korea",
15
- "Codex:\\:260",
16
- "Europe:\\:260",
17
- "BioGRID:curators",
18
- "POC:curators",
19
- "CGP:curators",
20
- "DiffAvg:",
21
- "DiffFormula:",
22
- "DiffMono:",
23
- "Formula:",
24
- "MassAvg:",
25
- "MassMono:",
26
- "Origin:",
27
- "Source:",
28
- "TermSpec:",
29
- "FormalCharge:",
30
- "snap:Quality",
31
- "depicted:by",
32
- "http:http\\://www.pacificbiosciences.com/pdf/WP_Detecting_DNA_Base_Modifications_Using_SMRT_Sequencing.pdf",
33
- "XX:www.ensembl.org/info/genome/variation/predicted_data.html#consequences",
34
- "http:www.ensembl.org/info/genome/variation/predicted_data.html#consequences",
35
- "EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html",
36
- "\\:has_start_point",
37
- "DDB:pf",
38
- "TS:0",
39
- "CTD:curators",
40
- "IEDB:RV",
41
- "Tail:fat",
42
- "Pituitary:gland",
43
- "Compound:eye",
44
- "Lymph:node",
45
- "Lamina:propria",
46
- "Follicular:fluid",
47
- "dph:GOC",
48
- "gOC:dph",
49
- "gOC:dph",
50
- "ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd",
51
- "HPO:PCS",
52
- "HPO:ICE",
53
- "IEDB:BP",
54
- "PomBane:vw",
55
- "PomBase\\:mah",
56
- {
57
- "type": "group",
58
- "from": "mpath",
59
- "text": [
60
- "Pathbase: Curation",
61
- "Pathbase: Pathology Committee"
62
- ]
63
- },
64
- {
65
- "type": "group",
66
- "from": "hp",
67
- "reason": "curator names appearing in definition and synonym provenance strings",
68
- "text": [
69
- "LMU:mgriese",
70
- "HPO:sdoelken",
71
- "HPO:curators",
72
- "Sanford:krageth",
73
- "HPO:probinson",
74
- "UToronto:chum",
75
- "PhenoTips:CHum"
76
- ]
77
- },
78
- {
79
- "type": "group",
80
- "text": [
81
- "KL:KL",
82
- "PT:PT",
83
- "EM:EM"
84
- ],
85
- "reason": "nonsense attribution",
86
- "from": "idomal"
87
- },
88
- {
89
- "type": "group",
90
- "text": [
91
- "NS:NS",
92
- "Walter_Reed:taxonomy"
93
- ],
94
- "from": "miro"
95
- },
96
- "PLATY:A.H.L.Fischer",
97
- "URL:http\\://digitalcommons.unl.edu/cgi/viewcontent.cgi?article=1007&context=onlinedictinvertzoology",
98
- "NIFSTD:NeuroNames_abbrevSource",
99
- {
100
- "type": "group",
101
- "text": [
102
- "OBOL:accepted",
103
- "OBOL:automatic",
104
- "Follicular:antrum",
105
- "Leydig's:organ",
106
- "Dermal:denticle",
107
- "Serous:membrane",
108
- "Pinhole:eye",
109
- "Vaginal:fornix",
110
- "Duct:(anatomy)",
111
- "Organ:(anatomy)"
112
- ],
113
- "from": "uberon"
114
- },
115
- "TAIR:TED",
116
- "CS:0",
117
- "ADL:FTT",
118
- "Getty:TGN",
119
- {
120
- "type": "group",
121
- "text": [
122
- "USGS:SDTS",
123
- "NASA:earthrealm",
124
- "Genomes:fissure",
125
- "METAR:FZRA",
126
- "INTERNAS:WS2",
127
- "Uranium:mining"
128
- ],
129
- "from": "envo"
130
- },
131
- {
132
- "type": "group",
133
- "text": [
134
- "FlyPNS:PNSdescription0.html",
135
- "FlyPNS:PNSdescription.html#lbd",
136
- "FlyPNS:PNSdescription.html#ltd",
137
- "FlyPNS:PNSnomenclature.html",
138
- "FlyPNS:PNSdescription.html#dh1"
139
- ],
140
- "from": "fbbt"
141
- },
142
- {
143
- "type": "group",
144
- "text": [
145
- "LifO:0000005"
146
- ],
147
- "from": "ino",
148
- "reason": "random ad-hoc term. should probably just skip this entire ontology"
149
- },
150
- {
151
- "type": "group",
152
- "text": [
153
- "GeneReviews:NBK331",
154
- "GeneReviews:NBK5191",
155
- "GeneReveiws:NBK350"
156
- ],
157
- "from": "maxo",
158
- "comment": "these actually correspond to NCBI books"
159
- },
160
- {
161
- "type": "group",
162
- "text": [
163
- "value-type:xsd\\:string"
164
- ],
165
- "from": "ms"
166
- },
167
- {
168
- "type": "group",
169
- "text": [
170
- "POC:Maria_Alejandra_Gandolfo",
171
- "NIG:Yukiko_Yamazaki",
172
- "NYBG:Brandon_Sinn",
173
- "NYBG:Dario_Cavaliere"
174
- ],
175
- "from": "po"
176
- },
177
- {
178
- "type": "group",
179
- "text": [
180
- "Gramene:pankaj_jaiswal",
181
- "scale:6",
182
- "Gramene:cwt6"
183
- ],
184
- "from": "to"
185
- },
186
- {
187
- "type": "group",
188
- "text": [
189
- "active:in",
190
- "actively:involves",
191
- "aggregate:statistic",
192
- "associated:with",
193
- "affected:by",
194
- "base:coordinate",
195
- "biomarker:for",
196
- "capable:of",
197
- "caused:by",
198
- "coexpressed:with",
199
- "association:type",
200
- "association:slot",
201
- "close:match",
202
- "completed:by",
203
- "coexists:with",
204
- "colocalizes:with",
205
- "available:from"
206
- ],
207
- "from": "biolink"
208
- },
209
- {
210
- "type": "group",
211
- "text": [
212
- "TTOCurator:Conway_etal_2008",
213
- "TTOCurator:CavenderAndCoburn1992",
214
- "TTOCurator:Vari1995"
215
- ],
216
- "from": "tto"
217
- }
218
- ],
219
- "resource_prefix": {
220
- "ogg": [
221
- "IMGT/GENE-DB:"
222
- ],
223
- "vo": [
224
- "NCBITaxon_VO"
225
- ],
226
- "duo": [
227
- "topic:"
228
- ],
229
- "fbbt": [
230
- "larvalbrain_axon_tract:",
231
- "larvalbrain_neuropil:",
232
- "FlyPNS:PNS"
233
- ],
234
- "envo": [
235
- "SPIRE:",
236
- "TODO^^^",
237
- "IUCN-PACS:",
238
- "http://www.britannica.com"
239
- ],
240
- "mco": [
241
- "colombos:"
242
- ],
243
- "efo": [
244
- "Germplasm:",
245
- "BilaDO:",
246
- "OGES:"
247
- ],
248
- "idomal": [
249
- "IRAC:",
250
- "MOSBP"
251
- ],
252
- "mondo": [
253
- "url:https\\://rarediseases.info.nih.gov",
254
- "ICD10EXP:"
255
- ],
256
- "mod": [
257
- "Remap:",
258
- "UnMod:",
259
- "UniMo:"
260
- ],
261
- "hao": [
262
- "DUMMY_TYPDEF:"
263
- ],
264
- "iceo": [
265
- "OTCS:"
266
- ],
267
- "uberon": [
268
- "GAID:",
269
- "OpenCyc:",
270
- "BM:",
271
- "BSA:",
272
- "XtroDO:",
273
- "nlx_subcell",
274
- "OGEM:",
275
- "ANISEED:",
276
- "BILS:",
277
- "ABA:",
278
- "MAP:"
279
- ],
280
- "cellosaurus": [
281
- "FCDI",
282
- "CCTCC:",
283
- "IARC_TP53:",
284
- "KCB:",
285
- "PharmacoDB:",
286
- "MCCL:MCC:",
287
- "IBRC",
288
- "ISCR",
289
- "IZSLER",
290
- "MCCL",
291
- "NCBI_Iran",
292
- "NCI-DTP",
293
- "NISES",
294
- "RSCB",
295
- "SKY/M-FISH/CGH",
296
- "LINCS_HMS",
297
- "CCLV",
298
- "Cosmic-CLP:",
299
- "PubChem_Cell_line:CVCL_",
300
- "Rockland:",
301
- "CancerTools:",
302
- "Innoprot:"
303
- ],
304
- "oba": [
305
- "AUTO:patterns"
306
- ],
307
- "pato": [
308
- "ilxtr:"
309
- ],
310
- "plana": [
311
- "Optic:",
312
- "NIF:Subcellular\\:sao"
313
- ],
314
- "po": [
315
- "FNA:",
316
- "PlantSystematics_image_archive:",
317
- "CO_125:",
318
- "SOY:",
319
- "TAIR:",
320
- "OB_SF2_PO:",
321
- "OBO-SF2_PO:",
322
- "OBO_SF3_PO:",
323
- "OBOS_SF_PO:",
324
- "OB_SF_PO:",
325
- "OBO-SF_PO:",
326
- "OBO_PO_SF:",
327
- "PO_REF",
328
- "Maize",
329
- "GO_GIT"
330
- ],
331
- "pr": [
332
- "ImmPort:",
333
- "TLR:",
334
- "HIstome_ptm_Hs:"
335
- ],
336
- "to": [
337
- "TO_GIT",
338
- "GIT_TO",
339
- "OBO_S2F_TO",
340
- "GCP:GCP_"
341
- ],
342
- "tto": [
343
- "CASGEN:"
344
- ],
345
- "wb": [
346
- "WBPaper:"
347
- ],
348
- "cl": [
349
- "WikipediaVersioned"
350
- ]
351
- },
352
- "prefix": [
353
- "Image:",
354
- "Category",
355
- "http://",
356
- "https://",
357
- "http://dbpedia.org",
358
- "https://github.com",
359
- "PERSON",
360
- "similar to",
361
- "modelled on",
362
- "SUBMITTER",
363
- "STRUCTURE_ChemicalName_IUPAC",
364
- "STRUCTURE_Formula",
365
- "stedman",
366
- "From_Merriam-Webster's_Online_Dictionary_at_www.Merriam-Webster.com",
367
- "value-type:",
368
- "binary-data-type:MS\\",
369
- "PECO_GIT",
370
- "OBO_SF2_PECO",
371
- "id-validation-regexp: ",
372
- "id-validation-regex: ",
373
- "search-url: ",
374
- "regexp: ",
375
- "Germplasm:",
376
- "IUPAC:",
377
- "IUPHAR:GPCRListForward?",
378
- "GOC:",
379
- "goc:",
380
- "GIOC:",
381
- "MONDORULE:",
382
- "MTH:",
383
- "FBC:",
384
- "RSC:",
385
- "DDB:",
386
- "http:www",
387
- "NCBITaxon_Union",
388
- "PhenoScape:",
389
- "INFOODs:",
390
- "NLCD:",
391
- "TEMP:",
392
- "PO_GIT:",
393
- "URL:http"
394
- ],
395
- "suffix": [
396
- ".jpg",
397
- ".svg",
398
- ".png"
399
- ]
400
- },
401
- "remappings": {
402
- "full": {
403
- "CHEBI:133245 MetaCyc:quercetin 3-rhamnoside-7-rhamnoside": "CHEBI:133245",
404
- "CHEBI:16531 MetaCyc:renillar luciferin": "CHEBI:16531",
405
- "CHEBI:49553 PDBeChem:Copper(II) chloride": "CHEBI:49553",
406
- "CHEBI:77450 MetaCyc:dicarboxylic acid monoamide \"SUBMITTER\"": "CHEBI:77450",
407
- "CHEBI:84990 MetaCyc:<locant>gamma</locant>-carboxy-<stereo>L</stereo>-glutamate residue \"SUBMITTER\"": "CHEBI:84990",
408
- "LIPIDMAPSLMFA01030141": "LIPIDMAPS:LMFA01030141",
409
- "LIPIDMAPSLMFA01030152": "LIPIDMAPS:LMFA01030152",
410
- "SNOMEDCT274897005": "SNOMEDCT:274897005",
411
- "GIOC:vw": "GOC:vw",
412
- ":has_start_point": "has_start_point",
413
- "dc-creator": "dc:creator",
414
- "PMI:17498297": "PMID:17498297",
415
- "HPO:SKOEHLER": "orcid:0000-0002-5316-1399",
416
- "HPO:skoehler": "orcid:0000-0002-5316-1399",
417
- "UBERON:cjm": "orcid:0000-0002-6601-2165",
418
- "part:of": "BFO:0000050",
419
- "bearer:of": "RO:0000053",
420
- "inheres:in": "RO:0000052",
421
- "inheres:in-obso": "RO:0000052",
422
- "inheres:in_part_of": "RO:0002314",
423
- "role:of": "RO:0000081",
424
- "unfolds:in": "BFO:0000066",
425
- "PATOC:MAH": "orcid:0000-0003-4148-4606",
426
- "SBN:9780070316607": "ISBN:9780070316607",
427
- "dc-contributor": "dc:contributor",
428
- "dc-license": "dc:license",
429
- "seeAlso": "rdfs:seeAlso"
430
- },
431
- "prefix": {
432
- "ATCC number: ": "ATCC:",
433
- "ATTC: ": "ATCC:",
434
- "BAO_": "BAO:",
435
- "TKG:TKG ": "TKG:",
436
- "KCB:KCB ": "KCB:",
437
- "CVCL_": "cellosaurus:CVCL_",
438
- "EGA:EGAS": "ega.study:EGAS",
439
- "EGA:phs": "ega.study:phs",
440
- "EGA:EGAD": "ega.study:EGAD",
441
- "KEGG COMPOUND": "KEGG.COMPOUND",
442
- "KEGG DRUG": "KEGG.DRUG",
443
- "LIPID MAPS:": "LIPIDMAPS:",
444
- "MedlinePlus: ": "MedlinePlus:",
445
- "NIST Chemistry WebBook": "NIST",
446
- "PMID: ": "PMID:",
447
- "SNOMEDCT: ": "SNOMEDCT:",
448
- "UMLS CUI:": "UMLS:",
449
- "http://linkedlifedata.com/resource/umls/id/": "UMLS:",
450
- "URL: ": "URL:",
451
- "VFB:FBbt_": "FBbt:",
452
- "chembl target:CHEMBL": "chembl.target:CHEMBL",
453
- "protein ontology:PR\\:": "PR:PR:",
454
- "http://orcid.org/": "orcid:",
455
- "http:http\\://en.wikipedia.org/wiki/": "wikipedia.en:",
456
- "http://en.wikipedia.org/wiki/": "wikipedia.en:",
457
- "http:en.wikipedia.org/wiki/": "wikipedia.en:",
458
- "http:http\\:en.wikipedia.org/wiki/": "wikipedia.en:",
459
- "WIKI:https\\://en.wikipedia.org/wiki/": "wikipedia.en:",
460
- "BioRXiv:https\\://doi.org/": "doi:",
461
- "NIF_Subcellular:sao": "SAO:",
462
- "NIF_Subcellular:nlx_subcell_": "NLXANAT",
463
- "NIF_Subcellular:birnlex": "neurolex:birnlex",
464
- "NIFSTD:birnlex": "neurolex:birnlex",
465
- "NIF_Subcellular:nlx": "neurolex:nlx",
466
- "NIFSTD:nifext_": "neurolex:nifext_",
467
- "NIFSTD:sao": "SAO:",
468
- "NIFSTD:nlx_dys_": "NLXDYS:",
469
- "URL:http\\://en.wikipedia.org/wiki/": "wikipedia.en:",
470
- "url: \"https://en.wikipedia.org/wiki/": "wikipedia.en:",
471
- "url:https\\://www.ncbi.nlm.nih.gov/pubmed/": "pubmed:",
472
- "url:https\\://pubmed.ncbi.nlm.nih.gov/": "pubmed:",
473
- "URL:http\\://": "http://",
474
- "URL: http\\://": "http://",
475
- "url:http\\://": "http://",
476
- "url:https\\://ghr.nlm.nih.gov/condition/": "ghr:",
477
- "NIF_Subcellular:FMA_": "FMA:",
478
- "Wikipeda:": "wikipedia:",
479
- "has:": "has_",
480
- "NSH:D0": "MESH:D0",
481
- "MONDOLEX:": "MONDO:",
482
- "unirot:": "uniprot:",
483
- "http:www": "http://www",
484
- "ECGOntology:<new dbxref> \"": "ECGOntology:",
485
- "GNOme: \"GNO:": "glygen",
486
- "UniProt: \"": "uniprot:",
487
- "Unimod: \"Unimod:": "unimod:",
488
- "OMIMl:": "OMIM:",
489
- "CREDIT_00": "CREDIT:00",
490
- "CCLV:CCLV-RIE ": "CCLV:",
491
- "APOLLO:SV_": "APOLLO_SV:",
492
- "GEMET:http\\://www.eionet.europa.eu/gemet/concept/": "GEMET:",
493
- "FBdv_root:": "FBdv:",
494
- "NCBI:NBK": "ncbibook:",
495
- "MicrO.owl/MICRO:": "MICRO:",
496
- "MicrO.owl/TEMP:": "MICRO:",
497
- "MicrO.owl/Temp:": "MICRO:",
498
- "MicrO.owl/temp:": "MICRO:",
499
- "xsd\\:": "xsd:",
500
- "OGI.owl:": "ogi:",
501
- "PANTHER:PTHR": "panther.family:PTHR",
502
- "vo/ontorat/PR:": "PR:",
503
- "DC:0000": "diseaseclass:0000",
504
- "TS-": "caloha:"
505
- }
506
- }
507
- }
@@ -1,135 +0,0 @@
1
- """Load the manually curated metaregistry."""
2
-
3
- import itertools as itt
4
- import json
5
- import os
6
- from collections.abc import Iterable, Mapping
7
- from functools import lru_cache
8
- from pathlib import Path
9
-
10
- import bioregistry
11
-
12
- from ..constants import GLOBAL_SKIP, RAW_DIRECTORY
13
-
14
- HERE = Path(__file__).parent.resolve()
15
- CURATED_REGISTRY_PATH = HERE.joinpath("metaregistry.json")
16
- CURATED_REGISTRY = json.loads(CURATED_REGISTRY_PATH.read_text())
17
-
18
-
19
- def has_no_download(prefix: str) -> bool:
20
- """Return if the prefix is not available."""
21
- prefix_norm = bioregistry.normalize_prefix(prefix)
22
- return prefix_norm is not None and prefix_norm in _no_download()
23
-
24
-
25
- @lru_cache(maxsize=1)
26
- def _no_download() -> set[str]:
27
- """Get the list of prefixes not available as OBO."""
28
- return {
29
- prefix
30
- for prefix in bioregistry.read_registry()
31
- if bioregistry.get_obo_download(prefix) is None
32
- and bioregistry.get_owl_download(prefix) is None
33
- }
34
-
35
-
36
- def curie_has_blacklisted_prefix(curie: str) -> bool:
37
- """Check if the CURIE string has a blacklisted prefix."""
38
- return any(curie.startswith(x) for x in get_xrefs_prefix_blacklist())
39
-
40
-
41
- @lru_cache(maxsize=1)
42
- def get_xrefs_prefix_blacklist() -> set[str]:
43
- """Get the set of blacklisted xref prefixes."""
44
- #: Xrefs starting with these prefixes will be ignored
45
- prefixes = set(
46
- itt.chain.from_iterable(CURATED_REGISTRY["blacklists"]["resource_prefix"].values())
47
- ) | set(CURATED_REGISTRY["blacklists"]["prefix"])
48
- nonsense = {
49
- prefix
50
- for prefix in prefixes
51
- if bioregistry.normalize_prefix(prefix.rstrip(":")) is not None
52
- }
53
- if nonsense:
54
- raise ValueError(
55
- f"The following prefixes were blacklisted but are in the bioregistry: {nonsense}"
56
- )
57
- return prefixes
58
-
59
-
60
- def curie_has_blacklisted_suffix(curie: str) -> bool:
61
- """Check if the CURIE string has a blacklisted suffix."""
62
- return any(curie.endswith(suffix) for suffix in get_xrefs_suffix_blacklist())
63
-
64
-
65
- @lru_cache(maxsize=1)
66
- def get_xrefs_suffix_blacklist() -> set[str]:
67
- """Get the set of blacklisted xref suffixes."""
68
- #: Xrefs ending with these suffixes will be ignored
69
- return set(CURATED_REGISTRY["blacklists"]["suffix"])
70
-
71
-
72
- def curie_is_blacklisted(curie: str) -> bool:
73
- """Check if the full CURIE string is blacklisted."""
74
- return curie in get_xrefs_blacklist()
75
-
76
-
77
- @lru_cache(maxsize=1)
78
- def get_xrefs_blacklist() -> set[str]:
79
- """Get the set of blacklisted xrefs."""
80
- rv = set()
81
- for x in CURATED_REGISTRY["blacklists"]["full"]:
82
- if isinstance(x, str):
83
- rv.add(x)
84
- elif isinstance(x, dict):
85
- if x.get("type") == "group":
86
- rv.update(x["text"])
87
- elif "text" in x:
88
- rv.add(x["text"])
89
- else:
90
- raise ValueError("invalid schema")
91
- else:
92
- raise TypeError
93
- return rv
94
-
95
-
96
- @lru_cache(maxsize=1)
97
- def get_remappings_full() -> Mapping[str, str]:
98
- """Get the remappings for xrefs based on the entire xref database."""
99
- return CURATED_REGISTRY["remappings"]["full"]
100
-
101
-
102
- def remap_full(x: str) -> str:
103
- """Remap the string if possible otherwise return it."""
104
- return get_remappings_full().get(x, x)
105
-
106
-
107
- @lru_cache(maxsize=1)
108
- def get_remappings_prefix() -> Mapping[str, str]:
109
- """Get the remappings for xrefs based on the prefix.
110
-
111
- .. note:: Doesn't take into account the semicolon `:`
112
- """
113
- return CURATED_REGISTRY["remappings"]["prefix"]
114
-
115
-
116
- def remap_prefix(curie: str) -> str:
117
- """Remap a prefix."""
118
- for old_prefix, new_prefix in get_remappings_prefix().items():
119
- if curie.startswith(old_prefix):
120
- return new_prefix + curie[len(old_prefix) :]
121
- return curie
122
-
123
-
124
- def iter_cached_obo() -> Iterable[tuple[str, str]]:
125
- """Iterate over cached OBO paths."""
126
- for prefix in os.listdir(RAW_DIRECTORY):
127
- if prefix in GLOBAL_SKIP or has_no_download(prefix) or bioregistry.is_deprecated(prefix):
128
- continue
129
- d = RAW_DIRECTORY.joinpath(prefix)
130
- if not os.path.isdir(d):
131
- continue
132
- for x in os.listdir(d):
133
- if x.endswith(".obo"):
134
- p = os.path.join(d, x)
135
- yield prefix, p