pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
@@ -1,187 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- """PyOBO's Mapping Service.
4
-
5
- Run with ``python -m pyobo.apps.mapper``.
6
- """
7
-
8
- import logging
9
- from functools import lru_cache
10
- from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
11
-
12
- import bioregistry
13
- import pandas as pd
14
- from flasgger import Swagger
15
- from flask import (
16
- Blueprint,
17
- Flask,
18
- abort,
19
- current_app,
20
- jsonify,
21
- render_template,
22
- url_for,
23
- )
24
- from flask_bootstrap import VERSION_BOOTSTRAP, Bootstrap
25
- from werkzeug.local import LocalProxy
26
-
27
- from pyobo import Canonicalizer
28
- from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX
29
- from pyobo.identifier_utils import normalize_curie
30
- from pyobo.resource_utils import ensure_inspector_javert_df
31
-
32
- __all__ = [
33
- "get_app",
34
- ]
35
-
36
- logger = logging.getLogger(__name__)
37
-
38
- summary_df = LocalProxy(lambda: current_app.config["summary"])
39
- canonicalizer: Canonicalizer = LocalProxy(lambda: current_app.config["canonicalizer"])
40
-
41
-
42
@lru_cache()
def _single_source_shortest_path(curie: str) -> Optional[Mapping[str, List[Mapping[str, str]]]]:
    """Return the canonicalizer's single-source shortest paths for ``curie``.

    Thin memoized wrapper around ``canonicalizer.single_source_shortest_path``;
    ``lru_cache`` keeps the result per CURIE so repeated requests are cheap.
    """
    paths = canonicalizer.single_source_shortest_path(curie=curie)
    return paths
45
-
46
-
47
@lru_cache()
def _all_shortest_paths(source_curie: str, target_curie: str) -> List[List[Mapping[str, str]]]:
    """Return all shortest xref paths between two CURIEs.

    Thin memoized wrapper around ``canonicalizer.all_shortest_paths``; results
    are cached per ``(source_curie, target_curie)`` pair by ``lru_cache``.
    """
    paths = canonicalizer.all_shortest_paths(
        source_curie=source_curie,
        target_curie=target_curie,
    )
    return paths
50
-
51
-
52
- #: The blueprint that gets added to the app
53
- search_blueprint = Blueprint("search", __name__)
54
-
55
-
56
@search_blueprint.route("/")
def home():
    """Show the home page."""
    # The landing page is rendered from the ``mapper_home.html`` template.
    template_name = "mapper_home.html"
    return render_template(template_name)
60
-
61
-
62
@search_blueprint.route("/mappings/<curie>")
def single_source_mappings(curie: str):
    """Return the shortest xref paths that start at the given CURIE.

    :param curie: The CURIE to look up, taken verbatim from the URL
    :return: A JSON response with the shortest paths, or a JSON failure
        payload (``success=False``) if the CURIE is not in the xref graph
    """
    if curie in canonicalizer.graph:
        return jsonify(_single_source_shortest_path(curie))

    # Unknown node: report the failure in the body rather than as an HTTP error.
    return jsonify(
        success=False,
        query={"curie": curie},
        message="could not find curie",
    )
72
-
73
-
74
@search_blueprint.route("/mappings/<source_curie>/<target_curie>")
def all_mappings(source_curie: str, target_curie: str):
    """Return all shortest paths of xrefs between the two identifiers.

    :param source_curie: The CURIE at which the paths start
    :param target_curie: The CURIE at which the paths end
    :return: A JSON response with the paths, or a JSON failure payload
        (``success=False``) naming whichever CURIE is missing from the graph
    """
    query = {"source_curie": source_curie, "target_curie": target_curie}
    # The source is checked before the target, so a doubly-unknown pair
    # reports the source.
    checks = (
        (source_curie, "could not find source curie"),
        (target_curie, "could not find target curie"),
    )
    for node, message in checks:
        if node not in canonicalizer.graph:
            return jsonify(success=False, query=query, message=message)

    return jsonify(_all_shortest_paths(source_curie, target_curie))
91
-
92
-
93
@search_blueprint.route("/mappings/summarize")
def summarize():
    """Summarize the mappings as an HTML table.

    Renders the summary table stored under the app's ``"summary"`` config key.
    """
    html_table = summary_df.to_html(index=False)
    return html_table
97
-
98
-
99
@search_blueprint.route("/mappings/summarize_by/<prefix>")
def summarize_one(prefix: str):
    """Summarize the incoming and outgoing mappings for a single prefix.

    :param prefix: A prefix, normalized with the Bioregistry before use
    :return: An HTML fragment with one table of mappings into the prefix and
        one table of mappings out of it; aborts with status 500 if the prefix
        can not be normalized
    """
    norm_prefix = bioregistry.normalize_prefix(prefix)
    if norm_prefix is None:
        return abort(500, f"invalid prefix: {prefix}")

    # Boolean row masks over the summary table.
    is_target = summary_df[TARGET_PREFIX] == norm_prefix
    is_source = summary_df[SOURCE_PREFIX] == norm_prefix
    in_df = summary_df.loc[is_target, [SOURCE_PREFIX, "count"]]
    out_df = summary_df.loc[is_source, [TARGET_PREFIX, "count"]]
    return f"""
    <h1>Incoming Mappings to {norm_prefix}</h1>
    {in_df.to_html(index=False)}
    <h1>Outgoing Mappings from {norm_prefix}</h1>
    {out_df.to_html(index=False)}
    """
113
-
114
-
115
@search_blueprint.route("/canonicalize/<curie>")
def canonicalize(curie: str):
    """Return the best CURIE.

    :param curie: The CURIE to canonicalize, taken verbatim from the URL
    :return: A JSON response. If the CURIE can not be normalized it contains
        ``normalizable=False``. Otherwise it contains the query, the
        normalized CURIE (only if it differs from the query), and whether it
        was found in the xref graph; when found, it also has the canonical
        CURIE and a link to the mappings between the two.
    """
    # ``strict=False`` is required here: with the default ``strict=True``,
    # ``normalize_curie`` raises on an unrecognized prefix, which turned the
    # request into an HTTP 500 instead of the ``normalizable=False`` payload.
    norm_prefix, norm_identifier = normalize_curie(curie, strict=False)
    if norm_prefix is None or norm_identifier is None:
        return jsonify(
            query=curie,
            normalizable=False,
        )

    norm_curie = f"{norm_prefix}:{norm_identifier}"

    rv: Dict[str, Any] = dict(query=curie)
    if norm_curie != curie:
        rv["norm_curie"] = norm_curie

    if norm_curie not in canonicalizer.graph:
        rv["found"] = False
    else:
        result_curie = canonicalizer.canonicalize(norm_curie)
        rv.update(
            found=True,
            result=result_curie,
            # Relative endpoint name, so the link resolves within this blueprint.
            mappings=url_for(
                f".{all_mappings.__name__}",
                source_curie=norm_curie,
                target_curie=result_curie,
            ),
        )

    return jsonify(rv)
147
-
148
-
149
def get_app(paths: Union[None, str, Iterable[str]] = None) -> Flask:
    """Build the Flask app.

    :param paths: Where to load the xref table from. ``None`` loads it with
        ``ensure_inspector_javert_df()``, a string is read as a single TSV
        file, and any other iterable is read as several TSV files whose
        tables are concatenated.
    :return: A Flask app with Swagger and Bootstrap applied, the summary
        tables and canonicalizer stored in its config, and the search
        blueprint registered
    """
    app = Flask(__name__)
    Swagger(app)

    logger.info("using bootstrap_flask %s", VERSION_BOOTSTRAP)
    Bootstrap(app)

    if isinstance(paths, str):
        df = pd.read_csv(paths, sep="\t", dtype=str)
    elif paths is not None:
        df = pd.concat(pd.read_csv(path, sep="\t", dtype=str) for path in paths)
    else:
        df = ensure_inspector_javert_df()

    summary = summarize_xref_df(df)
    summary_provenances = summarize_xref_provenances_df(df)
    # TODO allow for specification of priorities in the canonicalizer
    xref_canonicalizer = Canonicalizer.from_df(df)

    app.config["summary"] = summary
    app.config["summary_provenances"] = summary_provenances
    app.config["canonicalizer"] = xref_canonicalizer
    app.register_blueprint(search_blueprint)
    return app
170
-
171
-
172
def summarize_xref_df(df: pd.DataFrame) -> pd.DataFrame:
    """Count the xrefs for each (source prefix, target prefix) pair.

    :param df: An xref table with source-prefix and target-prefix columns
    :return: One row per prefix pair with a ``count`` column, most frequent first
    """
    group_columns = [SOURCE_PREFIX, TARGET_PREFIX]
    return _summarize(df, group_columns)
175
-
176
-
177
def summarize_xref_provenances_df(df: pd.DataFrame) -> pd.DataFrame:
    """Count the xrefs for each (source prefix, target prefix, provenance) triple.

    :param df: An xref table with source-prefix, target-prefix, and provenance columns
    :return: One row per triple with a ``count`` column, most frequent first
    """
    group_columns = [SOURCE_PREFIX, TARGET_PREFIX, PROVENANCE]
    return _summarize(df, group_columns)
180
-
181
-
182
- def _summarize(df: pd.DataFrame, columns) -> pd.DataFrame:
183
- """Get all meta-mappings."""
184
- rv = df[columns].groupby(columns).size().reset_index()
185
- rv.columns = [*columns, "count"]
186
- rv.sort_values("count", inplace=True, ascending=False)
187
- return rv
@@ -1,35 +0,0 @@
1
- <!doctype html>
2
- <html lang="en">
3
- <head>
4
- {% block head %}
5
- <!-- Required meta tags -->
6
- <meta charset="utf-8">
7
- <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
-
9
- {% block styles %}
10
- <!-- Bootstrap CSS -->
11
- {{ bootstrap.load_css() }}
12
- {% endblock %}
13
-
14
- <script src="https://kit.fontawesome.com/4c86883252.js" crossorigin="anonymous"></script>
15
-
16
- <title>{% block title %}{% endblock %}</title>
17
- {% endblock %}
18
- </head>
19
- <body>
20
- <div class="container" style="margin-top: 25px; margin-bottom: 25px">
21
- <div class="row justify-content-center">
22
- <div class="col-md-8 col-lg-8">
23
- <div class="card">
24
- <!-- Your page content -->
25
- {% block content %}{% endblock %}
26
- </div>
27
- </div>
28
- </div>
29
- </div>
30
- {% block scripts %}
31
- <!-- Optional JavaScript -->
32
- {{ bootstrap.load_js() }}
33
- {% endblock %}
34
- </body>
35
- </html>
@@ -1,64 +0,0 @@
1
- {% extends "base.html" %}
2
-
3
- {% block content %}
4
- <h5 class="card-header">Inspector Javert's Mapper</h5>
5
- <div class="card-body">
6
- <p class="card-text">
7
- This service resolves <a href="https://en.wikipedia.org/wiki/CURIE">CURIEs</a>
8
- to the best CURIE that's mapped to it.
9
- </p>
10
- <p>
11
- A summary of all of the xrefs can be found <a href="{{ url_for('.summarize') }}">here</a>.
12
- You can also look at a summary for a specific prefix like <a
13
- href="{{ url_for('.summarize_one', prefix='umls') }}"><code>UMLS</code></a>.
14
- </p>
15
- <ul>
16
- <li>
17
- Use the <code>/mappings</code> endpoint to look up equivalent entities,
18
- for example, <a
19
- href="{{ url_for('.single_source_mappings', curie='hgnc:6893') }}"><code>hgnc:6893</code></a>.
20
- </li>
21
- <li>
22
- Use the <code>/mappings</code> endpoint to look up all mappings between two entities,
23
- for example, <a
24
- href="{{ url_for('.all_mappings', source_curie='hgnc:6893', target_curie='ensembl:ENSG00000186868') }}"><code>hgnc:6893</code>
25
- and <code>ensembl:ENSG00000186868</code></a>.
26
- </li>
27
- </ul>
28
- </div>
29
- <table class="table">
30
- <thead>
31
- <tr>
32
- <th scope="col">CURIE</th>
33
- <th scope="col">Description</th>
34
- </tr>
35
- </thead>
36
- <tr>
37
- <td><a href="{{ url_for('.canonicalize', curie='hgnc:6893') }}">hgnc:6893</a></td>
38
- <td>✅ maps correct identifier to higher priority namespace (ncbigene)</td>
39
- </tr>
40
- <tr>
41
- <td><a href="{{ url_for('.canonicalize', curie='ncbigene:4137') }}">ncbigene:4137</a></td>
42
- <td>✅ already priority namespace</td>
43
- </tr>
44
- <tr>
45
- <td><a href="{{ url_for('.canonicalize', curie='DOID:00000') }}">DOID:00000</a></td>
46
- <td>❌ invalid identifier</td>
47
- </tr>
48
- <tr>
49
- <td><a href="{{ url_for('.canonicalize', curie='NNN:00000') }}">NNN:00000</a></td>
50
- <td>❌ invalid prefix</td>
51
- </tr>
52
- <tr>
53
- <td><a href="{{ url_for('.canonicalize', curie='wikidata:Q42') }}">wikidata:Q42</a></td>
54
- <td>❌ unmapped prefix</td>
55
- </tr>
56
- </table>
57
- <div class="card-footer text-center">
58
- Developed with ❤️ in 🇩🇪 by <a href="https://github.com/cthoyt">@cthoyt</a>.
59
-
60
- (<span class="text-muted"><a href="https://github.com/pyobo/pyobo">source code</a></span> |
61
- <span class="text-muted"><a
62
- href="https://cthoyt.com/2020/04/19/inspector-javerts-xref-database.html">blog post</a></span>)
63
- </div>
64
- {% endblock %}
pyobo/aws.py DELETED
@@ -1,162 +0,0 @@
1
- """Interface for caching data on AWS S3."""
2
-
3
- import logging
4
- import os
5
- from typing import Optional
6
-
7
- import boto3
8
- import humanize
9
- from tabulate import tabulate
10
-
11
- from pyobo import (
12
- get_id_name_mapping,
13
- get_id_synonyms_mapping,
14
- get_id_to_alts,
15
- get_properties_df,
16
- get_relations_df,
17
- get_xrefs_df,
18
- )
19
- from pyobo.api.utils import get_version
20
- from pyobo.constants import RAW_DIRECTORY
21
- from pyobo.registries import iter_cached_obo
22
- from pyobo.utils.path import prefix_cache_join
23
-
24
- __all__ = [
25
- "download_artifacts",
26
- "upload_artifacts",
27
- "upload_artifacts_for_prefix",
28
- "list_artifacts",
29
- ]
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
-
34
def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
    """Download compiled parts from AWS.

    :param bucket: The name of the S3 bucket to download
    :param suffix: If specified, only download files with this suffix. Might
        be useful to specify ``suffix='names.tsv'`` if you just want to run the
        name resolution service.

    Files that already exist locally are not downloaded again.
    """
    s3_client = boto3.client("s3")
    # NOTE(review): ``list_objects`` returns a single page of results, so very
    # large buckets may need a paginator - confirm against bucket size.
    all_objects = s3_client.list_objects(Bucket=bucket)
    # An empty bucket's listing has no "Contents" key at all.
    for entry in all_objects.get("Contents", []):
        key = entry["Key"]
        if suffix and not key.endswith(suffix):
            # This was ``pass``, which silently disabled the suffix filter.
            continue
        path = os.path.join(RAW_DIRECTORY, key)
        if os.path.exists(path):
            continue  # no need to download again
        os.makedirs(os.path.dirname(path), exist_ok=True)
        logger.warning("downloading %s to %s", key, path)
        s3_client.download_file(bucket, key, path)
54
-
55
-
56
def upload_artifacts(
    bucket: str,
    whitelist: Optional[set[str]] = None,
    blacklist: Optional[set[str]] = None,
    s3_client=None,
) -> None:
    """Upload all artifacts to AWS.

    :param bucket: The name of the S3 bucket to upload to
    :param whitelist: If given and non-empty, only these prefixes are uploaded
    :param blacklist: If given and non-empty, these prefixes are skipped
    :param s3_client: An existing S3 client; one is created if not given

    Prefixes that already have at least one object in the bucket are skipped.
    """
    if s3_client is None:
        s3_client = boto3.client("s3")
    all_objects = s3_client.list_objects(Bucket=bucket)
    # An empty bucket's listing has no "Contents" key, and an empty bucket is
    # exactly the first-upload case, so it must not raise a KeyError.
    uploaded_prefixes = {
        entry["Key"].split("/")[0] for entry in all_objects.get("Contents", [])
    }

    for prefix, _ in sorted(iter_cached_obo()):
        if prefix in uploaded_prefixes:
            continue
        if whitelist and prefix not in whitelist:
            continue
        if blacklist and prefix in blacklist:
            continue
        upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)
76
-
77
-
78
def upload_artifacts_for_prefix(
    *, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
):
    """Upload compiled parts for the given prefix to AWS.

    :param prefix: The prefix of the resource whose cache files are uploaded
    :param bucket: The name of the S3 bucket to upload to
    :param s3_client: An existing S3 client; one is created if not given
    :param version: The resource version; looked up with ``get_version`` if not given
    :raises FileNotFoundError: If a getter did not leave its cache file on disk

    Each artifact is generated by calling its getter, then uploaded from the
    cache to the key ``<prefix>/cache/<file name>``.
    """
    if s3_client is None:
        s3_client = boto3.client("s3")

    if version is None:
        version = get_version(prefix)

    # (log label, getter that populates the cache, cache file name)
    artifacts = [
        ("id->name mapping", get_id_name_mapping, "names.tsv"),
        ("id->synonyms mapping", get_id_synonyms_mapping, "synonyms.tsv"),
        ("xrefs", get_xrefs_df, "xrefs.tsv"),
        ("relations", get_relations_df, "relations.tsv"),
        ("properties", get_properties_df, "properties.tsv"),
        ("alternative identifiers", get_id_to_alts, "alt_ids.tsv"),
    ]
    for label, getter, name in artifacts:
        logger.info("[%s] getting %s", prefix, label)
        getter(prefix)
        path = prefix_cache_join(prefix, name=name, version=version)
        if not path.exists():
            raise FileNotFoundError(path)
        # S3 keys always use "/" (``upload_artifacts`` splits on it), so the
        # key must not be built with the OS-specific path separator.
        key = f"{prefix}/cache/{name}"
        logger.info("[%s] uploading %s", prefix, label)
        # The client is passed for every artifact; the alt_ids upload used to
        # omit it, which created a fresh client.
        upload_file(path=path, bucket=bucket, key=key, s3_client=s3_client)
141
-
142
-
143
def upload_file(*, path, bucket, key, s3_client=None):
    """Upload a file to an S3 bucket.

    :param path: The local file path, as a string or a path-like object
    :param bucket: The name of the S3 bucket
    :param key: The relative file path to put on the S3 bucket
    :param s3_client: An existing S3 client; one is created if not given
    """
    if s3_client is None:
        s3_client = boto3.client("s3")
    # boto3 documents ``Filename`` as a ``str``, while callers in this module
    # pass ``pathlib.Path`` objects, so normalize before handing it over.
    s3_client.upload_file(os.fspath(path), bucket, key)
153
-
154
-
155
def list_artifacts(bucket: str) -> str:
    """List the files in a given bucket.

    :param bucket: The name of the S3 bucket
    :return: A text table with one row per object: its key and its
        human-readable size. An empty bucket yields a table with no rows.
    """
    s3_client = boto3.client("s3")
    all_objects = s3_client.list_objects(Bucket=bucket)
    # An empty bucket's listing has no "Contents" key at all.
    rows = [
        (entry["Key"], humanize.naturalsize(entry["Size"]))
        for entry in all_objects.get("Contents", [])
    ]
    return tabulate(rows, headers=["File", "Size"])
pyobo/cli/aws.py DELETED
@@ -1,47 +0,0 @@
1
- """CLI for PyOBO's interface to S3."""
2
-
3
- import click
4
- from more_click import verbose_option
5
-
6
- from ..aws import download_artifacts, list_artifacts, upload_artifacts
7
-
8
- __all__ = [
9
- "main",
10
- ]
11
-
12
- bucket_argument = click.argument("bucket")
13
-
14
-
15
@click.group(name="aws")
def main() -> None:
    """S3 utilities."""
    # Container group only: the subcommands attach themselves through
    # ``@main.command()``. The docstring above is the CLI help text.
18
-
19
-
20
@main.command()
@bucket_argument
@verbose_option
def download(bucket) -> None:
    """Download all artifacts from the S3 bucket."""
    # No suffix filter is exposed on the CLI, so everything is fetched.
    download_artifacts(bucket=bucket)
26
-
27
-
28
@main.command()
@bucket_argument
@verbose_option
@click.option("-w", "--whitelist", multiple=True)
@click.option("-b", "--blacklist", multiple=True)
def upload(bucket, whitelist, blacklist):
    """Upload all artifacts to the S3 bucket."""
    # The docstring is the CLI help text; it used to say "Download".
    # click delivers ``multiple=True`` options as tuples, while
    # ``upload_artifacts`` is annotated to take a set or ``None``.
    upload_artifacts(
        bucket,
        whitelist=set(whitelist) or None,
        blacklist=set(blacklist) or None,
    )
36
-
37
-
38
@main.command()
@bucket_argument
@verbose_option
def ls(bucket) -> None:
    """List all artifacts on the S3 bucket."""
    listing = list_artifacts(bucket)
    click.echo(listing)
44
-
45
-
46
- if __name__ == "__main__":
47
- main()
pyobo/identifier_utils.py DELETED
@@ -1,142 +0,0 @@
1
- """Utilities for handling prefixes."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- from functools import wraps
7
-
8
- import bioregistry
9
- from curies import Reference, ReferenceTuple
10
-
11
- from .registries import (
12
- curie_has_blacklisted_prefix,
13
- curie_has_blacklisted_suffix,
14
- curie_is_blacklisted,
15
- remap_full,
16
- remap_prefix,
17
- )
18
-
19
- __all__ = [
20
- "normalize_curie",
21
- "wrap_norm_prefix",
22
- "standardize_ec",
23
- ]
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
class MissingPrefixError(ValueError):
    """Raised on a missing prefix."""

    # Starts as ``None`` and, when set, is appended to the message.
    # NOTE(review): presumably assigned by callers after construction - confirm.
    reference: Reference | None

    def __init__(
        self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
    ):
        """Initialize the error.

        :param prefix: The prefix that could not be handled
        :param curie: The CURIE in which the prefix was found
        :param xref: The xref in which the CURIE was found, if any
        :param ontology: The ontology being processed, if known
        """
        # Pass the arguments up so that ``args`` is populated. Without this
        # the exception has an empty ``repr`` and can not be unpickled, since
        # unpickling re-calls the constructor with ``args``.
        super().__init__(prefix, curie, xref, ontology)
        self.prefix = prefix
        self.curie = curie
        self.xref = xref
        self.ontology = ontology
        self.reference = None

    def __str__(self) -> str:
        """Build the message from whichever context fields are set."""
        message = f"unhandled prefix {self.prefix} found in curie {self.curie}"
        if self.ontology:
            message = f"[{self.ontology}] {message}"
        if self.xref:
            message += f"/xref {self.xref}"
        if self.reference is not None:
            message += f" from {self.reference.curie}"
        return message
53
-
54
-
55
def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
    """Normalize a prefix with the Bioregistry.

    :param prefix: The prefix to normalize
    :param curie: The CURIE the prefix came from, used only for the error
    :param xref: The xref the prefix came from, used only for the error
    :param strict: Should an exception be raised if the prefix is unknown?
    :return: The normalized prefix, or ``None`` if it is unknown and not strict
    :raises MissingPrefixError: If the prefix is unknown and ``strict`` is true
    """
    norm_prefix = bioregistry.normalize_prefix(prefix)
    if norm_prefix is None and strict:
        raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
    return norm_prefix
64
-
65
-
66
- BAD_CURIES = set()
67
-
68
-
69
def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
    """Parse a string that looks like a CURIE.

    :param curie: A compact uniform resource identifier (CURIE)
    :param strict: Should an exception be thrown if the CURIE can not be parsed w.r.t. the Bioregistry?
    :return: A ``(prefix, identifier)`` pair, or ``(None, None)`` if the CURIE
        is blacklisted, has no colon, or has an unknown prefix while not strict

    The steps, in order:

    1. Reject CURIEs blacklisted in full, by prefix, or by suffix.
    2. Apply the full remapping, then the prefix remapping.
    3. Split on the first colon.
    4. Drop a prefix repeated at the start of the identifier (case-insensitive).
    5. Normalize the prefix.
    """
    unparsable = (None, None)

    rejected = (
        curie_is_blacklisted(curie)
        or curie_has_blacklisted_prefix(curie)
        or curie_has_blacklisted_suffix(curie)
    )
    if rejected:
        return unparsable

    # Remap the whole CURIE first, then its prefix (if necessary).
    curie = remap_prefix(remap_full(curie))

    head_ns, colon, identifier = curie.partition(":")
    if not colon:  # skip nodes that don't look like normal CURIEs
        if curie not in BAD_CURIES:
            # Remember the offender so that it is only logged once.
            BAD_CURIES.add(curie)
            logger.debug(f"could not split CURIE on colon: {curie}")
        return unparsable

    # Remove a redundant prefix at the start of the identifier.
    redundant = f"{head_ns.casefold()}:"
    if identifier.casefold().startswith(redundant):
        identifier = identifier[len(head_ns) + 1 :]

    norm_node_prefix = _normalize_prefix(head_ns, curie=curie, strict=strict)
    if norm_node_prefix:
        return norm_node_prefix, identifier
    return unparsable
108
-
109
-
110
def wrap_norm_prefix(f):
    """Decorate a function so that its first argument has its prefix normalized.

    The first positional argument may be a prefix string, a ``Reference``, or
    a ``ReferenceTuple``. Its prefix is normalized with the Bioregistry, and
    references are rebuilt around the normalized prefix, before ``f`` is called.

    :param f: A function whose first positional argument is a prefix or reference
    :return: The wrapped function
    :raises ValueError: (when the wrapper is called) If the prefix is unknown
    :raises TypeError: (when the wrapper is called) If the first argument is
        none of the supported types
    """

    def _checked(raw: str) -> str:
        # Normalize one prefix string, or fail with the offending value.
        norm = bioregistry.normalize_prefix(raw)
        if norm is None:
            raise ValueError(f"Invalid prefix: {raw}")
        return norm

    @wraps(f)
    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
        if isinstance(prefix, str):
            prefix = _checked(prefix)
        elif isinstance(prefix, Reference):
            prefix = Reference(prefix=_checked(prefix.prefix), identifier=prefix.identifier)
        elif isinstance(prefix, ReferenceTuple):
            prefix = ReferenceTuple(_checked(prefix.prefix), prefix.identifier)
        else:
            raise TypeError
        return f(prefix, *args, **kwargs)

    return _wrapped
135
-
136
-
137
def standardize_ec(ec: str) -> str:
    """Standardize an EC code identifier by removing all trailing dashes and dots.

    Surrounding whitespace and any embedded spaces are removed first.

    :param ec: An EC code, possibly partial, such as ``1.1.-.-``
    :return: The code without trailing placeholders, such as ``1.1``

    >>> standardize_ec("1.1.-.-")
    '1.1'
    """
    ec = ec.strip().replace(" ", "")
    # One ``rstrip`` over both characters removes every trailing dash and dot,
    # where the previous fixed four-pass loop could leave some behind.
    return ec.rstrip("-.")