pyobo 0.12.0__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/api/properties.py +8 -12
- pyobo/api/xrefs.py +1 -2
- pyobo/cli/database.py +30 -2
- pyobo/cli/database_utils.py +5 -11
- pyobo/getters.py +20 -79
- pyobo/gilda_utils.py +3 -80
- pyobo/identifier_utils/__init__.py +3 -10
- pyobo/identifier_utils/api.py +21 -12
- pyobo/obographs.py +11 -2
- pyobo/reader.py +13 -17
- pyobo/sources/cgnc.py +9 -1
- pyobo/sources/credit.py +17 -6
- pyobo/sources/flybase.py +5 -5
- pyobo/sources/omim_ps.py +4 -4
- pyobo/sources/pharmgkb/pharmgkb_gene.py +1 -1
- pyobo/struct/functional/ontology.py +3 -1
- pyobo/struct/reference.py +4 -4
- pyobo/struct/struct.py +112 -55
- pyobo/utils/cache.py +3 -4
- pyobo/utils/io.py +38 -14
- pyobo/utils/path.py +16 -19
- pyobo/version.py +1 -1
- {pyobo-0.12.0.dist-info → pyobo-0.12.2.dist-info}/METADATA +67 -118
- {pyobo-0.12.0.dist-info → pyobo-0.12.2.dist-info}/RECORD +164 -169
- {pyobo-0.12.0.dist-info → pyobo-0.12.2.dist-info}/WHEEL +1 -1
- pyobo/identifier_utils/model.py +0 -130
- pyobo/identifier_utils/preprocessing.json +0 -812
- pyobo/identifier_utils/preprocessing.py +0 -61
- pyobo/resources/goc.py +0 -75
- pyobo/resources/goc.tsv +0 -188
- {pyobo-0.12.0.dist-info → pyobo-0.12.2.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.0.dist-info → pyobo-0.12.2.dist-info}/licenses/LICENSE +0 -0
pyobo/utils/io.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
"""I/O utilities."""
|
|
2
2
|
|
|
3
3
|
import collections.abc
|
|
4
|
+
import contextlib
|
|
4
5
|
import csv
|
|
5
6
|
import gzip
|
|
6
7
|
import logging
|
|
7
8
|
from collections import defaultdict
|
|
8
|
-
from collections.abc import Iterable, Mapping
|
|
9
|
+
from collections.abc import Generator, Iterable, Mapping
|
|
9
10
|
from contextlib import contextmanager
|
|
10
11
|
from pathlib import Path
|
|
11
|
-
from typing import TypeVar
|
|
12
|
+
from typing import Literal, TextIO, TypeVar
|
|
12
13
|
|
|
13
14
|
import pandas as pd
|
|
14
15
|
from tqdm.auto import tqdm
|
|
15
16
|
|
|
16
17
|
__all__ = [
|
|
17
18
|
"get_reader",
|
|
18
|
-
"get_writer",
|
|
19
19
|
"multidict",
|
|
20
20
|
"multisetdict",
|
|
21
21
|
"open_map_tsv",
|
|
22
22
|
"open_multimap_tsv",
|
|
23
23
|
"open_reader",
|
|
24
|
+
"safe_open",
|
|
25
|
+
"safe_open_writer",
|
|
24
26
|
"write_iterable_tsv",
|
|
25
27
|
"write_map_tsv",
|
|
26
28
|
"write_multimap_tsv",
|
|
@@ -36,7 +38,7 @@ Y = TypeVar("Y")
|
|
|
36
38
|
def open_reader(path: str | Path, sep: str = "\t"):
|
|
37
39
|
"""Open a file and get a reader for it."""
|
|
38
40
|
path = Path(path)
|
|
39
|
-
with
|
|
41
|
+
with safe_open(path, read=True) as file:
|
|
40
42
|
yield get_reader(file, sep=sep)
|
|
41
43
|
|
|
42
44
|
|
|
@@ -45,16 +47,11 @@ def get_reader(x, sep: str = "\t"):
|
|
|
45
47
|
return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
|
|
46
48
|
|
|
47
49
|
|
|
48
|
-
def get_writer(x, sep: str = "\t"):
|
|
49
|
-
"""Get a :func:`csv.writer` with PyOBO default settings."""
|
|
50
|
-
return csv.writer(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
|
|
51
|
-
|
|
52
|
-
|
|
53
50
|
def open_map_tsv(
|
|
54
51
|
path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
|
|
55
52
|
) -> Mapping[str, str]:
|
|
56
53
|
"""Load a mapping TSV file into a dictionary."""
|
|
57
|
-
with
|
|
54
|
+
with safe_open(path, read=True) as file:
|
|
58
55
|
if has_header:
|
|
59
56
|
next(file) # throw away header
|
|
60
57
|
if use_tqdm:
|
|
@@ -84,9 +81,12 @@ def _help_multimap_tsv(
|
|
|
84
81
|
use_tqdm: bool = False,
|
|
85
82
|
has_header: bool = True,
|
|
86
83
|
) -> Iterable[tuple[str, str]]:
|
|
87
|
-
with
|
|
84
|
+
with safe_open(path, read=True) as file:
|
|
88
85
|
if has_header:
|
|
89
|
-
|
|
86
|
+
try:
|
|
87
|
+
next(file) # throw away header
|
|
88
|
+
except gzip.BadGzipFile as e:
|
|
89
|
+
raise ValueError(f"could not open file {path}") from e
|
|
90
90
|
if use_tqdm:
|
|
91
91
|
file = tqdm(file, desc=f"loading TSV from {path}")
|
|
92
92
|
yield from get_reader(file)
|
|
@@ -145,8 +145,32 @@ def write_iterable_tsv(
|
|
|
145
145
|
"""Write a mapping dictionary to a TSV file."""
|
|
146
146
|
it = (row for row in it if all(cell is not None for cell in row))
|
|
147
147
|
it = sorted(it)
|
|
148
|
-
with
|
|
149
|
-
writer = get_writer(file, sep=sep)
|
|
148
|
+
with safe_open_writer(path, delimiter=sep) as writer:
|
|
150
149
|
if header is not None:
|
|
151
150
|
writer.writerow(header)
|
|
152
151
|
writer.writerows(it)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@contextlib.contextmanager
|
|
155
|
+
def safe_open(
|
|
156
|
+
path: str | Path, read: bool, encoding: str | None = None
|
|
157
|
+
) -> Generator[TextIO, None, None]:
|
|
158
|
+
"""Safely open a file for reading or writing text."""
|
|
159
|
+
path = Path(path).expanduser().resolve()
|
|
160
|
+
mode: Literal["rt", "wt"] = "rt" if read else "wt"
|
|
161
|
+
if path.suffix.endswith(".gz"):
|
|
162
|
+
with gzip.open(path, mode=mode, encoding=encoding) as file:
|
|
163
|
+
yield file
|
|
164
|
+
else:
|
|
165
|
+
with open(path, mode=mode) as file:
|
|
166
|
+
yield file
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@contextlib.contextmanager
|
|
170
|
+
def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"): # type:ignore
|
|
171
|
+
"""Open a CSV writer, wrapping :func:`csv.writer`."""
|
|
172
|
+
if isinstance(f, str | Path):
|
|
173
|
+
with safe_open(f, read=False) as file:
|
|
174
|
+
yield csv.writer(file, delimiter=delimiter)
|
|
175
|
+
else:
|
|
176
|
+
yield csv.writer(f, delimiter=delimiter)
|
pyobo/utils/path.py
CHANGED
|
@@ -99,25 +99,22 @@ def ensure_df(
|
|
|
99
99
|
class CacheArtifact(enum.Enum):
|
|
100
100
|
"""An enumeration for."""
|
|
101
101
|
|
|
102
|
-
names = "names.tsv"
|
|
103
|
-
definitions = "definitions.tsv"
|
|
104
|
-
species = "species.tsv"
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
nodes = "nodes.tsv"
|
|
120
|
-
edges = "edges.tsv"
|
|
102
|
+
names = "names.tsv.gz"
|
|
103
|
+
definitions = "definitions.tsv.gz"
|
|
104
|
+
species = "species.tsv.gz"
|
|
105
|
+
mappings = "mappings.tsv.gz"
|
|
106
|
+
relations = "relations.tsv.gz"
|
|
107
|
+
alts = "alt_ids.tsv.gz"
|
|
108
|
+
typedefs = "typedefs.tsv.gz"
|
|
109
|
+
literal_mappings = "literal_mappings.tsv.gz"
|
|
110
|
+
references = "references.tsv.gz"
|
|
111
|
+
obsoletes = "obsolete.tsv.gz"
|
|
112
|
+
|
|
113
|
+
literal_properties = "literal_properties.tsv.gz"
|
|
114
|
+
object_properties = "object_properties.tsv.gz"
|
|
115
|
+
|
|
116
|
+
nodes = "nodes.tsv.gz"
|
|
117
|
+
edges = "edges.tsv.gz"
|
|
121
118
|
|
|
122
119
|
prefixes = "prefixes.json"
|
|
123
120
|
metadata = "metadata.json"
|
pyobo/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobo
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.2
|
|
4
4
|
Summary: A python package for handling and generating OBO
|
|
5
5
|
Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -37,15 +37,16 @@ Requires-Dist: humanize
|
|
|
37
37
|
Requires-Dist: tabulate
|
|
38
38
|
Requires-Dist: cachier
|
|
39
39
|
Requires-Dist: pystow>=0.7.0
|
|
40
|
-
Requires-Dist: bioversions>=0.
|
|
40
|
+
Requires-Dist: bioversions>=0.8.0
|
|
41
41
|
Requires-Dist: bioregistry>=0.12.7
|
|
42
|
-
Requires-Dist: bioontologies>=0.7.
|
|
42
|
+
Requires-Dist: bioontologies>=0.7.2
|
|
43
43
|
Requires-Dist: ssslm>=0.0.13
|
|
44
44
|
Requires-Dist: zenodo-client>=0.3.6
|
|
45
45
|
Requires-Dist: class-resolver>=0.6.0
|
|
46
46
|
Requires-Dist: psycopg2-binary
|
|
47
47
|
Requires-Dist: pydantic>=2.0
|
|
48
|
-
Requires-Dist: curies>=0.10.
|
|
48
|
+
Requires-Dist: curies>=0.10.13
|
|
49
|
+
Requires-Dist: curies-processing>=0.1.0
|
|
49
50
|
Requires-Dist: python-dateutil
|
|
50
51
|
Requires-Dist: networkx>=3.4
|
|
51
52
|
Requires-Dist: drugbank-downloader
|
|
@@ -55,14 +56,8 @@ Requires-Dist: clinicaltrials-downloader>=0.0.2
|
|
|
55
56
|
Requires-Dist: nih-reporter-downloader>=0.0.1
|
|
56
57
|
Requires-Dist: typing-extensions
|
|
57
58
|
Requires-Dist: rdflib
|
|
58
|
-
Requires-Dist: sphinx>=8 ; extra == 'docs'
|
|
59
|
-
Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
|
|
60
|
-
Requires-Dist: sphinx-click ; extra == 'docs'
|
|
61
|
-
Requires-Dist: sphinx-automodapi ; extra == 'docs'
|
|
62
59
|
Requires-Dist: ssslm[gilda] ; extra == 'gilda'
|
|
63
60
|
Requires-Dist: ssslm[gilda-slim] ; extra == 'gilda-slim'
|
|
64
|
-
Requires-Dist: pytest ; extra == 'tests'
|
|
65
|
-
Requires-Dist: coverage[toml] ; extra == 'tests'
|
|
66
61
|
Maintainer: Charles Tapley Hoyt
|
|
67
62
|
Maintainer-email: Charles Tapley Hoyt <cthoyt@gmail.com>
|
|
68
63
|
Requires-Python: >=3.10
|
|
@@ -71,10 +66,8 @@ Project-URL: Documentation, https://pyobo.readthedocs.io
|
|
|
71
66
|
Project-URL: Funding, https://github.com/sponsors/cthoyt
|
|
72
67
|
Project-URL: Homepage, https://github.com/biopragmatics/pyobo
|
|
73
68
|
Project-URL: Repository, https://github.com/biopragmatics/pyobo.git
|
|
74
|
-
Provides-Extra: docs
|
|
75
69
|
Provides-Extra: gilda
|
|
76
70
|
Provides-Extra: gilda-slim
|
|
77
|
-
Provides-Extra: tests
|
|
78
71
|
Description-Content-Type: text/markdown
|
|
79
72
|
|
|
80
73
|
<!--
|
|
@@ -120,14 +113,7 @@ identifiers. It also means all identifiers are strings, no exceptions.
|
|
|
120
113
|
|
|
121
114
|
Note! The first time you run these, they have to download and cache all
|
|
122
115
|
resources. We're not in the business of redistributing data, so all scripts
|
|
123
|
-
should be completely reproducible.
|
|
124
|
-
hosting/downloading pre-compiled versions in `pyobo.aws` if you don't have time
|
|
125
|
-
for that.
|
|
126
|
-
|
|
127
|
-
Note! PyOBO can perform grounding in a limited number of cases, but it is _not_
|
|
128
|
-
a general solution for named entity recognition (NER) or grounding. It's
|
|
129
|
-
suggested to check `Gilda <https://github.com/indralab/gilda>`\_ for a
|
|
130
|
-
no-nonsense solution.
|
|
116
|
+
should be completely reproducible.
|
|
131
117
|
|
|
132
118
|
### Mapping Identifiers and CURIEs
|
|
133
119
|
|
|
@@ -136,19 +122,11 @@ Get mapping of ChEBI identifiers to names:
|
|
|
136
122
|
```python
|
|
137
123
|
import pyobo
|
|
138
124
|
|
|
139
|
-
chebi_id_to_name = pyobo.get_id_name_mapping(
|
|
140
|
-
|
|
141
|
-
name = chebi_id_to_name['132964']
|
|
142
|
-
assert name == 'fluazifop-P-butyl'
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
Or, you don't have time for two lines:
|
|
146
|
-
|
|
147
|
-
```python
|
|
148
|
-
import pyobo
|
|
125
|
+
chebi_id_to_name = pyobo.get_id_name_mapping("chebi")
|
|
126
|
+
assert "fluazifop-P-butyl" == chebi_id_to_name["132964"]
|
|
149
127
|
|
|
150
|
-
|
|
151
|
-
assert
|
|
128
|
+
# or more directly
|
|
129
|
+
assert "fluazifop-P-butyl" == pyobo.get_name("chebi", "132964")
|
|
152
130
|
```
|
|
153
131
|
|
|
154
132
|
Get reverse mapping of ChEBI names to identifiers:
|
|
@@ -156,10 +134,8 @@ Get reverse mapping of ChEBI names to identifiers:
|
|
|
156
134
|
```python
|
|
157
135
|
import pyobo
|
|
158
136
|
|
|
159
|
-
chebi_name_to_id = pyobo.get_name_id_mapping(
|
|
160
|
-
|
|
161
|
-
identifier = chebi_name_to_id['fluazifop-P-butyl']
|
|
162
|
-
assert identifier == '132964'
|
|
137
|
+
chebi_name_to_id = pyobo.get_name_id_mapping("chebi")
|
|
138
|
+
assert "132964" == chebi_name_to_id["fluazifop-P-butyl"]
|
|
163
139
|
```
|
|
164
140
|
|
|
165
141
|
Maybe you live in CURIE world and just want to normalize something like
|
|
@@ -168,8 +144,7 @@ Maybe you live in CURIE world and just want to normalize something like
|
|
|
168
144
|
```python
|
|
169
145
|
import pyobo
|
|
170
146
|
|
|
171
|
-
|
|
172
|
-
assert name == 'fluazifop-P-butyl'
|
|
147
|
+
assert "fluazifop-P-butyl" == pyobo.get_name_by_curie("CHEBI:132964")
|
|
173
148
|
```
|
|
174
149
|
|
|
175
150
|
Sometimes you accidentally got an old CURIE. It can be mapped to the more recent
|
|
@@ -181,11 +156,11 @@ from pyobo import Reference
|
|
|
181
156
|
|
|
182
157
|
# Look up DNA-binding transcription factor activity (go:0003700)
|
|
183
158
|
# based on an old id
|
|
184
|
-
primary_curie = pyobo.get_primary_curie(
|
|
185
|
-
assert primary_curie ==
|
|
159
|
+
primary_curie = pyobo.get_primary_curie("go:0001071")
|
|
160
|
+
assert primary_curie == "go:0003700"
|
|
186
161
|
|
|
187
162
|
# If it's already the primary, it just gets returned
|
|
188
|
-
assert Reference.from_curie(
|
|
163
|
+
assert Reference.from_curie("go:0003700") == pyobo.get_primary_curie("go:0003700")
|
|
189
164
|
```
|
|
190
165
|
|
|
191
166
|
### Mapping Species
|
|
@@ -196,11 +171,10 @@ WikiPathway identifiers to species (as NCBI taxonomy identifiers):
|
|
|
196
171
|
```python
|
|
197
172
|
import pyobo
|
|
198
173
|
|
|
199
|
-
wikipathways_id_to_species = pyobo.get_id_species_mapping(
|
|
174
|
+
wikipathways_id_to_species = pyobo.get_id_species_mapping("wikipathways")
|
|
200
175
|
|
|
201
176
|
# Apoptosis (Homo sapiens)
|
|
202
|
-
|
|
203
|
-
assert taxonomy_id == '9606'
|
|
177
|
+
assert "9606" == wikipathways_id_to_species["WP254"]
|
|
204
178
|
```
|
|
205
179
|
|
|
206
180
|
Or, you don't have time for two lines:
|
|
@@ -209,8 +183,8 @@ Or, you don't have time for two lines:
|
|
|
209
183
|
import pyobo
|
|
210
184
|
|
|
211
185
|
# Apoptosis (Homo sapiens)
|
|
212
|
-
taxonomy_id = pyobo.get_species(
|
|
213
|
-
assert taxonomy_id ==
|
|
186
|
+
taxonomy_id = pyobo.get_species("wikipathways", "WP254")
|
|
187
|
+
assert taxonomy_id == "9606"
|
|
214
188
|
```
|
|
215
189
|
|
|
216
190
|
### Grounding
|
|
@@ -222,13 +196,13 @@ it up and its preferred label.
|
|
|
222
196
|
```python
|
|
223
197
|
import pyobo
|
|
224
198
|
|
|
225
|
-
reference = pyobo.ground(
|
|
226
|
-
assert reference.prefix ==
|
|
227
|
-
assert reference.identifier ==
|
|
228
|
-
assert reference.name ==
|
|
199
|
+
reference = pyobo.ground("chebi", "Fusilade II")
|
|
200
|
+
assert reference.prefix == "chebi"
|
|
201
|
+
assert reference.identifier == "132964"
|
|
202
|
+
assert reference.name == "fluazifop-P-butyl"
|
|
229
203
|
|
|
230
204
|
# When failure happens...
|
|
231
|
-
reference = pyobo.ground(
|
|
205
|
+
reference = pyobo.ground("chebi", "Definitely not a real name")
|
|
232
206
|
assert reference is None
|
|
233
207
|
```
|
|
234
208
|
|
|
@@ -240,10 +214,10 @@ false positives in case of conflicts):
|
|
|
240
214
|
import pyobo
|
|
241
215
|
|
|
242
216
|
# looking for phenotypes/pathways
|
|
243
|
-
reference = pyobo.ground([
|
|
244
|
-
assert reference.prefix ==
|
|
245
|
-
assert reference.identifier ==
|
|
246
|
-
assert reference.name ==
|
|
217
|
+
reference = pyobo.ground(["efo", "go"], "ERAD")
|
|
218
|
+
assert reference.prefix == "go"
|
|
219
|
+
assert reference.identifier == "0030433"
|
|
220
|
+
assert reference.name == "ubiquitin-dependent ERAD pathway"
|
|
247
221
|
```
|
|
248
222
|
|
|
249
223
|
### Cross-referencing
|
|
@@ -253,10 +227,10 @@ Get xrefs from ChEBI to PubChem:
|
|
|
253
227
|
```python
|
|
254
228
|
import pyobo
|
|
255
229
|
|
|
256
|
-
chebi_id_to_pubchem_compound_id = pyobo.get_filtered_xrefs(
|
|
230
|
+
chebi_id_to_pubchem_compound_id = pyobo.get_filtered_xrefs("chebi", "pubchem.compound")
|
|
257
231
|
|
|
258
|
-
pubchem_compound_id = chebi_id_to_pubchem_compound_id[
|
|
259
|
-
assert pubchem_compound_id ==
|
|
232
|
+
pubchem_compound_id = chebi_id_to_pubchem_compound_id["132964"]
|
|
233
|
+
assert pubchem_compound_id == "3033674"
|
|
260
234
|
```
|
|
261
235
|
|
|
262
236
|
If you don't have time for two lines:
|
|
@@ -264,8 +238,8 @@ If you don't have time for two lines:
|
|
|
264
238
|
```python
|
|
265
239
|
import pyobo
|
|
266
240
|
|
|
267
|
-
pubchem_compound_id = pyobo.get_xref(
|
|
268
|
-
assert pubchem_compound_id ==
|
|
241
|
+
pubchem_compound_id = pyobo.get_xref("chebi", "132964", "pubchem.compound")
|
|
242
|
+
assert pubchem_compound_id == "3033674"
|
|
269
243
|
```
|
|
270
244
|
|
|
271
245
|
Get xrefs from Entrez to HGNC, but they're only available through HGNC, so you
|
|
@@ -274,13 +248,13 @@ need to flip them:
|
|
|
274
248
|
```python
|
|
275
249
|
import pyobo
|
|
276
250
|
|
|
277
|
-
hgnc_id_to_ncbigene_id = pyobo.get_filtered_xrefs(
|
|
251
|
+
hgnc_id_to_ncbigene_id = pyobo.get_filtered_xrefs("hgnc", "ncbigene")
|
|
278
252
|
ncbigene_id_to_hgnc_id = {
|
|
279
253
|
ncbigene_id: hgnc_id
|
|
280
254
|
for hgnc_id, ncbigene_id in hgnc_id_to_ncbigene_id.items()
|
|
281
255
|
}
|
|
282
|
-
mapt_hgnc = ncbigene_id_to_hgnc_id[
|
|
283
|
-
assert mapt_hgnc ==
|
|
256
|
+
mapt_hgnc = ncbigene_id_to_hgnc_id["4137"]
|
|
257
|
+
assert mapt_hgnc == "6893"
|
|
284
258
|
```
|
|
285
259
|
|
|
286
260
|
Since this is a common pattern, there's a keyword argument `flip` that does this
|
|
@@ -289,9 +263,9 @@ for you:
|
|
|
289
263
|
```python
|
|
290
264
|
import pyobo
|
|
291
265
|
|
|
292
|
-
ncbigene_id_to_hgnc_id = pyobo.get_filtered_xrefs(
|
|
293
|
-
mapt_hgnc_id = ncbigene_id_to_hgnc_id[
|
|
294
|
-
assert mapt_hgnc_id ==
|
|
266
|
+
ncbigene_id_to_hgnc_id = pyobo.get_filtered_xrefs("hgnc", "ncbigene", flip=True)
|
|
267
|
+
mapt_hgnc_id = ncbigene_id_to_hgnc_id["4137"]
|
|
268
|
+
assert mapt_hgnc_id == "6893"
|
|
295
269
|
```
|
|
296
270
|
|
|
297
271
|
If you don't have time for two lines (I admit this one is a bit confusing) and
|
|
@@ -300,8 +274,8 @@ need to flip it:
|
|
|
300
274
|
```python
|
|
301
275
|
import pyobo
|
|
302
276
|
|
|
303
|
-
hgnc_id = pyobo.get_xref(
|
|
304
|
-
assert hgnc_id ==
|
|
277
|
+
hgnc_id = pyobo.get_xref("hgnc", "4137", "ncbigene", flip=True)
|
|
278
|
+
assert hgnc_id == "6893"
|
|
305
279
|
```
|
|
306
280
|
|
|
307
281
|
### Properties
|
|
@@ -313,11 +287,11 @@ basis.
|
|
|
313
287
|
import pyobo
|
|
314
288
|
|
|
315
289
|
# I don't make the rules. I wouldn't have chosen this as the key for this property. It could be any string
|
|
316
|
-
chebi_smiles_property =
|
|
317
|
-
chebi_id_to_smiles = pyobo.get_filtered_properties_mapping(
|
|
290
|
+
chebi_smiles_property = "http://purl.obolibrary.org/obo/chebi/smiles"
|
|
291
|
+
chebi_id_to_smiles = pyobo.get_filtered_properties_mapping("chebi", chebi_smiles_property)
|
|
318
292
|
|
|
319
|
-
smiles = chebi_id_to_smiles[
|
|
320
|
-
assert smiles ==
|
|
293
|
+
smiles = chebi_id_to_smiles["132964"]
|
|
294
|
+
assert smiles == "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
|
|
321
295
|
```
|
|
322
296
|
|
|
323
297
|
If you don't have time for two lines:
|
|
@@ -325,8 +299,8 @@ If you don't have time for two lines:
|
|
|
325
299
|
```python
|
|
326
300
|
import pyobo
|
|
327
301
|
|
|
328
|
-
smiles = pyobo.get_property(
|
|
329
|
-
assert smiles ==
|
|
302
|
+
smiles = pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles")
|
|
303
|
+
assert smiles == "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
|
|
330
304
|
```
|
|
331
305
|
|
|
332
306
|
### Hierarchy
|
|
@@ -338,12 +312,12 @@ import pyobo
|
|
|
338
312
|
from pyobo import Reference
|
|
339
313
|
|
|
340
314
|
# check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process
|
|
341
|
-
assert Reference.from_curie(
|
|
315
|
+
assert Reference.from_curie("go:0008219") in pyobo.get_ancestors("go", "0006915")
|
|
342
316
|
|
|
343
317
|
# check that go:0070246 ! natural killer cell apoptotic process is a
|
|
344
318
|
# descendant of go:0006915 ! apoptotic process
|
|
345
|
-
apopototic_process_descendants = pyobo.get_descendants(
|
|
346
|
-
assert Reference.from_curie(
|
|
319
|
+
apopototic_process_descendants = pyobo.get_descendants("go", "0006915")
|
|
320
|
+
assert Reference.from_curie("go:0070246") in apopototic_process_descendants
|
|
347
321
|
```
|
|
348
322
|
|
|
349
323
|
Get the sub-hierarchy below a given node:
|
|
@@ -353,11 +327,11 @@ import pyobo
|
|
|
353
327
|
from pyobo import Reference
|
|
354
328
|
|
|
355
329
|
# get the descendant graph of go:0006915 ! apoptotic process
|
|
356
|
-
apopototic_process_subhierarchy = pyobo.get_subhierarchy(
|
|
330
|
+
apopototic_process_subhierarchy = pyobo.get_subhierarchy("go", "0006915")
|
|
357
331
|
|
|
358
332
|
# check that go:0070246 ! natural killer cell apoptotic process is a
|
|
359
333
|
# descendant of go:0006915 ! apoptotic process through the subhierarchy
|
|
360
|
-
assert Reference.from_curie(
|
|
334
|
+
assert Reference.from_curie("go:0070246") in apopototic_process_subhierarchy
|
|
361
335
|
```
|
|
362
336
|
|
|
363
337
|
Get a hierarchy with properties preloaded in the node data dictionaries:
|
|
@@ -366,12 +340,12 @@ Get a hierarchy with properties preloaded in the node data dictionaries:
|
|
|
366
340
|
import pyobo
|
|
367
341
|
from pyobo import Reference
|
|
368
342
|
|
|
369
|
-
prop =
|
|
370
|
-
chebi_hierarchy = pyobo.get_hierarchy(
|
|
343
|
+
prop = "http://purl.obolibrary.org/obo/chebi/smiles"
|
|
344
|
+
chebi_hierarchy = pyobo.get_hierarchy("chebi", properties=[prop])
|
|
371
345
|
|
|
372
|
-
assert Reference.from_curie(
|
|
373
|
-
assert prop in chebi_hierarchy.nodes[
|
|
374
|
-
assert chebi_hierarchy.nodes[
|
|
346
|
+
assert Reference.from_curie("chebi:132964") in chebi_hierarchy
|
|
347
|
+
assert prop in chebi_hierarchy.nodes["chebi:132964"]
|
|
348
|
+
assert chebi_hierarchy.nodes["chebi:132964"][prop] == "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
|
|
375
349
|
```
|
|
376
350
|
|
|
377
351
|
### Relations
|
|
@@ -381,9 +355,9 @@ way)
|
|
|
381
355
|
|
|
382
356
|
```python
|
|
383
357
|
>>> import pyobo
|
|
384
|
-
>>> human_mapt_hgnc_id =
|
|
385
|
-
>>> mouse_mapt_mgi_id =
|
|
386
|
-
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping(
|
|
358
|
+
>>> human_mapt_hgnc_id = "6893"
|
|
359
|
+
>>> mouse_mapt_mgi_id = "97180"
|
|
360
|
+
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi")
|
|
387
361
|
>>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
|
|
388
362
|
```
|
|
389
363
|
|
|
@@ -392,9 +366,9 @@ If you want to do it in one line, use:
|
|
|
392
366
|
```python
|
|
393
367
|
|
|
394
368
|
>>> import pyobo
|
|
395
|
-
>>> human_mapt_hgnc_id =
|
|
396
|
-
>>> mouse_mapt_mgi_id =
|
|
397
|
-
>>> assert mouse_mapt_mgi_id == pyobo.get_relation(
|
|
369
|
+
>>> human_mapt_hgnc_id = "6893"
|
|
370
|
+
>>> mouse_mapt_mgi_id = "97180"
|
|
371
|
+
>>> assert mouse_mapt_mgi_id == pyobo.get_relation("hgnc", "ro:HOM0000017", "mgi", human_mapt_hgnc_id)
|
|
398
372
|
```
|
|
399
373
|
|
|
400
374
|
### Writings Tests that Use PyOBO
|
|
@@ -410,8 +384,8 @@ import pyobo
|
|
|
410
384
|
from pyobo.mocks import get_mock_id_name_mapping
|
|
411
385
|
|
|
412
386
|
mock_id_name_mapping = get_mock_id_name_mapping({
|
|
413
|
-
|
|
414
|
-
|
|
387
|
+
"chebi": {
|
|
388
|
+
"132964": "fluazifop-P-butyl",
|
|
415
389
|
},
|
|
416
390
|
})
|
|
417
391
|
|
|
@@ -419,34 +393,9 @@ class MyTestCase(unittest.TestCase):
|
|
|
419
393
|
def my_test(self):
|
|
420
394
|
with mock_id_name_mapping:
|
|
421
395
|
# use functions directly, or use your functions that wrap them
|
|
422
|
-
pyobo.get_name(
|
|
396
|
+
pyobo.get_name("chebi", "1234")
|
|
423
397
|
```
|
|
424
398
|
|
|
425
|
-
## Curation of the Bioregistry
|
|
426
|
-
|
|
427
|
-
In order to normalize references and identify resources, PyOBO uses the
|
|
428
|
-
[Bioregistry](https://github.com/bioregistry/bioregistry). It used to be a part
|
|
429
|
-
of PyOBO, but has since been externalized for more general reuse.
|
|
430
|
-
|
|
431
|
-
At
|
|
432
|
-
[src/pyobo/registries/metaregistry.json](https://github.com/pyobo/pyobo/blob/master/src/pyobo/registries/metaregistry.json)
|
|
433
|
-
is the curated "metaregistry". This is a source of information that contains all
|
|
434
|
-
sorts of fixes for missing/wrong information in MIRIAM, OLS, and OBO Foundry;
|
|
435
|
-
entries that don't appear in any of them; additional synonym information for
|
|
436
|
-
each namespace/prefix; rules for normalizing xrefs and CURIEs, etc.
|
|
437
|
-
|
|
438
|
-
Other entries in the metaregistry:
|
|
439
|
-
|
|
440
|
-
- The `"remappings"->"full"` entry is a dictionary from strings that might
|
|
441
|
-
follow `xref:` in a given OBO file that need to be completely replaced, due to
|
|
442
|
-
incorrect formatting
|
|
443
|
-
- The `"remappings"->"prefix"` entry contains a dictionary of prefixes for xrefs
|
|
444
|
-
that need to be remapped. Several rules, for example, remove superfluous
|
|
445
|
-
spaces that occur inside CURIEs or and others address instances of the GOGO
|
|
446
|
-
issue.
|
|
447
|
-
- The `"blacklists"` entry contains rules for throwing out malformed xrefs based
|
|
448
|
-
on full string, just prefix, or just suffix.
|
|
449
|
-
|
|
450
399
|
## Troubleshooting
|
|
451
400
|
|
|
452
401
|
The OBO Foundry seems to be pretty unstable with respect to the URLs to OBO
|