pyobo 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/api/alts.py +13 -8
- pyobo/api/hierarchy.py +9 -5
- pyobo/api/metadata.py +6 -3
- pyobo/api/names.py +34 -11
- pyobo/api/relations.py +11 -3
- pyobo/api/species.py +3 -3
- pyobo/api/typedefs.py +6 -2
- pyobo/api/utils.py +5 -0
- pyobo/api/xrefs.py +10 -3
- pyobo/aws.py +12 -7
- pyobo/cli/lookup.py +5 -4
- pyobo/constants.py +31 -10
- pyobo/gilda_utils.py +21 -0
- pyobo/identifier_utils.py +22 -5
- pyobo/reader.py +1 -1
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/antibodyregistry.py +7 -6
- pyobo/sources/biogrid.py +8 -4
- pyobo/sources/ccle.py +5 -5
- pyobo/sources/credit.py +68 -0
- pyobo/sources/geonames.py +27 -9
- pyobo/sources/hgnc.py +2 -2
- pyobo/sources/mesh.py +9 -7
- pyobo/sources/msigdb.py +1 -1
- pyobo/sources/npass.py +1 -1
- pyobo/sources/pubchem.py +3 -3
- pyobo/sources/rgd.py +1 -1
- pyobo/sources/rhea.py +2 -2
- pyobo/sources/ror.py +67 -21
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/struct/struct.py +4 -3
- pyobo/struct/typedef.py +10 -0
- pyobo/utils/path.py +2 -1
- pyobo/version.py +1 -1
- pyobo/xrefdb/sources/__init__.py +6 -3
- pyobo/xrefdb/sources/chembl.py +5 -5
- pyobo/xrefdb/sources/pubchem.py +3 -2
- pyobo/xrefdb/sources/wikidata.py +8 -1
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/METADATA +23 -23
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/RECORD +44 -44
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/WHEEL +1 -1
- pyobo/xrefdb/bengo.py +0 -44
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/LICENSE +0 -0
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/entry_points.txt +0 -0
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/top_level.txt +0 -0
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
import logging
|
|
6
6
|
from typing import Iterable, Mapping, Optional
|
|
7
7
|
|
|
8
|
-
import bioversions
|
|
9
8
|
import pandas as pd
|
|
10
9
|
from bioregistry.utils import removeprefix
|
|
11
10
|
from tqdm.auto import tqdm
|
|
12
11
|
|
|
13
12
|
from pyobo import Obo, Term
|
|
13
|
+
from pyobo.api.utils import get_version
|
|
14
14
|
from pyobo.utils.path import ensure_df
|
|
15
15
|
|
|
16
16
|
__all__ = [
|
|
@@ -24,9 +24,10 @@ URL = "http://antibodyregistry.org/php/fileHandler.php"
|
|
|
24
24
|
CHUNKSIZE = 20_000
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def get_chunks(force: bool = False) -> pd.DataFrame:
|
|
27
|
+
def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
|
|
28
28
|
"""Get the BioGRID identifiers mapping dataframe."""
|
|
29
|
-
version = bioversions.get_version(PREFIX)
|
|
29
|
+
if version is None:
|
|
30
|
+
version = get_version(PREFIX)
|
|
30
31
|
df = ensure_df(
|
|
31
32
|
PREFIX,
|
|
32
33
|
url=URL,
|
|
@@ -47,7 +48,7 @@ class AntibodyRegistryGetter(Obo):
|
|
|
47
48
|
|
|
48
49
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
49
50
|
"""Iterate over terms in the ontology."""
|
|
50
|
-
return iter_terms(force=force)
|
|
51
|
+
return iter_terms(force=force, version=self._version_or_raise)
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
def get_obo(*, force: bool = False) -> Obo:
|
|
@@ -74,9 +75,9 @@ SKIP = {
|
|
|
74
75
|
}
|
|
75
76
|
|
|
76
77
|
|
|
77
|
-
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
78
|
+
def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
|
|
78
79
|
"""Iterate over antibodies."""
|
|
79
|
-
chunks = get_chunks(force=force)
|
|
80
|
+
chunks = get_chunks(force=force, version=version)
|
|
80
81
|
needs_curating = set()
|
|
81
82
|
# df['vendor'] = df['vendor'].map(bioregistry.normalize_prefix)
|
|
82
83
|
it = tqdm(chunks, desc=f"{PREFIX}, chunkssize={CHUNKSIZE}")
|
pyobo/sources/biogrid.py
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
"""Extract and convert BioGRID identifiers."""
|
|
4
4
|
|
|
5
|
+
from functools import partial
|
|
5
6
|
from typing import Mapping, Optional
|
|
6
7
|
|
|
7
|
-
import bioversions
|
|
8
8
|
import pandas as pd
|
|
9
9
|
|
|
10
|
-
from pyobo.
|
|
10
|
+
from pyobo.api.utils import get_version
|
|
11
11
|
from pyobo.resources.ncbitaxon import get_ncbitaxon_id
|
|
12
12
|
from pyobo.utils.cache import cached_mapping
|
|
13
13
|
from pyobo.utils.path import ensure_df, prefix_directory_join
|
|
@@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]:
|
|
|
52
52
|
|
|
53
53
|
def get_df() -> pd.DataFrame:
|
|
54
54
|
"""Get the BioGRID identifiers mapping dataframe."""
|
|
55
|
-
version = bioversions.get_version("biogrid")
|
|
55
|
+
version = get_version("biogrid")
|
|
56
56
|
url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
|
|
57
57
|
df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
|
|
58
58
|
df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
|
|
@@ -61,7 +61,11 @@ def get_df() -> pd.DataFrame:
|
|
|
61
61
|
|
|
62
62
|
@cached_mapping(
|
|
63
63
|
path=prefix_directory_join(
|
|
64
|
-
PREFIX,
|
|
64
|
+
PREFIX,
|
|
65
|
+
"cache",
|
|
66
|
+
"xrefs",
|
|
67
|
+
name="ncbigene.tsv",
|
|
68
|
+
version=partial(get_version, PREFIX),
|
|
65
69
|
),
|
|
66
70
|
header=["biogrid_id", "ncbigene_id"],
|
|
67
71
|
)
|
pyobo/sources/ccle.py
CHANGED
|
@@ -50,7 +50,7 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T
|
|
|
50
50
|
yield term
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
def get_version() -> str:
|
|
53
|
+
def get_ccle_static_version() -> str:
|
|
54
54
|
"""Get the default version of CCLE's cell lines."""
|
|
55
55
|
return "2019"
|
|
56
56
|
|
|
@@ -58,21 +58,21 @@ def get_version() -> str:
|
|
|
58
58
|
def get_url(version: Optional[str] = None) -> str:
|
|
59
59
|
"""Get the cBioPortal URL for the given version of CCLE's cell lines."""
|
|
60
60
|
if version is None:
|
|
61
|
-
version = get_version()
|
|
61
|
+
version = get_ccle_static_version()
|
|
62
62
|
return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
def get_inner(version: Optional[str] = None) -> str:
|
|
66
66
|
"""Get the inner tarfile path."""
|
|
67
67
|
if version is None:
|
|
68
|
-
version = get_version()
|
|
68
|
+
version = get_ccle_static_version()
|
|
69
69
|
return f"ccle_broad_{version}/data_clinical_sample.txt"
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
def ensure(version: Optional[str] = None, **kwargs) -> Path:
|
|
73
73
|
"""Ensure the given version is downloaded."""
|
|
74
74
|
if version is None:
|
|
75
|
-
version = get_version()
|
|
75
|
+
version = get_ccle_static_version()
|
|
76
76
|
url = get_url(version=version)
|
|
77
77
|
return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
|
|
78
78
|
|
|
@@ -80,7 +80,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path:
|
|
|
80
80
|
def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
|
|
81
81
|
"""Get the CCLE clinical sample dataframe."""
|
|
82
82
|
if version is None:
|
|
83
|
-
version = get_version()
|
|
83
|
+
version = get_ccle_static_version()
|
|
84
84
|
path = ensure(version=version, force=force)
|
|
85
85
|
inner_path = get_inner(version=version)
|
|
86
86
|
with tarfile.open(path) as tf:
|
pyobo/sources/credit.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Converter for the Contributor Roles Taxonomy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from more_itertools import chunked
|
|
9
|
+
|
|
10
|
+
from pyobo.struct import Obo, Term
|
|
11
|
+
from pyobo.utils.path import ensure_path
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CreditGetter",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
|
|
18
|
+
PREFIX = "credit"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CreditGetter(Obo):
|
|
22
|
+
"""An ontology representation of the Contributor Roles Taxonomy."""
|
|
23
|
+
|
|
24
|
+
ontology = PREFIX
|
|
25
|
+
static_version = "2022"
|
|
26
|
+
idspaces = {
|
|
27
|
+
PREFIX: "https://credit.niso.org/contributor-roles/",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
31
|
+
"""Iterate over terms in the ontology."""
|
|
32
|
+
return get_terms(force=force)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_obo(force: bool = False) -> Obo:
|
|
36
|
+
"""Get RGD as OBO."""
|
|
37
|
+
return CreditGetter(force=force)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_terms(force: bool = False) -> list[Term]:
|
|
41
|
+
"""Get terms from the Contributor Roles Taxonomy via GitHub."""
|
|
42
|
+
path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
|
|
43
|
+
with open(path) as f:
|
|
44
|
+
data = json.load(f)
|
|
45
|
+
terms = []
|
|
46
|
+
for x in data:
|
|
47
|
+
name = x["name"].removesuffix(".md").lower()
|
|
48
|
+
|
|
49
|
+
pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
|
|
50
|
+
with open(pp) as f:
|
|
51
|
+
header, *rest = f.read().splitlines()
|
|
52
|
+
name = header = header.removeprefix("# Contributor Roles/")
|
|
53
|
+
dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
|
|
54
|
+
identifier = (
|
|
55
|
+
dd["Canonical URL"]
|
|
56
|
+
.removeprefix("https://credit.niso.org/contributor-roles/")
|
|
57
|
+
.rstrip("/")
|
|
58
|
+
)
|
|
59
|
+
desc = dd["Short definition"]
|
|
60
|
+
terms.append(
|
|
61
|
+
Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
return terms
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
if __name__ == "__main__":
|
|
68
|
+
get_obo(force=True).write_default(write_obo=True)
|
pyobo/sources/geonames.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Get terms from geonames."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
4
6
|
from typing import Collection, Iterable, Mapping
|
|
5
7
|
|
|
@@ -146,15 +148,7 @@ def get_code_to_admin2(
|
|
|
146
148
|
return code_to_admin2
|
|
147
149
|
|
|
148
150
|
|
|
149
|
-
def get_cities(
|
|
150
|
-
code_to_country,
|
|
151
|
-
code_to_admin1,
|
|
152
|
-
code_to_admin2,
|
|
153
|
-
*,
|
|
154
|
-
minimum_population: int = 100_000,
|
|
155
|
-
force: bool = False,
|
|
156
|
-
) -> Mapping[str, Term]:
|
|
157
|
-
"""Get a mapping from city code to term."""
|
|
151
|
+
def _get_cities_df(force: bool = False) -> pd.DataFrame:
|
|
158
152
|
columns = [
|
|
159
153
|
"geonames_id",
|
|
160
154
|
"name",
|
|
@@ -184,7 +178,19 @@ def get_cities(
|
|
|
184
178
|
names=columns,
|
|
185
179
|
dtype=str,
|
|
186
180
|
)
|
|
181
|
+
return cities_df
|
|
182
|
+
|
|
187
183
|
|
|
184
|
+
def get_cities(
|
|
185
|
+
code_to_country,
|
|
186
|
+
code_to_admin1,
|
|
187
|
+
code_to_admin2,
|
|
188
|
+
*,
|
|
189
|
+
minimum_population: int = 100_000,
|
|
190
|
+
force: bool = False,
|
|
191
|
+
) -> Mapping[str, Term]:
|
|
192
|
+
"""Get a mapping from city code to term."""
|
|
193
|
+
cities_df = _get_cities_df(force=force)
|
|
188
194
|
cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
|
|
189
195
|
cities_df.synonyms = cities_df.synonyms.str.split(",")
|
|
190
196
|
|
|
@@ -235,5 +241,17 @@ def get_cities(
|
|
|
235
241
|
return terms
|
|
236
242
|
|
|
237
243
|
|
|
244
|
+
def get_city_to_country() -> dict[str, str]:
|
|
245
|
+
"""Get a mapping from city GeoNames to country GeoNames id."""
|
|
246
|
+
rv = {}
|
|
247
|
+
code_to_country = get_code_to_country()
|
|
248
|
+
cities_df = _get_cities_df()
|
|
249
|
+
for city_geonames_id, country_code in cities_df[["geonames_id", "country_code"]].values:
|
|
250
|
+
if pd.isna(city_geonames_id) or pd.isna(country_code):
|
|
251
|
+
continue
|
|
252
|
+
rv[city_geonames_id] = code_to_country[country_code].identifier
|
|
253
|
+
return rv
|
|
254
|
+
|
|
255
|
+
|
|
238
256
|
if __name__ == "__main__":
|
|
239
257
|
GeonamesGetter().write_default(write_obo=True, force=True)
|
pyobo/sources/hgnc.py
CHANGED
|
@@ -10,10 +10,10 @@ from collections import Counter, defaultdict
|
|
|
10
10
|
from operator import attrgetter
|
|
11
11
|
from typing import DefaultDict, Dict, Iterable, Optional
|
|
12
12
|
|
|
13
|
-
import bioversions
|
|
14
13
|
from tabulate import tabulate
|
|
15
14
|
from tqdm.auto import tqdm
|
|
16
15
|
|
|
16
|
+
from pyobo.api.utils import get_version
|
|
17
17
|
from pyobo.struct import (
|
|
18
18
|
Obo,
|
|
19
19
|
Reference,
|
|
@@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo:
|
|
|
241
241
|
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901
|
|
242
242
|
"""Get HGNC terms."""
|
|
243
243
|
if version is None:
|
|
244
|
-
version = bioversions.get_version("hgnc")
|
|
244
|
+
version = get_version("hgnc")
|
|
245
245
|
unhandled_entry_keys: typing.Counter[str] = Counter()
|
|
246
246
|
unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
|
|
247
247
|
path = ensure_path(
|
pyobo/sources/mesh.py
CHANGED
|
@@ -11,6 +11,7 @@ from xml.etree.ElementTree import Element
|
|
|
11
11
|
|
|
12
12
|
from tqdm.auto import tqdm
|
|
13
13
|
|
|
14
|
+
from pyobo.api.utils import get_version
|
|
14
15
|
from pyobo.identifier_utils import standardize_ec
|
|
15
16
|
from pyobo.struct import Obo, Reference, Synonym, Term
|
|
16
17
|
from pyobo.utils.cache import cached_json, cached_mapping
|
|
@@ -318,21 +319,22 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
|
|
|
318
319
|
]
|
|
319
320
|
|
|
320
321
|
|
|
321
|
-
def get_mesh_category_curies(
|
|
322
|
+
def get_mesh_category_curies(
|
|
323
|
+
letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
|
|
324
|
+
) -> List[str]:
|
|
322
325
|
"""Get the MeSH LUIDs for a category, by letter (e.g., "A").
|
|
323
326
|
|
|
324
327
|
:param letter: The MeSH tree, A for anatomy, C for disease, etc.
|
|
325
328
|
:param skip: An optional collection of MeSH tree codes to skip, such as "A03"
|
|
329
|
+
:param version: The MeSH version to use. Defaults to latest
|
|
326
330
|
:returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
|
|
327
331
|
|
|
328
332
|
.. seealso:: https://meshb.nlm.nih.gov/treeView
|
|
329
333
|
"""
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
raise ValueError
|
|
335
|
-
tree_to_mesh = get_tree_to_mesh_id(mesh_version)
|
|
334
|
+
if version is None:
|
|
335
|
+
version = get_version("mesh")
|
|
336
|
+
assert version is not None
|
|
337
|
+
tree_to_mesh = get_tree_to_mesh_id(version=version)
|
|
336
338
|
rv = []
|
|
337
339
|
for i in range(1, 100):
|
|
338
340
|
key = f"{letter}{i:02}"
|
pyobo/sources/msigdb.py
CHANGED
|
@@ -137,7 +137,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
137
137
|
def _get_definition(attrib) -> Optional[str]:
|
|
138
138
|
rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None
|
|
139
139
|
if rv is not None:
|
|
140
|
-
return rv.replace("\d", "").replace("\s", "") # noqa: W605
|
|
140
|
+
return rv.replace(r"\d", "").replace(r"\s", "") # noqa: W605
|
|
141
141
|
return None
|
|
142
142
|
|
|
143
143
|
|
pyobo/sources/npass.py
CHANGED
pyobo/sources/pubchem.py
CHANGED
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
import logging
|
|
6
6
|
from typing import Iterable, Mapping, Optional
|
|
7
7
|
|
|
8
|
-
import bioversions
|
|
9
8
|
import pandas as pd
|
|
10
9
|
from bioregistry.utils import removeprefix
|
|
11
10
|
from tqdm.auto import tqdm
|
|
12
11
|
|
|
13
12
|
from ..api import get_name_id_mapping
|
|
13
|
+
from ..api.utils import get_version
|
|
14
14
|
from ..struct import Obo, Reference, Synonym, Term
|
|
15
15
|
from ..utils.iter import iterate_gzips_together
|
|
16
16
|
from ..utils.path import ensure_df, ensure_path
|
|
@@ -26,7 +26,7 @@ PREFIX = "pubchem.compound"
|
|
|
26
26
|
|
|
27
27
|
def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
|
|
28
28
|
if version is None:
|
|
29
|
-
version = bioversions.get_version("pubchem")
|
|
29
|
+
version = get_version("pubchem")
|
|
30
30
|
return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"
|
|
31
31
|
|
|
32
32
|
|
|
@@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:
|
|
|
100
100
|
|
|
101
101
|
def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
|
|
102
102
|
if version is None:
|
|
103
|
-
version = bioversions.get_version("pubchem")
|
|
103
|
+
version = get_version("pubchem")
|
|
104
104
|
# 2 tab-separated columns: compound_id, name
|
|
105
105
|
cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
|
|
106
106
|
cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)
|
pyobo/sources/rgd.py
CHANGED
|
@@ -28,7 +28,7 @@ old_name_type = SynonymTypeDef.from_text("old_name")
|
|
|
28
28
|
|
|
29
29
|
# NOTE unigene id was discontinue in January 18th, 2021 dump
|
|
30
30
|
|
|
31
|
-
GENES_URL = "https://download.rgd.mcw.edu/data_release/
|
|
31
|
+
GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"
|
|
32
32
|
GENES_HEADER = [
|
|
33
33
|
"GENE_RGD_ID",
|
|
34
34
|
"SYMBOL",
|
pyobo/sources/rhea.py
CHANGED
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
import logging
|
|
6
6
|
from typing import TYPE_CHECKING, Dict, Iterable, Optional
|
|
7
7
|
|
|
8
|
-
import bioversions
|
|
9
8
|
import pystow
|
|
10
9
|
|
|
10
|
+
from pyobo.api.utils import get_version
|
|
11
11
|
from pyobo.struct import Obo, Reference, Term
|
|
12
12
|
from pyobo.struct.typedef import (
|
|
13
13
|
TypeDef,
|
|
@@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
|
|
|
63
63
|
"""Get the Rhea RDF graph."""
|
|
64
64
|
# see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
|
|
65
65
|
if version is None:
|
|
66
|
-
version = bioversions.get_version(PREFIX)
|
|
66
|
+
version = get_version(PREFIX)
|
|
67
67
|
return pystow.ensure_rdf(
|
|
68
68
|
"pyobo",
|
|
69
69
|
"raw",
|
pyobo/sources/ror.py
CHANGED
|
@@ -1,34 +1,40 @@
|
|
|
1
1
|
"""Convert the Research Organization Registry (ROR) into an ontology."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import json
|
|
4
6
|
import zipfile
|
|
5
|
-
from typing import Iterable
|
|
7
|
+
from typing import Any, Iterable
|
|
6
8
|
|
|
7
9
|
import bioregistry
|
|
8
10
|
import zenodo_client
|
|
9
11
|
from tqdm.auto import tqdm
|
|
10
12
|
|
|
11
|
-
from pyobo.struct import Obo, Reference, Term
|
|
13
|
+
from pyobo.struct import Obo, Reference, Term
|
|
12
14
|
from pyobo.struct.struct import acronym
|
|
15
|
+
from pyobo.struct.typedef import (
|
|
16
|
+
has_homepage,
|
|
17
|
+
has_part,
|
|
18
|
+
has_predecessor,
|
|
19
|
+
has_successor,
|
|
20
|
+
located_in,
|
|
21
|
+
part_of,
|
|
22
|
+
see_also,
|
|
23
|
+
)
|
|
13
24
|
|
|
14
25
|
PREFIX = "ror"
|
|
15
26
|
ROR_ZENODO_RECORD_ID = "10086202"
|
|
16
27
|
|
|
17
28
|
# Constants
|
|
18
29
|
ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
|
|
19
|
-
LOCATED_IN = Reference(prefix="RO", identifier="0001025")
|
|
20
|
-
PART_OF = Reference(prefix="BFO", identifier="0000050")
|
|
21
|
-
HAS_PART = Reference(prefix="BFO", identifier="0000051")
|
|
22
|
-
SUCCESSOR = Reference(prefix="BFO", identifier="0000063")
|
|
23
|
-
PREDECESSOR = Reference(prefix="BFO", identifier="0000062")
|
|
24
30
|
|
|
25
31
|
RMAP = {
|
|
26
|
-
"Related":
|
|
27
|
-
"Child":
|
|
28
|
-
"Parent":
|
|
29
|
-
"Predecessor":
|
|
30
|
-
"Successor":
|
|
31
|
-
"Located in":
|
|
32
|
+
"Related": see_also,
|
|
33
|
+
"Child": has_part,
|
|
34
|
+
"Parent": part_of,
|
|
35
|
+
"Predecessor": has_predecessor,
|
|
36
|
+
"Successor": has_successor,
|
|
37
|
+
"Located in": located_in,
|
|
32
38
|
}
|
|
33
39
|
NAME_REMAPPING = {
|
|
34
40
|
"'s-Hertogenbosch": "Den Bosch", # SMH Netherlands, why u gotta be like this
|
|
@@ -43,16 +49,16 @@ class RORGetter(Obo):
|
|
|
43
49
|
"""An ontology representation of the ROR."""
|
|
44
50
|
|
|
45
51
|
ontology = bioregistry_key = PREFIX
|
|
46
|
-
typedefs =
|
|
52
|
+
typedefs = [has_homepage, *RMAP.values()]
|
|
47
53
|
synonym_typedefs = [acronym]
|
|
48
54
|
idspaces = {
|
|
49
55
|
"ror": "https://ror.org/",
|
|
50
56
|
"geonames": "https://www.geonames.org/",
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
"
|
|
57
|
+
"ENVO": "http://purl.obolibrary.org/obo/ENVO_",
|
|
58
|
+
"BFO": "http://purl.obolibrary.org/obo/BFO_",
|
|
59
|
+
"RO": "http://purl.obolibrary.org/obo/RO_",
|
|
60
|
+
"OBI": "http://purl.obolibrary.org/obo/OBI_",
|
|
61
|
+
"OMO": "http://purl.obolibrary.org/obo/OMO_",
|
|
56
62
|
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
|
57
63
|
}
|
|
58
64
|
|
|
@@ -65,6 +71,18 @@ class RORGetter(Obo):
|
|
|
65
71
|
return iterate_ror_terms(force=force)
|
|
66
72
|
|
|
67
73
|
|
|
74
|
+
ROR_ORGANIZATION_TYPE_TO_OBI = {
|
|
75
|
+
"Education": ...,
|
|
76
|
+
"Facility": ...,
|
|
77
|
+
"Company": ...,
|
|
78
|
+
"Government": ...,
|
|
79
|
+
"Healthcare": ...,
|
|
80
|
+
"Other": ...,
|
|
81
|
+
"Archive": ...,
|
|
82
|
+
}
|
|
83
|
+
_MISSED_ORG_TYPES: set[str] = set()
|
|
84
|
+
|
|
85
|
+
|
|
68
86
|
def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
69
87
|
"""Iterate over terms in ROR."""
|
|
70
88
|
version, source_uri, records = get_latest(force=force)
|
|
@@ -74,10 +92,23 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
|
74
92
|
name = record["name"]
|
|
75
93
|
name = NAME_REMAPPING.get(name, name)
|
|
76
94
|
|
|
95
|
+
organization_types = record.get("types", [])
|
|
96
|
+
description = f"{organization_types[0]} in {record['country']['country_name']}"
|
|
97
|
+
if established := record["established"]:
|
|
98
|
+
description += f" established in {established}"
|
|
99
|
+
|
|
77
100
|
term = Term(
|
|
78
|
-
reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
|
|
101
|
+
reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
|
|
102
|
+
type="Instance",
|
|
103
|
+
definition=description,
|
|
79
104
|
)
|
|
80
105
|
term.append_parent(ORG_CLASS)
|
|
106
|
+
# TODO replace term.append_parent(ORG_CLASS) with:
|
|
107
|
+
# for organization_type in organization_types:
|
|
108
|
+
# term.append_parent(ORG_PARENTS[organization_type])
|
|
109
|
+
|
|
110
|
+
for link in record.get("links", []):
|
|
111
|
+
term.append_property(has_homepage, link)
|
|
81
112
|
|
|
82
113
|
if name.startswith("The "):
|
|
83
114
|
term.append_synonym(name.removeprefix("The "))
|
|
@@ -159,5 +190,20 @@ def get_latest(*, force: bool = False):
|
|
|
159
190
|
raise FileNotFoundError
|
|
160
191
|
|
|
161
192
|
|
|
193
|
+
def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
|
|
194
|
+
"""Get a mapping of ROR ids to GeoNames IDs for countries."""
|
|
195
|
+
from pyobo.sources.geonames import get_city_to_country
|
|
196
|
+
|
|
197
|
+
city_to_country = get_city_to_country()
|
|
198
|
+
rv = {}
|
|
199
|
+
for term in iterate_ror_terms(**kwargs):
|
|
200
|
+
city_geonames_reference = term.get_relationship(located_in)
|
|
201
|
+
if city_geonames_reference is None:
|
|
202
|
+
continue
|
|
203
|
+
if city_geonames_reference.identifier in city_to_country:
|
|
204
|
+
rv[term.identifier] = city_to_country[city_geonames_reference.identifier]
|
|
205
|
+
return rv
|
|
206
|
+
|
|
207
|
+
|
|
162
208
|
if __name__ == "__main__":
|
|
163
|
-
RORGetter().write_default(write_obo=True, force=True)
|
|
209
|
+
RORGetter(force=True).write_default(write_obo=True, force=True)
|
pyobo/sources/uniprot/uniprot.py
CHANGED
|
@@ -6,10 +6,10 @@ from operator import attrgetter
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Iterable, List, Optional, cast
|
|
8
8
|
|
|
9
|
-
import bioversions
|
|
10
9
|
from tqdm.auto import tqdm
|
|
11
10
|
|
|
12
11
|
from pyobo import Obo, Reference
|
|
12
|
+
from pyobo.api.utils import get_version
|
|
13
13
|
from pyobo.constants import RAW_MODULE
|
|
14
14
|
from pyobo.identifier_utils import standardize_ec
|
|
15
15
|
from pyobo.struct import Term, derives_from, enables, from_species, participates_in
|
|
@@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]:
|
|
|
166
166
|
def ensure(version: Optional[str] = None, force: bool = False) -> Path:
|
|
167
167
|
"""Ensure the reviewed uniprot names are available."""
|
|
168
168
|
if version is None:
|
|
169
|
-
version = bioversions.get_version("uniprot")
|
|
169
|
+
version = get_version("uniprot")
|
|
170
170
|
return RAW_MODULE.ensure(
|
|
171
171
|
PREFIX,
|
|
172
172
|
version,
|
pyobo/struct/struct.py
CHANGED
|
@@ -56,6 +56,7 @@ from .typedef import (
|
|
|
56
56
|
term_replaced_by,
|
|
57
57
|
)
|
|
58
58
|
from .utils import comma_separate, obo_escape_slim
|
|
59
|
+
from ..api.utils import get_version
|
|
59
60
|
from ..constants import (
|
|
60
61
|
DATE_FORMAT,
|
|
61
62
|
NCBITAXON_PREFIX,
|
|
@@ -77,6 +78,8 @@ __all__ = [
|
|
|
77
78
|
"Term",
|
|
78
79
|
"Obo",
|
|
79
80
|
"make_ad_hoc_ontology",
|
|
81
|
+
"abbreviation",
|
|
82
|
+
"acronym",
|
|
80
83
|
]
|
|
81
84
|
|
|
82
85
|
logger = logging.getLogger(__name__)
|
|
@@ -583,10 +586,8 @@ class Obo:
|
|
|
583
586
|
|
|
584
587
|
def _get_version(self) -> Optional[str]:
|
|
585
588
|
if self.bioversions_key:
|
|
586
|
-
import bioversions
|
|
587
|
-
|
|
588
589
|
try:
|
|
589
|
-
return bioversions.get_version(self.bioversions_key)
|
|
590
|
+
return get_version(self.bioversions_key)
|
|
590
591
|
except KeyError:
|
|
591
592
|
logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ")
|
|
592
593
|
except IOError:
|
pyobo/struct/typedef.py
CHANGED
|
@@ -42,9 +42,13 @@ __all__ = [
|
|
|
42
42
|
"has_participant",
|
|
43
43
|
"exact_match",
|
|
44
44
|
"has_dbxref",
|
|
45
|
+
"located_in",
|
|
46
|
+
"has_successor",
|
|
47
|
+
"has_predecessor",
|
|
45
48
|
# Properties
|
|
46
49
|
"has_inchi",
|
|
47
50
|
"has_smiles",
|
|
51
|
+
"has_homepage",
|
|
48
52
|
]
|
|
49
53
|
|
|
50
54
|
|
|
@@ -323,6 +327,9 @@ enabled_by = TypeDef(reference=_enabled_by_reference, inverse=_enables_reference
|
|
|
323
327
|
has_input = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002233", name="has input")
|
|
324
328
|
has_output = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002234", name="has output")
|
|
325
329
|
|
|
330
|
+
has_successor = TypeDef.from_triple(prefix="BFO", identifier="0000063", name="has successor")
|
|
331
|
+
has_predecessor = TypeDef.from_triple(prefix="BFO", identifier="0000062", name="has predecessor")
|
|
332
|
+
|
|
326
333
|
"""ChEBI"""
|
|
327
334
|
|
|
328
335
|
is_conjugate_base_of = TypeDef(
|
|
@@ -355,6 +362,9 @@ has_inchi = TypeDef(
|
|
|
355
362
|
reference=Reference(prefix="debio", identifier="0000020", name="has InChI"),
|
|
356
363
|
)
|
|
357
364
|
|
|
365
|
+
has_homepage = TypeDef(
|
|
366
|
+
reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True
|
|
367
|
+
)
|
|
358
368
|
|
|
359
369
|
default_typedefs: Dict[Tuple[str, str], TypeDef] = {
|
|
360
370
|
v.pair: v for k, v in locals().items() if isinstance(v, TypeDef)
|
pyobo/utils/path.py
CHANGED
|
@@ -25,7 +25,7 @@ __all__ = [
|
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
28
|
-
VersionHint = Union[None, str, Callable[[], str]]
|
|
28
|
+
VersionHint = Union[None, str, Callable[[], Optional[str]]]
|
|
29
29
|
|
|
30
30
|
requests_ftp.monkeypatch_session()
|
|
31
31
|
|
|
@@ -46,6 +46,7 @@ def prefix_directory_join(
|
|
|
46
46
|
logger.info("[%s] got version %s", prefix, version)
|
|
47
47
|
elif not isinstance(version, str):
|
|
48
48
|
raise TypeError(f"Invalid type: {version} ({type(version)})")
|
|
49
|
+
assert version is not None
|
|
49
50
|
version = cleanup_version(version, prefix=prefix)
|
|
50
51
|
if version is not None and "/" in version:
|
|
51
52
|
raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
|