pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
"""Utilities or interacting with the ICD API.
|
|
2
2
|
|
|
3
|
-
Want to get your own API
|
|
3
|
+
Want to get your own API client ID and client secret?
|
|
4
4
|
|
|
5
5
|
1. Register at https://icdapihome.azurewebsites.net/icdapi/Account/Register
|
|
6
6
|
2. Sell your soul to the American government
|
|
7
|
+
|
|
8
|
+
.. note::
|
|
9
|
+
|
|
10
|
+
If web requests are stalling, try deleting the ``~/.cachier`` directory.
|
|
7
11
|
"""
|
|
8
12
|
|
|
9
13
|
import datetime
|
|
10
14
|
import json
|
|
11
|
-
import
|
|
12
|
-
from collections.abc import Iterable, Mapping
|
|
15
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
13
16
|
from pathlib import Path
|
|
14
|
-
from typing import Any
|
|
17
|
+
from typing import Any
|
|
15
18
|
|
|
16
19
|
import pystow
|
|
17
20
|
import requests
|
|
@@ -19,27 +22,66 @@ from cachier import cachier
|
|
|
19
22
|
from pystow.config_api import ConfigError
|
|
20
23
|
from tqdm.auto import tqdm
|
|
21
24
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
25
|
+
from ...getters import NoBuildError
|
|
26
|
+
from ...struct import Term
|
|
24
27
|
|
|
25
28
|
TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105
|
|
26
29
|
|
|
27
30
|
ICD_BASE_URL = "https://id.who.int/icd"
|
|
28
31
|
|
|
29
32
|
ICD11_TOP_LEVEL_URL = f"{ICD_BASE_URL}/entity"
|
|
33
|
+
ICD_11_MMS_URL = f"{ICD_BASE_URL}/release/11/2024-01/mms"
|
|
30
34
|
ICD10_TOP_LEVEL_URL = f"{ICD_BASE_URL}/release/10/2016"
|
|
31
35
|
|
|
32
36
|
|
|
33
37
|
def get_icd(url: str) -> requests.Response:
|
|
34
38
|
"""Get an ICD API endpoint."""
|
|
35
|
-
|
|
39
|
+
headers = get_icd_api_headers()
|
|
40
|
+
return requests.get(url, headers=headers, timeout=5)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_icd_10_top(version: str, path: Path) -> dict[str, Any]:
|
|
44
|
+
"""Get from the ICD10 top."""
|
|
45
|
+
if path.is_file():
|
|
46
|
+
return json.loads(path.read_text())
|
|
47
|
+
rv = get_icd(ICD10_TOP_LEVEL_URL).json()
|
|
48
|
+
path.write_text(json.dumps(rv, indent=2))
|
|
49
|
+
return rv
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_icd_11(identifier: str) -> dict[str, Any]:
|
|
53
|
+
"""Get from ICD11."""
|
|
54
|
+
return get_icd_entity(ICD11_TOP_LEVEL_URL, identifier)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_icd_11_mms(identifier: str) -> dict[str, Any]:
|
|
58
|
+
"""Get from ICD11 MMS."""
|
|
59
|
+
return get_icd_entity(ICD_11_MMS_URL, identifier)
|
|
36
60
|
|
|
37
61
|
|
|
38
|
-
|
|
62
|
+
class ICDError(ValueError):
|
|
63
|
+
"""An error on getting data from ICD."""
|
|
64
|
+
|
|
65
|
+
def __init__(self, identifier: str, url: str, text: str) -> None:
|
|
66
|
+
"""Instantiate an ICD error."""
|
|
67
|
+
self.identifier = identifier
|
|
68
|
+
self.url = url
|
|
69
|
+
self.text = text
|
|
70
|
+
|
|
71
|
+
def __str__(self) -> str:
|
|
72
|
+
"""Make a string for the ICD error."""
|
|
73
|
+
return f"[icd11:{self.identifier}] Error getting {self.url} - {self.text}. Try {ICD11_TOP_LEVEL_URL}/{self.identifier}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_icd_entity(endpoint: str, identifier: str) -> dict[str, Any]:
|
|
77
|
+
"""Query a given endpoint at ICD."""
|
|
39
78
|
url = f"{endpoint}/{identifier}"
|
|
40
|
-
# tqdm.write(f'query {identifier} at {url}')
|
|
41
79
|
res = get_icd(url)
|
|
42
|
-
|
|
80
|
+
try:
|
|
81
|
+
rv = res.json()
|
|
82
|
+
except OSError:
|
|
83
|
+
raise ICDError(identifier, url, res.text) from None
|
|
84
|
+
return rv
|
|
43
85
|
|
|
44
86
|
|
|
45
87
|
def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
|
|
@@ -47,9 +89,13 @@ def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[st
|
|
|
47
89
|
return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
|
|
48
90
|
|
|
49
91
|
|
|
50
|
-
|
|
92
|
+
DELAY = 45
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@cachier(stale_after=datetime.timedelta(minutes=DELAY))
|
|
51
96
|
def get_icd_api_headers() -> Mapping[str, str]:
|
|
52
97
|
"""Get the headers, and refresh every hour."""
|
|
98
|
+
tqdm.write("Getting ICD credentials w/ PyStow")
|
|
53
99
|
try:
|
|
54
100
|
icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
|
|
55
101
|
icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
|
|
@@ -58,8 +104,10 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
58
104
|
|
|
59
105
|
grant_type = "client_credentials"
|
|
60
106
|
body_params = {"grant_type": grant_type}
|
|
61
|
-
tqdm.write("getting ICD API token")
|
|
62
|
-
res = requests.post(
|
|
107
|
+
tqdm.write(f"getting ICD API token, good for {DELAY} minutes")
|
|
108
|
+
res = requests.post(
|
|
109
|
+
TOKEN_URL, data=body_params, auth=(icd_client_id, icd_client_secret), timeout=10
|
|
110
|
+
)
|
|
63
111
|
res_json = res.json()
|
|
64
112
|
access_type = res_json["token_type"]
|
|
65
113
|
access_token = res_json["access_token"]
|
|
@@ -73,7 +121,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
73
121
|
def visiter(
|
|
74
122
|
identifier: str,
|
|
75
123
|
visited_identifiers: set[str],
|
|
76
|
-
directory:
|
|
124
|
+
directory: str | Path,
|
|
77
125
|
*,
|
|
78
126
|
endpoint: str,
|
|
79
127
|
converter: Callable[[Mapping[str, Any]], Term],
|
|
@@ -84,13 +132,11 @@ def visiter(
|
|
|
84
132
|
return
|
|
85
133
|
visited_identifiers.add(identifier)
|
|
86
134
|
|
|
87
|
-
if
|
|
88
|
-
|
|
89
|
-
res_json = json.load(file)
|
|
135
|
+
if path.is_file():
|
|
136
|
+
res_json = json.loads(path.read_text())
|
|
90
137
|
else:
|
|
91
|
-
res_json =
|
|
92
|
-
|
|
93
|
-
json.dump(res_json, file, indent=2)
|
|
138
|
+
res_json = get_icd_entity(endpoint, identifier)
|
|
139
|
+
path.write_text(json.dumps(res_json, indent=2))
|
|
94
140
|
|
|
95
141
|
yield converter(res_json)
|
|
96
142
|
for identifier in get_child_identifiers(endpoint, res_json):
|
pyobo/sources/interpro.py
CHANGED
|
@@ -5,7 +5,7 @@ from collections.abc import Iterable, Mapping
|
|
|
5
5
|
|
|
6
6
|
from .utils import get_go_mapping
|
|
7
7
|
from ..struct import Obo, Reference, Term
|
|
8
|
-
from ..struct.typedef import enables, has_member
|
|
8
|
+
from ..struct.typedef import enables, has_category, has_member
|
|
9
9
|
from ..utils.io import multisetdict
|
|
10
10
|
from ..utils.path import ensure_df, ensure_path
|
|
11
11
|
|
|
@@ -30,18 +30,13 @@ class InterProGetter(Obo):
|
|
|
30
30
|
"""An ontology representation of InterPro."""
|
|
31
31
|
|
|
32
32
|
ontology = bioversions_key = PREFIX
|
|
33
|
-
typedefs = [enables, has_member]
|
|
33
|
+
typedefs = [enables, has_member, has_category]
|
|
34
34
|
|
|
35
35
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
36
36
|
"""Iterate over InterPro terms."""
|
|
37
37
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def get_obo(force: bool = False) -> Obo:
|
|
41
|
-
"""Get InterPro as OBO."""
|
|
42
|
-
return InterProGetter(force=force)
|
|
43
|
-
|
|
44
|
-
|
|
45
40
|
def iter_terms(*, version: str, proteins: bool = False, force: bool = False) -> Iterable[Term]:
|
|
46
41
|
"""Get InterPro terms."""
|
|
47
42
|
parents = get_interpro_tree(version=version, force=force)
|
|
@@ -74,7 +69,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
|
|
|
74
69
|
term.append_relationship(
|
|
75
70
|
enables, Reference(prefix="go", identifier=go_id, name=go_name)
|
|
76
71
|
)
|
|
77
|
-
term.
|
|
72
|
+
term.annotate_string(has_category, entry_type)
|
|
78
73
|
for uniprot_id in interpro_to_proteins.get(identifier, []):
|
|
79
74
|
term.append_relationship(has_member, Reference(prefix="uniprot", identifier=uniprot_id))
|
|
80
75
|
yield term
|
|
@@ -91,7 +86,7 @@ def get_interpro_tree(version: str, force: bool = False):
|
|
|
91
86
|
"""Get InterPro Data source."""
|
|
92
87
|
url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/ParentChildTreeFile.txt"
|
|
93
88
|
path = ensure_path(PREFIX, url=url, version=version, force=force)
|
|
94
|
-
with open(
|
|
89
|
+
with path.open() as f:
|
|
95
90
|
return _parse_tree_helper(f)
|
|
96
91
|
|
|
97
92
|
|
pyobo/sources/itis.py
CHANGED
|
@@ -43,11 +43,6 @@ class ITISGetter(Obo):
|
|
|
43
43
|
return iter_terms(force=force, version=self._version_or_raise)
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
def get_obo() -> Obo:
|
|
47
|
-
"""Get ITIS as OBO."""
|
|
48
|
-
return ITISGetter()
|
|
49
|
-
|
|
50
|
-
|
|
51
46
|
def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
52
47
|
"""Get ITIS terms."""
|
|
53
48
|
zip_path = ensure_path(PREFIX, url=URL, force=force, version=version)
|
pyobo/sources/kegg/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/api.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import urllib.error
|
|
4
4
|
from collections.abc import Mapping
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from
|
|
6
|
+
from pathlib import Path
|
|
7
7
|
|
|
8
8
|
from pyobo import Reference, Term, ensure_path
|
|
9
9
|
from pyobo.struct import from_species
|
|
@@ -30,9 +30,9 @@ class KEGGGenome:
|
|
|
30
30
|
|
|
31
31
|
identifier: str
|
|
32
32
|
name: str
|
|
33
|
-
code:
|
|
34
|
-
long_code:
|
|
35
|
-
taxonomy_id:
|
|
33
|
+
code: str | None
|
|
34
|
+
long_code: str | None
|
|
35
|
+
taxonomy_id: str | None
|
|
36
36
|
|
|
37
37
|
def annotate_term(self, term: Term) -> None:
|
|
38
38
|
"""Annotate the term with the species represented by this object."""
|
|
@@ -52,7 +52,7 @@ class KEGGGenome:
|
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def ensure_list_genomes(version: str) ->
|
|
55
|
+
def ensure_list_genomes(version: str) -> Path:
|
|
56
56
|
"""Ensure the KEGG Genome file is downloaded."""
|
|
57
57
|
return ensure_path(
|
|
58
58
|
KEGG_GENOME_PREFIX,
|
|
@@ -76,7 +76,7 @@ def ensure_list_pathways(version: str) -> Mapping[str, str]:
|
|
|
76
76
|
"""GENOME SPECIFIC"""
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
def ensure_list_genome(kegg_genome_id: str, *, version: str) ->
|
|
79
|
+
def ensure_list_genome(kegg_genome_id: str, *, version: str) -> Path:
|
|
80
80
|
"""Get the list of genes for the given organism."""
|
|
81
81
|
return ensure_path(
|
|
82
82
|
KEGG_GENES_PREFIX,
|
|
@@ -87,22 +87,14 @@ def ensure_list_genome(kegg_genome_id: str, *, version: str) -> str:
|
|
|
87
87
|
)
|
|
88
88
|
|
|
89
89
|
|
|
90
|
-
def ensure_conv_genome_uniprot(
|
|
91
|
-
kegg_genome_id: str, *, version: str, error_on_missing: bool = False
|
|
92
|
-
) -> Optional[str]:
|
|
90
|
+
def ensure_conv_genome_uniprot(kegg_genome_id: str, *, version: str) -> Path | None:
|
|
93
91
|
"""Get the KEGG-UniProt protein map for the given organism."""
|
|
94
|
-
return _ensure_conv_genome_helper(
|
|
95
|
-
kegg_genome_id, "uniprot", version=version, error_on_missing=error_on_missing
|
|
96
|
-
)
|
|
92
|
+
return _ensure_conv_genome_helper(kegg_genome_id, "uniprot", version=version)
|
|
97
93
|
|
|
98
94
|
|
|
99
|
-
def ensure_conv_genome_ncbigene(
|
|
100
|
-
kegg_genome_id: str, *, version: str, error_on_missing: bool = False
|
|
101
|
-
) -> Optional[str]:
|
|
95
|
+
def ensure_conv_genome_ncbigene(kegg_genome_id: str, *, version: str) -> Path | None:
|
|
102
96
|
"""Get the KEGG-NCBIGENE protein map for the given organism."""
|
|
103
|
-
return _ensure_conv_genome_helper(
|
|
104
|
-
kegg_genome_id, "ncbi-geneid", version=version, error_on_missing=error_on_missing
|
|
105
|
-
)
|
|
97
|
+
return _ensure_conv_genome_helper(kegg_genome_id, "ncbi-geneid", version=version)
|
|
106
98
|
|
|
107
99
|
|
|
108
100
|
def _ensure_conv_genome_helper(
|
|
@@ -110,8 +102,7 @@ def _ensure_conv_genome_helper(
|
|
|
110
102
|
target_database: str,
|
|
111
103
|
*,
|
|
112
104
|
version: str,
|
|
113
|
-
|
|
114
|
-
) -> Optional[str]:
|
|
105
|
+
) -> Path | None:
|
|
115
106
|
"""Get the KEGG-external protein map for the given organism/database."""
|
|
116
107
|
name = f"{kegg_genome_id}.tsv"
|
|
117
108
|
try:
|
|
@@ -120,7 +111,6 @@ def _ensure_conv_genome_helper(
|
|
|
120
111
|
f"conv_{target_database}",
|
|
121
112
|
url=f"{BASE}/conv/{target_database}/{kegg_genome_id}",
|
|
122
113
|
name=name,
|
|
123
|
-
error_on_missing=error_on_missing,
|
|
124
114
|
version=version,
|
|
125
115
|
)
|
|
126
116
|
except urllib.error.HTTPError:
|
|
@@ -132,42 +122,30 @@ def _ensure_conv_genome_helper(
|
|
|
132
122
|
)
|
|
133
123
|
with path_rv.open("w") as file:
|
|
134
124
|
print(file=file)
|
|
135
|
-
return path_rv
|
|
125
|
+
return path_rv
|
|
136
126
|
except FileNotFoundError:
|
|
137
127
|
return None
|
|
138
128
|
else:
|
|
139
129
|
return rv
|
|
140
130
|
|
|
141
131
|
|
|
142
|
-
def ensure_link_pathway_genome(
|
|
143
|
-
|
|
144
|
-
) -> str:
|
|
145
|
-
"""Get the protein-pathway links for the given organism.
|
|
146
|
-
|
|
147
|
-
:raises: FileNotFoundError
|
|
148
|
-
"""
|
|
132
|
+
def ensure_link_pathway_genome(kegg_genome_id: str, *, version: str) -> Path:
|
|
133
|
+
"""Get the protein-pathway links for the given organism."""
|
|
149
134
|
return ensure_path(
|
|
150
135
|
KEGG_PATHWAY_PREFIX,
|
|
151
136
|
"link_pathway",
|
|
152
137
|
url=f"{BASE}/link/pathway/{kegg_genome_id}",
|
|
153
138
|
name=f"{kegg_genome_id}.tsv",
|
|
154
|
-
error_on_missing=error_on_missing,
|
|
155
139
|
version=version,
|
|
156
140
|
)
|
|
157
141
|
|
|
158
142
|
|
|
159
|
-
def ensure_list_pathway_genome(
|
|
160
|
-
|
|
161
|
-
) -> str:
|
|
162
|
-
"""Get the list of pathways for the given organism.
|
|
163
|
-
|
|
164
|
-
:raises: FileNotFoundError
|
|
165
|
-
"""
|
|
143
|
+
def ensure_list_pathway_genome(kegg_genome_id: str, *, version: str) -> Path:
|
|
144
|
+
"""Get the list of pathways for the given organism."""
|
|
166
145
|
return ensure_path(
|
|
167
146
|
KEGG_PATHWAY_PREFIX,
|
|
168
147
|
"pathways",
|
|
169
148
|
url=f"{BASE}/list/pathway/{kegg_genome_id}",
|
|
170
149
|
name=f"{kegg_genome_id}.tsv",
|
|
171
|
-
error_on_missing=error_on_missing,
|
|
172
150
|
version=version,
|
|
173
151
|
)
|
pyobo/sources/kegg/genes.py
CHANGED
|
@@ -5,10 +5,8 @@ Run with ``python -m pyobo.sources.kegg.genes``
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
from collections.abc import Iterable
|
|
8
|
-
from
|
|
8
|
+
from pathlib import Path
|
|
9
9
|
|
|
10
|
-
import click
|
|
11
|
-
from more_click import verbose_option
|
|
12
10
|
from tqdm.auto import tqdm
|
|
13
11
|
|
|
14
12
|
from .api import (
|
|
@@ -42,11 +40,6 @@ class KEGGGeneGetter(Obo):
|
|
|
42
40
|
return iter_terms(version=self._version_or_raise)
|
|
43
41
|
|
|
44
42
|
|
|
45
|
-
def get_obo() -> Obo:
|
|
46
|
-
"""Get KEGG Genes as OBO."""
|
|
47
|
-
return KEGGGeneGetter()
|
|
48
|
-
|
|
49
|
-
|
|
50
43
|
def iter_terms(version: str) -> Iterable[Term]:
|
|
51
44
|
"""Iterate over terms for KEGG Genome."""
|
|
52
45
|
for kegg_genome in iter_kegg_genomes(version=version, desc="KEGG Genes"):
|
|
@@ -72,9 +65,9 @@ def iter_terms(version: str) -> Iterable[Term]:
|
|
|
72
65
|
|
|
73
66
|
def _make_terms(
|
|
74
67
|
kegg_genome: KEGGGenome,
|
|
75
|
-
list_genome_path:
|
|
76
|
-
conv_uniprot_path:
|
|
77
|
-
conv_ncbigene_path:
|
|
68
|
+
list_genome_path: Path,
|
|
69
|
+
conv_uniprot_path: Path | None = None,
|
|
70
|
+
conv_ncbigene_path: Path | None = None,
|
|
78
71
|
) -> Iterable[Term]:
|
|
79
72
|
uniprot_conv = _load_conv(conv_uniprot_path, "up:") if conv_uniprot_path else {}
|
|
80
73
|
ncbigene_conv = _load_conv(conv_ncbigene_path, "ncbi-geneid:") if conv_ncbigene_path else {}
|
|
@@ -101,7 +94,9 @@ def _make_terms(
|
|
|
101
94
|
|
|
102
95
|
uniprot_xref = uniprot_conv.get(identifier)
|
|
103
96
|
if uniprot_xref is not None:
|
|
104
|
-
term.
|
|
97
|
+
term.annotate_object(
|
|
98
|
+
has_gene_product, Reference(prefix="uniprot", identifier=uniprot_xref)
|
|
99
|
+
)
|
|
105
100
|
|
|
106
101
|
ncbigene_xref = ncbigene_conv.get(identifier)
|
|
107
102
|
if ncbigene_xref is not None:
|
|
@@ -111,17 +106,11 @@ def _make_terms(
|
|
|
111
106
|
yield term
|
|
112
107
|
|
|
113
108
|
|
|
114
|
-
def _load_conv(path, value_prefix):
|
|
109
|
+
def _load_conv(path: Path, value_prefix):
|
|
115
110
|
m = open_map_tsv(path)
|
|
116
111
|
m = {k: v[len(value_prefix) :] for k, v in m.items()}
|
|
117
112
|
return m
|
|
118
113
|
|
|
119
114
|
|
|
120
|
-
@click.command()
|
|
121
|
-
@verbose_option
|
|
122
|
-
def _main():
|
|
123
|
-
get_obo().write_default()
|
|
124
|
-
|
|
125
|
-
|
|
126
115
|
if __name__ == "__main__":
|
|
127
|
-
|
|
116
|
+
KEGGGeneGetter.cli()
|
pyobo/sources/kegg/genome.py
CHANGED
|
@@ -42,12 +42,6 @@ class KEGGGenomeGetter(Obo):
|
|
|
42
42
|
return iter_terms(version=self._version_or_raise)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def get_obo() -> Obo:
|
|
46
|
-
"""Get KEGG Genome as OBO."""
|
|
47
|
-
# since old kegg versions go away forever, do NOT add a force option
|
|
48
|
-
return KEGGGenomeGetter()
|
|
49
|
-
|
|
50
|
-
|
|
51
45
|
def parse_genome_line(line: str) -> KEGGGenome | None:
|
|
52
46
|
"""Parse a line from the KEGG Genome database."""
|
|
53
47
|
if not line.startswith("T"):
|
|
@@ -94,7 +88,7 @@ def iter_kegg_genomes(version: str, desc: str) -> Iterable[KEGGGenome]:
|
|
|
94
88
|
"""Iterate over all KEGG genomes."""
|
|
95
89
|
# since old kegg versions go away forever, do NOT add a force option
|
|
96
90
|
path = ensure_list_genomes(version=version)
|
|
97
|
-
with open(
|
|
91
|
+
with path.open() as file:
|
|
98
92
|
lines = [line.strip() for line in file]
|
|
99
93
|
it = tqdm(lines, desc=desc, unit_scale=True, unit="genome")
|
|
100
94
|
for line in it:
|
pyobo/sources/kegg/pathway.py
CHANGED
|
@@ -8,7 +8,6 @@ import urllib.error
|
|
|
8
8
|
from collections import defaultdict
|
|
9
9
|
from collections.abc import Iterable, Mapping
|
|
10
10
|
from functools import partial
|
|
11
|
-
from typing import Union
|
|
12
11
|
|
|
13
12
|
from tqdm.auto import tqdm
|
|
14
13
|
from tqdm.contrib.concurrent import thread_map
|
|
@@ -39,6 +38,7 @@ __all__ = [
|
|
|
39
38
|
logger = logging.getLogger(__name__)
|
|
40
39
|
|
|
41
40
|
|
|
41
|
+
# FIXME KEGG API is not usable anymore
|
|
42
42
|
class KEGGPathwayGetter(Obo):
|
|
43
43
|
"""An ontology representation of KEGG Pathways."""
|
|
44
44
|
|
|
@@ -51,12 +51,6 @@ class KEGGPathwayGetter(Obo):
|
|
|
51
51
|
return iter_terms(version=self._version_or_raise)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def get_obo() -> Obo:
|
|
55
|
-
"""Get KEGG Pathways as OBO."""
|
|
56
|
-
# since old kegg versions go away forever, do NOT add a force option
|
|
57
|
-
return KEGGPathwayGetter()
|
|
58
|
-
|
|
59
|
-
|
|
60
54
|
def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
|
|
61
55
|
"""Iterate over terms for KEGG Pathway."""
|
|
62
56
|
# since old kegg versions go away forever, do NOT add a force option
|
|
@@ -135,7 +129,7 @@ def _iter_genome_terms(
|
|
|
135
129
|
tqdm.write(f"could not find kegg.pathway:{pathway_id} for {kegg_genome.name}")
|
|
136
130
|
continue
|
|
137
131
|
for protein_id in protein_ids:
|
|
138
|
-
pathway_term.
|
|
132
|
+
pathway_term.annotate_object(
|
|
139
133
|
has_participant,
|
|
140
134
|
Reference(
|
|
141
135
|
prefix=KEGG_GENES_PREFIX,
|
|
@@ -148,26 +142,20 @@ def _iter_genome_terms(
|
|
|
148
142
|
|
|
149
143
|
def iter_kegg_pathway_paths(
|
|
150
144
|
version: str, skip_missing: bool = True
|
|
151
|
-
) -> Iterable[
|
|
145
|
+
) -> Iterable[tuple[KEGGGenome, str, str] | tuple[None, None, None]]:
|
|
152
146
|
"""Get paths for the KEGG Pathway files."""
|
|
153
|
-
genomes =
|
|
147
|
+
genomes = sorted(
|
|
148
|
+
iter_kegg_genomes(version=version, desc="KEGG Pathways"), key=lambda x: int(x.identifier)
|
|
149
|
+
)
|
|
154
150
|
func = partial(_process_genome, version=version, skip_missing=skip_missing)
|
|
155
151
|
return thread_map(func, genomes, unit="pathway", unit_scale=True)
|
|
156
152
|
|
|
157
153
|
|
|
158
|
-
def _process_genome(kegg_genome, version, skip_missing):
|
|
154
|
+
def _process_genome(kegg_genome: KEGGGenome, version: str, skip_missing: bool):
|
|
159
155
|
with logging_redirect_tqdm():
|
|
160
156
|
try:
|
|
161
|
-
list_pathway_path = ensure_list_pathway_genome(
|
|
162
|
-
|
|
163
|
-
version=version,
|
|
164
|
-
error_on_missing=not skip_missing,
|
|
165
|
-
)
|
|
166
|
-
link_pathway_path = ensure_link_pathway_genome(
|
|
167
|
-
kegg_genome.identifier,
|
|
168
|
-
version=version,
|
|
169
|
-
error_on_missing=not skip_missing,
|
|
170
|
-
)
|
|
157
|
+
list_pathway_path = ensure_list_pathway_genome(kegg_genome.identifier, version=version)
|
|
158
|
+
link_pathway_path = ensure_link_pathway_genome(kegg_genome.identifier, version=version)
|
|
171
159
|
except urllib.error.HTTPError as e:
|
|
172
160
|
code = e.getcode()
|
|
173
161
|
if code != 404:
|