pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
@@ -1,17 +1,20 @@
1
1
  """Utilities for interacting with the ICD API.
2
2
 
3
- Want to get your own API cliend ID and client secret?
3
+ Want to get your own API client ID and client secret?
4
4
 
5
5
  1. Register at https://icdapihome.azurewebsites.net/icdapi/Account/Register
6
6
  2. Sell your soul to the American government
7
+
8
+ .. note::
9
+
10
+ If web requests are stalling, try deleting the ``~/.cachier`` directory.
7
11
  """
8
12
 
9
13
  import datetime
10
14
  import json
11
- import os
12
- from collections.abc import Iterable, Mapping
15
+ from collections.abc import Callable, Iterable, Mapping
13
16
  from pathlib import Path
14
- from typing import Any, Callable, Union
17
+ from typing import Any
15
18
 
16
19
  import pystow
17
20
  import requests
@@ -19,27 +22,66 @@ from cachier import cachier
19
22
  from pystow.config_api import ConfigError
20
23
  from tqdm.auto import tqdm
21
24
 
22
- from ..getters import NoBuildError
23
- from ..struct import Term
25
+ from ...getters import NoBuildError
26
+ from ...struct import Term
24
27
 
25
28
  TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105
26
29
 
27
30
  ICD_BASE_URL = "https://id.who.int/icd"
28
31
 
29
32
  ICD11_TOP_LEVEL_URL = f"{ICD_BASE_URL}/entity"
33
+ ICD_11_MMS_URL = f"{ICD_BASE_URL}/release/11/2024-01/mms"
30
34
  ICD10_TOP_LEVEL_URL = f"{ICD_BASE_URL}/release/10/2016"
31
35
 
32
36
 
33
37
  def get_icd(url: str) -> requests.Response:
34
38
  """Get an ICD API endpoint."""
35
- return requests.get(url, headers=get_icd_api_headers())
39
+ headers = get_icd_api_headers()
40
+ return requests.get(url, headers=headers, timeout=5)
41
+
42
+
43
+ def get_icd_10_top(version: str, path: Path) -> dict[str, Any]:
44
+ """Get from the ICD10 top."""
45
+ if path.is_file():
46
+ return json.loads(path.read_text())
47
+ rv = get_icd(ICD10_TOP_LEVEL_URL).json()
48
+ path.write_text(json.dumps(rv, indent=2))
49
+ return rv
50
+
51
+
52
+ def get_icd_11(identifier: str) -> dict[str, Any]:
53
+ """Get from ICD11."""
54
+ return get_icd_entity(ICD11_TOP_LEVEL_URL, identifier)
55
+
56
+
57
+ def get_icd_11_mms(identifier: str) -> dict[str, Any]:
58
+ """Get from ICD11 MMS."""
59
+ return get_icd_entity(ICD_11_MMS_URL, identifier)
36
60
 
37
61
 
38
- def _get_entity(endpoint: str, identifier: str):
62
+ class ICDError(ValueError):
63
+ """An error on getting data from ICD."""
64
+
65
+ def __init__(self, identifier: str, url: str, text: str) -> None:
66
+ """Instantiate an ICD error."""
67
+ self.identifier = identifier
68
+ self.url = url
69
+ self.text = text
70
+
71
+ def __str__(self) -> str:
72
+ """Make a string for the ICD error."""
73
+ return f"[icd11:{self.identifier}] Error getting {self.url} - {self.text}. Try {ICD11_TOP_LEVEL_URL}/{self.identifier}"
74
+
75
+
76
+ def get_icd_entity(endpoint: str, identifier: str) -> dict[str, Any]:
77
+ """Query a given endpoint at ICD."""
39
78
  url = f"{endpoint}/{identifier}"
40
- # tqdm.write(f'query {identifier} at {url}')
41
79
  res = get_icd(url)
42
- return res.json()
80
+ try:
81
+ rv = res.json()
82
+ except OSError:
83
+ raise ICDError(identifier, url, res.text) from None
84
+ return rv
43
85
 
44
86
 
45
87
  def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
@@ -47,9 +89,13 @@ def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[st
47
89
  return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
48
90
 
49
91
 
50
- @cachier(stale_after=datetime.timedelta(minutes=45))
92
+ DELAY = 45
93
+
94
+
95
+ @cachier(stale_after=datetime.timedelta(minutes=DELAY))
51
96
  def get_icd_api_headers() -> Mapping[str, str]:
52
97
  """Get the headers, and refresh every 45 minutes."""
98
+ tqdm.write("Getting ICD credentials w/ PyStow")
53
99
  try:
54
100
  icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
55
101
  icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
@@ -58,8 +104,10 @@ def get_icd_api_headers() -> Mapping[str, str]:
58
104
 
59
105
  grant_type = "client_credentials"
60
106
  body_params = {"grant_type": grant_type}
61
- tqdm.write("getting ICD API token")
62
- res = requests.post(TOKEN_URL, data=body_params, auth=(icd_client_id, icd_client_secret))
107
+ tqdm.write(f"getting ICD API token, good for {DELAY} minutes")
108
+ res = requests.post(
109
+ TOKEN_URL, data=body_params, auth=(icd_client_id, icd_client_secret), timeout=10
110
+ )
63
111
  res_json = res.json()
64
112
  access_type = res_json["token_type"]
65
113
  access_token = res_json["access_token"]
@@ -73,7 +121,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
73
121
  def visiter(
74
122
  identifier: str,
75
123
  visited_identifiers: set[str],
76
- directory: Union[str, Path],
124
+ directory: str | Path,
77
125
  *,
78
126
  endpoint: str,
79
127
  converter: Callable[[Mapping[str, Any]], Term],
@@ -84,13 +132,11 @@ def visiter(
84
132
  return
85
133
  visited_identifiers.add(identifier)
86
134
 
87
- if os.path.exists(path):
88
- with open(path) as file:
89
- res_json = json.load(file)
135
+ if path.is_file():
136
+ res_json = json.loads(path.read_text())
90
137
  else:
91
- res_json = _get_entity(endpoint, identifier)
92
- with open(path, "w") as file:
93
- json.dump(res_json, file, indent=2)
138
+ res_json = get_icd_entity(endpoint, identifier)
139
+ path.write_text(json.dumps(res_json, indent=2))
94
140
 
95
141
  yield converter(res_json)
96
142
  for identifier in get_child_identifiers(endpoint, res_json):
pyobo/sources/interpro.py CHANGED
@@ -5,7 +5,7 @@ from collections.abc import Iterable, Mapping
5
5
 
6
6
  from .utils import get_go_mapping
7
7
  from ..struct import Obo, Reference, Term
8
- from ..struct.typedef import enables, has_member
8
+ from ..struct.typedef import enables, has_category, has_member
9
9
  from ..utils.io import multisetdict
10
10
  from ..utils.path import ensure_df, ensure_path
11
11
 
@@ -30,18 +30,13 @@ class InterProGetter(Obo):
30
30
  """An ontology representation of InterPro."""
31
31
 
32
32
  ontology = bioversions_key = PREFIX
33
- typedefs = [enables, has_member]
33
+ typedefs = [enables, has_member, has_category]
34
34
 
35
35
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
36
36
  """Iterate over InterPro terms."""
37
37
  return iter_terms(version=self._version_or_raise, force=force)
38
38
 
39
39
 
40
- def get_obo(force: bool = False) -> Obo:
41
- """Get InterPro as OBO."""
42
- return InterProGetter(force=force)
43
-
44
-
45
40
  def iter_terms(*, version: str, proteins: bool = False, force: bool = False) -> Iterable[Term]:
46
41
  """Get InterPro terms."""
47
42
  parents = get_interpro_tree(version=version, force=force)
@@ -74,7 +69,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
74
69
  term.append_relationship(
75
70
  enables, Reference(prefix="go", identifier=go_id, name=go_name)
76
71
  )
77
- term.append_property("type", entry_type)
72
+ term.annotate_string(has_category, entry_type)
78
73
  for uniprot_id in interpro_to_proteins.get(identifier, []):
79
74
  term.append_relationship(has_member, Reference(prefix="uniprot", identifier=uniprot_id))
80
75
  yield term
@@ -91,7 +86,7 @@ def get_interpro_tree(version: str, force: bool = False):
91
86
  """Get InterPro Data source."""
92
87
  url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/ParentChildTreeFile.txt"
93
88
  path = ensure_path(PREFIX, url=url, version=version, force=force)
94
- with open(path) as f:
89
+ with path.open() as f:
95
90
  return _parse_tree_helper(f)
96
91
 
97
92
 
pyobo/sources/itis.py CHANGED
@@ -43,11 +43,6 @@ class ITISGetter(Obo):
43
43
  return iter_terms(force=force, version=self._version_or_raise)
44
44
 
45
45
 
46
- def get_obo() -> Obo:
47
- """Get ITIS as OBO."""
48
- return ITISGetter()
49
-
50
-
51
46
  def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
52
47
  """Get ITIS terms."""
53
48
  zip_path = ensure_path(PREFIX, url=URL, force=force, version=version)
File without changes
pyobo/sources/kegg/api.py CHANGED
@@ -3,7 +3,7 @@
3
3
  import urllib.error
4
4
  from collections.abc import Mapping
5
5
  from dataclasses import dataclass
6
- from typing import Optional
6
+ from pathlib import Path
7
7
 
8
8
  from pyobo import Reference, Term, ensure_path
9
9
  from pyobo.struct import from_species
@@ -30,9 +30,9 @@ class KEGGGenome:
30
30
 
31
31
  identifier: str
32
32
  name: str
33
- code: Optional[str]
34
- long_code: Optional[str]
35
- taxonomy_id: Optional[str]
33
+ code: str | None
34
+ long_code: str | None
35
+ taxonomy_id: str | None
36
36
 
37
37
  def annotate_term(self, term: Term) -> None:
38
38
  """Annotate the term with the species represented by this object."""
@@ -52,7 +52,7 @@ class KEGGGenome:
52
52
  )
53
53
 
54
54
 
55
- def ensure_list_genomes(version: str) -> str:
55
+ def ensure_list_genomes(version: str) -> Path:
56
56
  """Ensure the KEGG Genome file is downloaded."""
57
57
  return ensure_path(
58
58
  KEGG_GENOME_PREFIX,
@@ -76,7 +76,7 @@ def ensure_list_pathways(version: str) -> Mapping[str, str]:
76
76
  """GENOME SPECIFIC"""
77
77
 
78
78
 
79
- def ensure_list_genome(kegg_genome_id: str, *, version: str) -> str:
79
+ def ensure_list_genome(kegg_genome_id: str, *, version: str) -> Path:
80
80
  """Get the list of genes for the given organism."""
81
81
  return ensure_path(
82
82
  KEGG_GENES_PREFIX,
@@ -87,22 +87,14 @@ def ensure_list_genome(kegg_genome_id: str, *, version: str) -> str:
87
87
  )
88
88
 
89
89
 
90
- def ensure_conv_genome_uniprot(
91
- kegg_genome_id: str, *, version: str, error_on_missing: bool = False
92
- ) -> Optional[str]:
90
+ def ensure_conv_genome_uniprot(kegg_genome_id: str, *, version: str) -> Path | None:
93
91
  """Get the KEGG-UniProt protein map for the given organism."""
94
- return _ensure_conv_genome_helper(
95
- kegg_genome_id, "uniprot", version=version, error_on_missing=error_on_missing
96
- )
92
+ return _ensure_conv_genome_helper(kegg_genome_id, "uniprot", version=version)
97
93
 
98
94
 
99
- def ensure_conv_genome_ncbigene(
100
- kegg_genome_id: str, *, version: str, error_on_missing: bool = False
101
- ) -> Optional[str]:
95
+ def ensure_conv_genome_ncbigene(kegg_genome_id: str, *, version: str) -> Path | None:
102
96
  """Get the KEGG-NCBIGENE protein map for the given organism."""
103
- return _ensure_conv_genome_helper(
104
- kegg_genome_id, "ncbi-geneid", version=version, error_on_missing=error_on_missing
105
- )
97
+ return _ensure_conv_genome_helper(kegg_genome_id, "ncbi-geneid", version=version)
106
98
 
107
99
 
108
100
  def _ensure_conv_genome_helper(
@@ -110,8 +102,7 @@ def _ensure_conv_genome_helper(
110
102
  target_database: str,
111
103
  *,
112
104
  version: str,
113
- error_on_missing: bool = False,
114
- ) -> Optional[str]:
105
+ ) -> Path | None:
115
106
  """Get the KEGG-external protein map for the given organism/database."""
116
107
  name = f"{kegg_genome_id}.tsv"
117
108
  try:
@@ -120,7 +111,6 @@ def _ensure_conv_genome_helper(
120
111
  f"conv_{target_database}",
121
112
  url=f"{BASE}/conv/{target_database}/{kegg_genome_id}",
122
113
  name=name,
123
- error_on_missing=error_on_missing,
124
114
  version=version,
125
115
  )
126
116
  except urllib.error.HTTPError:
@@ -132,42 +122,30 @@ def _ensure_conv_genome_helper(
132
122
  )
133
123
  with path_rv.open("w") as file:
134
124
  print(file=file)
135
- return path_rv.as_posix()
125
+ return path_rv
136
126
  except FileNotFoundError:
137
127
  return None
138
128
  else:
139
129
  return rv
140
130
 
141
131
 
142
- def ensure_link_pathway_genome(
143
- kegg_genome_id: str, *, version: str, error_on_missing: bool = False
144
- ) -> str:
145
- """Get the protein-pathway links for the given organism.
146
-
147
- :raises: FileNotFoundError
148
- """
132
+ def ensure_link_pathway_genome(kegg_genome_id: str, *, version: str) -> Path:
133
+ """Get the protein-pathway links for the given organism."""
149
134
  return ensure_path(
150
135
  KEGG_PATHWAY_PREFIX,
151
136
  "link_pathway",
152
137
  url=f"{BASE}/link/pathway/{kegg_genome_id}",
153
138
  name=f"{kegg_genome_id}.tsv",
154
- error_on_missing=error_on_missing,
155
139
  version=version,
156
140
  )
157
141
 
158
142
 
159
- def ensure_list_pathway_genome(
160
- kegg_genome_id: str, *, version: str, error_on_missing: bool = False
161
- ) -> str:
162
- """Get the list of pathways for the given organism.
163
-
164
- :raises: FileNotFoundError
165
- """
143
+ def ensure_list_pathway_genome(kegg_genome_id: str, *, version: str) -> Path:
144
+ """Get the list of pathways for the given organism."""
166
145
  return ensure_path(
167
146
  KEGG_PATHWAY_PREFIX,
168
147
  "pathways",
169
148
  url=f"{BASE}/list/pathway/{kegg_genome_id}",
170
149
  name=f"{kegg_genome_id}.tsv",
171
- error_on_missing=error_on_missing,
172
150
  version=version,
173
151
  )
@@ -5,10 +5,8 @@ Run with ``python -m pyobo.sources.kegg.genes``
5
5
 
6
6
  import logging
7
7
  from collections.abc import Iterable
8
- from typing import Optional
8
+ from pathlib import Path
9
9
 
10
- import click
11
- from more_click import verbose_option
12
10
  from tqdm.auto import tqdm
13
11
 
14
12
  from .api import (
@@ -42,11 +40,6 @@ class KEGGGeneGetter(Obo):
42
40
  return iter_terms(version=self._version_or_raise)
43
41
 
44
42
 
45
- def get_obo() -> Obo:
46
- """Get KEGG Genes as OBO."""
47
- return KEGGGeneGetter()
48
-
49
-
50
43
  def iter_terms(version: str) -> Iterable[Term]:
51
44
  """Iterate over terms for KEGG Genome."""
52
45
  for kegg_genome in iter_kegg_genomes(version=version, desc="KEGG Genes"):
@@ -72,9 +65,9 @@ def iter_terms(version: str) -> Iterable[Term]:
72
65
 
73
66
  def _make_terms(
74
67
  kegg_genome: KEGGGenome,
75
- list_genome_path: str,
76
- conv_uniprot_path: Optional[str] = None,
77
- conv_ncbigene_path: Optional[str] = None,
68
+ list_genome_path: Path,
69
+ conv_uniprot_path: Path | None = None,
70
+ conv_ncbigene_path: Path | None = None,
78
71
  ) -> Iterable[Term]:
79
72
  uniprot_conv = _load_conv(conv_uniprot_path, "up:") if conv_uniprot_path else {}
80
73
  ncbigene_conv = _load_conv(conv_ncbigene_path, "ncbi-geneid:") if conv_ncbigene_path else {}
@@ -101,7 +94,9 @@ def _make_terms(
101
94
 
102
95
  uniprot_xref = uniprot_conv.get(identifier)
103
96
  if uniprot_xref is not None:
104
- term.append_relationship(has_gene_product, Reference("uniprot", uniprot_xref))
97
+ term.annotate_object(
98
+ has_gene_product, Reference(prefix="uniprot", identifier=uniprot_xref)
99
+ )
105
100
 
106
101
  ncbigene_xref = ncbigene_conv.get(identifier)
107
102
  if ncbigene_xref is not None:
@@ -111,17 +106,11 @@ def _make_terms(
111
106
  yield term
112
107
 
113
108
 
114
- def _load_conv(path, value_prefix):
109
+ def _load_conv(path: Path, value_prefix):
115
110
  m = open_map_tsv(path)
116
111
  m = {k: v[len(value_prefix) :] for k, v in m.items()}
117
112
  return m
118
113
 
119
114
 
120
- @click.command()
121
- @verbose_option
122
- def _main():
123
- get_obo().write_default()
124
-
125
-
126
115
  if __name__ == "__main__":
127
- _main()
116
+ KEGGGeneGetter.cli()
@@ -42,12 +42,6 @@ class KEGGGenomeGetter(Obo):
42
42
  return iter_terms(version=self._version_or_raise)
43
43
 
44
44
 
45
- def get_obo() -> Obo:
46
- """Get KEGG Genome as OBO."""
47
- # since old kegg versions go away forever, do NOT add a force option
48
- return KEGGGenomeGetter()
49
-
50
-
51
45
  def parse_genome_line(line: str) -> KEGGGenome | None:
52
46
  """Parse a line from the KEGG Genome database."""
53
47
  if not line.startswith("T"):
@@ -94,7 +88,7 @@ def iter_kegg_genomes(version: str, desc: str) -> Iterable[KEGGGenome]:
94
88
  """Iterate over all KEGG genomes."""
95
89
  # since old kegg versions go away forever, do NOT add a force option
96
90
  path = ensure_list_genomes(version=version)
97
- with open(path) as file:
91
+ with path.open() as file:
98
92
  lines = [line.strip() for line in file]
99
93
  it = tqdm(lines, desc=desc, unit_scale=True, unit="genome")
100
94
  for line in it:
@@ -8,7 +8,6 @@ import urllib.error
8
8
  from collections import defaultdict
9
9
  from collections.abc import Iterable, Mapping
10
10
  from functools import partial
11
- from typing import Union
12
11
 
13
12
  from tqdm.auto import tqdm
14
13
  from tqdm.contrib.concurrent import thread_map
@@ -39,6 +38,7 @@ __all__ = [
39
38
  logger = logging.getLogger(__name__)
40
39
 
41
40
 
41
+ # FIXME KEGG API is not usable anymore
42
42
  class KEGGPathwayGetter(Obo):
43
43
  """An ontology representation of KEGG Pathways."""
44
44
 
@@ -51,12 +51,6 @@ class KEGGPathwayGetter(Obo):
51
51
  return iter_terms(version=self._version_or_raise)
52
52
 
53
53
 
54
- def get_obo() -> Obo:
55
- """Get KEGG Pathways as OBO."""
56
- # since old kegg versions go away forever, do NOT add a force option
57
- return KEGGPathwayGetter()
58
-
59
-
60
54
  def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
61
55
  """Iterate over terms for KEGG Pathway."""
62
56
  # since old kegg versions go away forever, do NOT add a force option
@@ -135,7 +129,7 @@ def _iter_genome_terms(
135
129
  tqdm.write(f"could not find kegg.pathway:{pathway_id} for {kegg_genome.name}")
136
130
  continue
137
131
  for protein_id in protein_ids:
138
- pathway_term.append_relationship(
132
+ pathway_term.annotate_object(
139
133
  has_participant,
140
134
  Reference(
141
135
  prefix=KEGG_GENES_PREFIX,
@@ -148,26 +142,20 @@ def _iter_genome_terms(
148
142
 
149
143
  def iter_kegg_pathway_paths(
150
144
  version: str, skip_missing: bool = True
151
- ) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]:
145
+ ) -> Iterable[tuple[KEGGGenome, str, str] | tuple[None, None, None]]:
152
146
  """Get paths for the KEGG Pathway files."""
153
- genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways"))
147
+ genomes = sorted(
148
+ iter_kegg_genomes(version=version, desc="KEGG Pathways"), key=lambda x: int(x.identifier)
149
+ )
154
150
  func = partial(_process_genome, version=version, skip_missing=skip_missing)
155
151
  return thread_map(func, genomes, unit="pathway", unit_scale=True)
156
152
 
157
153
 
158
- def _process_genome(kegg_genome, version, skip_missing):
154
+ def _process_genome(kegg_genome: KEGGGenome, version: str, skip_missing: bool):
159
155
  with logging_redirect_tqdm():
160
156
  try:
161
- list_pathway_path = ensure_list_pathway_genome(
162
- kegg_genome.identifier,
163
- version=version,
164
- error_on_missing=not skip_missing,
165
- )
166
- link_pathway_path = ensure_link_pathway_genome(
167
- kegg_genome.identifier,
168
- version=version,
169
- error_on_missing=not skip_missing,
170
- )
157
+ list_pathway_path = ensure_list_pathway_genome(kegg_genome.identifier, version=version)
158
+ link_pathway_path = ensure_link_pathway_genome(kegg_genome.identifier, version=version)
171
159
  except urllib.error.HTTPError as e:
172
160
  code = e.getcode()
173
161
  if code != 404: