pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
@@ -6,8 +6,6 @@ import logging
6
6
  import typing
7
7
  from collections import Counter, defaultdict
8
8
  from collections.abc import Iterable
9
- from operator import attrgetter
10
- from typing import Optional
11
9
 
12
10
  from tabulate import tabulate
13
11
  from tqdm.auto import tqdm
@@ -17,11 +15,13 @@ from pyobo.resources.so import get_so_name
17
15
  from pyobo.struct import (
18
16
  Obo,
19
17
  Reference,
20
- Synonym,
21
18
  SynonymTypeDef,
22
19
  Term,
20
+ TypeDef,
21
+ default_reference,
23
22
  from_species,
24
23
  gene_product_member_of,
24
+ has_citation,
25
25
  has_gene_product,
26
26
  member_of,
27
27
  orthologous,
@@ -42,10 +42,27 @@ DEFINITIONS_URL_FMT = (
42
42
  "hgnc_complete_set_{version}.json"
43
43
  )
44
44
 
45
- previous_symbol_type = SynonymTypeDef.from_text("previous_symbol")
46
- alias_symbol_type = SynonymTypeDef.from_text("alias_symbol")
47
- previous_name_type = SynonymTypeDef.from_text("previous_name")
48
- alias_name_type = SynonymTypeDef.from_text("alias_name")
45
+ previous_symbol_type = SynonymTypeDef(
46
+ reference=default_reference(PREFIX, "previous_symbol", name="previous symbol")
47
+ )
48
+ alias_symbol_type = SynonymTypeDef(
49
+ reference=default_reference(PREFIX, "alias_symbol", name="alias symbol")
50
+ )
51
+ previous_name_type = SynonymTypeDef(
52
+ reference=default_reference(PREFIX, "previous_name", name="previous name")
53
+ )
54
+ alias_name_type = SynonymTypeDef(
55
+ reference=default_reference(PREFIX, "alias_name", name="alias name")
56
+ )
57
+ HAS_LOCUS_TYPE = TypeDef(
58
+ reference=default_reference(PREFIX, "locus_type", name="has locus type"), is_metadata_tag=True
59
+ )
60
+ HAS_LOCUS_GROUP = TypeDef(
61
+ reference=default_reference(PREFIX, "locus_group", name="has locus group"), is_metadata_tag=True
62
+ )
63
+ HAS_LOCATION = TypeDef(
64
+ reference=default_reference(PREFIX, "location", name="has location"), is_metadata_tag=True
65
+ )
49
66
 
50
67
  #: First column is MIRIAM prefix, second column is HGNC key
51
68
  gene_xrefs = [
@@ -129,6 +146,7 @@ SKIP_KEYS = {
129
146
  "cd", # symbol
130
147
  "homeodb", # TODO add to bioregistry, though this is defunct
131
148
  "mamit-trnadb", # TODO add to bioregistry, though this is defunct
149
+ "mane_select", # TODO
132
150
  }
133
151
 
134
152
  #: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
@@ -167,38 +185,8 @@ LOCUS_TYPE_TO_SO = {
167
185
  None: "0000704", # gene
168
186
  }
169
187
 
170
- IDSPACES = {
171
- prefix: f"https://bioregistry.io/{prefix}:"
172
- for prefix in {
173
- "rgd",
174
- "mgi",
175
- "eccode",
176
- "rnacentral",
177
- "pubmed",
178
- "uniprot",
179
- "mirbase",
180
- "snornabase",
181
- "hgnc",
182
- "hgnc.genegroup",
183
- "debio",
184
- "ensembl",
185
- "NCBIGene",
186
- "vega",
187
- "ucsc",
188
- "ena",
189
- "ccds",
190
- "omim",
191
- "cosmic",
192
- "merops",
193
- "orphanet",
194
- "pseudogene",
195
- "lncipedia",
196
- "refseq",
197
- }
198
- }
199
- IDSPACES.update(
200
- NCBITaxon="http://purl.obolibrary.org/obo/NCBITaxon_",
201
- SO="http://purl.obolibrary.org/obo/SO_",
188
+ PUBLICATION_TERM = Term(
189
+ reference=Reference(prefix="IAO", identifier="0000013", name="journal article")
202
190
  )
203
191
 
204
192
 
@@ -214,8 +202,11 @@ class HGNCGetter(Obo):
214
202
  orthologous,
215
203
  member_of,
216
204
  exact_match,
205
+ has_citation,
206
+ HAS_LOCUS_GROUP,
207
+ HAS_LOCUS_TYPE,
208
+ HAS_LOCATION,
217
209
  ]
218
- idspaces = IDSPACES
219
210
  synonym_typedefs = [
220
211
  previous_name_type,
221
212
  previous_symbol_type,
@@ -233,12 +224,7 @@ class HGNCGetter(Obo):
233
224
  return get_terms(force=force, version=self.data_version)
234
225
 
235
226
 
236
- def get_obo(*, force: bool = False) -> Obo:
237
- """Get HGNC as OBO."""
238
- return HGNCGetter(force=force)
239
-
240
-
241
- def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
227
+ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
242
228
  """Get HGNC terms."""
243
229
  if version is None:
244
230
  version = get_version("hgnc")
@@ -251,18 +237,15 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
251
237
  version=version,
252
238
  name="hgnc_complete_set.json",
253
239
  )
254
- with open(path) as file:
240
+ with path.open() as file:
255
241
  entries = json.load(file)["response"]["docs"]
256
242
 
257
243
  yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens")
258
- yield from sorted(
259
- {
260
- Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
261
- for so_id in sorted(LOCUS_TYPE_TO_SO.values())
262
- if so_id
263
- },
264
- key=attrgetter("identifier"),
265
- )
244
+ _so_ids: set[str] = {s for s in LOCUS_TYPE_TO_SO.values() if s}
245
+ yield from [
246
+ Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
247
+ for so_id in sorted(_so_ids)
248
+ ]
266
249
 
267
250
  statuses = set()
268
251
  for entry in tqdm(entries, desc=f"Mapping {PREFIX}", unit="gene", unit_scale=True):
@@ -273,7 +256,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
273
256
  )
274
257
  status = entry.pop("status")
275
258
  if status == "Approved":
276
- is_obsolete = False
259
+ is_obsolete = None
277
260
  elif status not in statuses:
278
261
  statuses.add(status)
279
262
  tqdm.write(f"[{PREFIX}] unhandled {status}")
@@ -297,7 +280,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
297
280
  continue # only add concrete annotations
298
281
  term.append_relationship(
299
282
  gene_product_member_of,
300
- Reference(prefix="eccode", identifier=ec_code),
283
+ Reference(prefix="ec", identifier=ec_code),
301
284
  )
302
285
  for rna_central_ids in entry.pop("rna_central_id", []):
303
286
  for rna_central_id in rna_central_ids.split(","):
@@ -364,7 +347,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
364
347
  xref_identifiers = entry.pop(key, None)
365
348
  if xref_identifiers is None:
366
349
  continue
367
- if isinstance(xref_identifiers, (str, int)):
350
+ if isinstance(xref_identifiers, str | int):
368
351
  xref_identifiers = [str(xref_identifiers)]
369
352
 
370
353
  if xref_prefix == "merops.entry":
@@ -389,7 +372,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
389
372
 
390
373
  gene_group_ids = entry.pop("gene_group_id", [])
391
374
  gene_groups = entry.pop("gene_group", [])
392
- for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups):
375
+ for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups, strict=False):
393
376
  term.append_relationship(
394
377
  member_of,
395
378
  Reference(
@@ -400,20 +383,20 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
400
383
  )
401
384
 
402
385
  for alias_symbol in entry.pop("alias_symbol", []):
403
- term.append_synonym(Synonym(name=alias_symbol, type=alias_symbol_type))
386
+ term.append_synonym(alias_symbol, type=alias_symbol_type)
404
387
  for alias_name in entry.pop("alias_name", []):
405
- term.append_synonym(Synonym(name=alias_name, type=alias_name_type))
388
+ term.append_synonym(alias_name, type=alias_name_type)
406
389
  for previous_symbol in itt.chain(
407
390
  entry.pop("previous_symbol", []), entry.pop("prev_symbol", [])
408
391
  ):
409
- term.append_synonym(Synonym(name=previous_symbol, type=previous_symbol_type))
392
+ term.append_synonym(previous_symbol, type=previous_symbol_type)
410
393
  for previous_name in entry.pop("prev_name", []):
411
- term.append_synonym(Synonym(name=previous_name, type=previous_name_type))
394
+ term.append_synonym(previous_name, type=previous_name_type)
412
395
 
413
- for prop in ["location"]:
396
+ for prop, td in [("location", HAS_LOCATION)]:
414
397
  value = entry.pop(prop, None)
415
398
  if value:
416
- term.append_property(prop, value)
399
+ term.annotate_string(td, value)
417
400
 
418
401
  locus_type = entry.pop("locus_type")
419
402
  locus_group = entry.pop("locus_group")
@@ -425,8 +408,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
425
408
  Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
426
409
  ) # gene
427
410
  unhandle_locus_types[locus_type][identifier] = term
428
- term.append_property("locus_type", locus_type)
429
- term.append_property("locus_group", locus_group)
411
+ term.annotate_string(HAS_LOCUS_TYPE, locus_type)
412
+ term.annotate_string(HAS_LOCUS_GROUP, locus_group)
430
413
 
431
414
  term.set_species(identifier="9606", name="Homo sapiens")
432
415
 
@@ -453,9 +436,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
453
436
  hgnc_id,
454
437
  term.name,
455
438
  term.is_obsolete,
456
- term.bioregistry_link,
439
+ f"https://bioregistry.io/{term.curie}",
457
440
  ", ".join(
458
- p.bioregistry_link for p in term.provenance if p.bioregistry_link
441
+ f"https://bioregistry.io/{p.curie}"
442
+ for p in term.provenance
443
+ if isinstance(p, Reference)
459
444
  ),
460
445
  )
461
446
  for hgnc_id, term in sorted(v.items())
@@ -472,7 +457,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
472
457
  logger.warning(
473
458
  "Unhandled locus types:\n%s", tabulate(unhandle_locus_type_counter.most_common())
474
459
  )
475
- logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))
460
+ if unhandled_entry_keys:
461
+ logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))
476
462
 
477
463
 
478
464
  if __name__ == "__main__":
@@ -5,16 +5,9 @@ from collections.abc import Iterable, Mapping
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from ..struct import (
9
- Obo,
10
- Reference,
11
- Synonym,
12
- SynonymTypeDef,
13
- Term,
14
- enables,
15
- from_species,
16
- )
17
- from ..utils.path import ensure_path
8
+ from ...struct import Obo, Reference, SynonymTypeDef, Term, has_citation
9
+ from ...struct.typedef import enables, exact_match, from_species
10
+ from ...utils.path import ensure_path
18
11
 
19
12
  __all__ = [
20
13
  "HGNCGroupGetter",
@@ -36,18 +29,13 @@ class HGNCGroupGetter(Obo):
36
29
  ontology = PREFIX
37
30
  bioversions_key = "hgnc"
38
31
  synonym_typedefs = [symbol_type]
39
- typedefs = [from_species, enables]
32
+ typedefs = [from_species, enables, exact_match, has_citation]
40
33
 
41
34
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
42
35
  """Iterate over terms in the ontology."""
43
36
  return get_terms(force=force)
44
37
 
45
38
 
46
- def get_obo(force: bool = False) -> Obo:
47
- """Get HGNC Gene Groups as OBO."""
48
- return HGNCGroupGetter(force=force)
49
-
50
-
51
39
  def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
52
40
  """Get the HGNC Gene Families hierarchy as a dictionary."""
53
41
  path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
@@ -99,12 +87,14 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
99
87
  )
100
88
  if pubmed_ids and pd.notna(pubmed_ids):
101
89
  for s in pubmed_ids.replace(" ", ",").split(","):
102
- term.append_provenance(Reference(prefix="pubmed", identifier=s.strip()))
90
+ s = s.strip()
91
+ if s:
92
+ term.append_provenance(Reference(prefix="pubmed", identifier=s))
103
93
  if desc_go and pd.notna(desc_go):
104
94
  go_id = desc_go[len("http://purl.uniprot.org/go/") :]
105
95
  term.append_relationship(enables, Reference(prefix="GO", identifier=go_id))
106
96
  if symbol and pd.notna(symbol):
107
- term.append_synonym(Synonym(name=symbol, type=symbol_type))
97
+ term.append_synonym(symbol, type=symbol_type)
108
98
  term.set_species(identifier="9606", name="Homo sapiens")
109
99
  yield term
110
100
 
@@ -0,0 +1,9 @@
1
+ """Resources from ICD."""
2
+
3
+ from .icd10 import ICD10Getter
4
+ from .icd11 import ICD11Getter
5
+
6
+ __all__ = [
7
+ "ICD10Getter",
8
+ "ICD11Getter",
9
+ ]
@@ -1,24 +1,27 @@
1
1
  """Convert ICD-10 to OBO.
2
2
 
3
- Run with python -m pyobo.sources.icd10 -v
3
+ Run with ``python -m pyobo.sources.icd.icd10 -v``.
4
+
5
+ .. note::
6
+
7
+ If web requests are stalling, try deleting the ``~/.cachier`` directory.
4
8
  """
5
9
 
6
10
  import logging
7
11
  from collections.abc import Iterable, Mapping
12
+ from pathlib import Path
8
13
  from typing import Any
9
14
 
10
- import click
11
- from more_click import verbose_option
12
15
  from tqdm.auto import tqdm
13
16
 
14
- from ..sources.icd_utils import (
17
+ from .icd_utils import (
15
18
  ICD10_TOP_LEVEL_URL,
16
19
  get_child_identifiers,
17
- get_icd,
20
+ get_icd_10_top,
18
21
  visiter,
19
22
  )
20
- from ..struct import Obo, Reference, Synonym, Term
21
- from ..utils.path import prefix_directory_join
23
+ from ...struct import Obo, Reference, Synonym, Term, has_category
24
+ from ...utils.path import prefix_directory_join
22
25
 
23
26
  __all__ = [
24
27
  "ICD10Getter",
@@ -34,37 +37,39 @@ class ICD10Getter(Obo):
34
37
  """An ontology representation of ICD-10."""
35
38
 
36
39
  ontology = PREFIX
37
- dynamic_version = True
40
+ static_version = VERSION
41
+ typedefs = [has_category]
38
42
 
39
43
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
40
44
  """Iterate over terms in the ontology."""
41
- return iter_terms()
45
+ return iter_terms(self._version_or_raise)
42
46
 
43
47
 
44
- def get_obo() -> Obo:
45
- """Get ICD-10 as OBO."""
46
- return ICD10Getter()
48
+ def _get_chapters(version: str, path: Path):
49
+ res_json = get_icd_10_top(version=version, path=path)
50
+ chapter_urls = res_json["child"]
51
+ tqdm.write(f"there are {len(chapter_urls)} chapters")
52
+ identifiers = get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json)
53
+ return identifiers
47
54
 
48
55
 
49
- def iter_terms() -> Iterable[Term]:
56
+ def iter_terms(version: str) -> Iterable[Term]:
50
57
  """Iterate over ICD-10 terms."""
51
- r = get_icd(ICD10_TOP_LEVEL_URL)
52
- res_json = r.json()
53
-
54
- directory = prefix_directory_join(PREFIX, version=VERSION)
55
-
56
- chapter_urls = res_json["child"]
57
- tqdm.write(f"there are {len(chapter_urls)} chapters")
58
+ directory = prefix_directory_join(PREFIX, version=version)
59
+ identifiers = _get_chapters(version=version, path=directory.joinpath("top.json"))
58
60
 
59
61
  visited_identifiers: set[str] = set()
60
- for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
61
- yield from visiter(
62
- identifier,
63
- visited_identifiers,
64
- directory,
65
- endpoint=ICD10_TOP_LEVEL_URL,
66
- converter=_extract_icd10,
67
- )
62
+ with tqdm(desc=f"[{PREFIX}]") as pbar:
63
+ for identifier in identifiers:
64
+ for term in visiter(
65
+ identifier,
66
+ visited_identifiers,
67
+ directory,
68
+ endpoint=ICD10_TOP_LEVEL_URL,
69
+ converter=_extract_icd10,
70
+ ):
71
+ pbar.update(1)
72
+ yield term
68
73
 
69
74
 
70
75
  def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
@@ -81,17 +86,10 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
81
86
  synonyms=synonyms,
82
87
  parents=parents,
83
88
  )
84
-
85
- rv.append_property("class_kind", res_json["classKind"])
89
+ rv.annotate_string(has_category, res_json["classKind"])
86
90
 
87
91
  return rv
88
92
 
89
93
 
90
- @click.command()
91
- @verbose_option
92
- def _main():
93
- get_obo().write_default(use_tqdm=True)
94
-
95
-
96
94
  if __name__ == "__main__":
97
- _main()
95
+ ICD10Getter.cli()
@@ -0,0 +1,148 @@
1
+ """Convert ICD11 to OBO.
2
+
3
+ Run with ``python -m pyobo.sources.icd.icd11 -v``.
4
+
5
+ .. note::
6
+
7
+ If web requests are stalling, try deleting the ``~/.cachier`` directory.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ from collections.abc import Iterable, Mapping
13
+ from typing import Any
14
+
15
+ from tqdm.auto import tqdm
16
+
17
+ from .icd_utils import (
18
+ ICD11_TOP_LEVEL_URL,
19
+ ICDError,
20
+ get_child_identifiers,
21
+ get_icd,
22
+ get_icd_11_mms,
23
+ visiter,
24
+ )
25
+ from ...struct import Obo, Reference, Synonym, Term, TypeDef, default_reference
26
+ from ...utils.path import prefix_directory_join
27
+
28
+ __all__ = [
29
+ "ICD11Getter",
30
+ ]
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ PREFIX = "icd11"
35
+ CODE_PREFIX = "icd11.code"
36
+
37
+ CODE_PROP = TypeDef(reference=default_reference(PREFIX, "icd_mms_code"), is_metadata_tag=True)
38
+
39
+
40
+ class ICD11Getter(Obo):
41
+ """An ontology representation of ICD-11."""
42
+
43
+ ontology = PREFIX
44
+ typedefs = [CODE_PROP]
45
+ dynamic_version = True
46
+
47
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
48
+ """Iterate over terms in the ontology."""
49
+ return iterate_icd11()
50
+
51
+
52
+ def iterate_icd11(version: str | None = None) -> Iterable[Term]:
53
+ """Iterate over the terms in ICD11 and enrich them with MMS."""
54
+ # Get all terms from the ICD foundation API
55
+ version_strict, terms = _get_icd11_terms_helper(version=version)
56
+
57
+ # prepare a directory for enriching from MMS
58
+ mms_directory = prefix_directory_join(PREFIX, "mms", version=version_strict)
59
+
60
+ # this takes a bit more than 2 hours
61
+ for term in tqdm(terms, desc="Getting MMS", unit_scale=True):
62
+ path = mms_directory.joinpath(term.identifier).with_suffix(".json")
63
+ if path.exists():
64
+ mms_data = json.loads(path.read_text())
65
+ else:
66
+ try:
67
+ mms_data = get_icd_11_mms(term.identifier)
68
+ except ICDError:
69
+ # writing this isn't necessary since not all terms have MMS entries
70
+ # tqdm.write(str(e))
71
+ mms_data = {}
72
+ path.write_text(json.dumps(mms_data))
73
+
74
+ if code := mms_data.get("code"):
75
+ term.append_exact_match(Reference(prefix=CODE_PREFIX, identifier=code))
76
+
77
+ yield term
78
+
79
+
80
+ def _get_icd11_terms_helper(version: str | None = None) -> tuple[str, list[Term]]:
81
+ """Iterate over the terms in ICD11.
82
+
83
+ The API doesn't seem to have a rate limit, but it responds slowly. This means that
84
+ it only gets results at about 5 calls/second. Get ready to be patient - the API
85
+ token expires every hour, so there's a caching mechanism with :mod:`cachier` that
86
+ gets a new one every hour.
87
+ """
88
+ if version is not None:
89
+ directory = prefix_directory_join(PREFIX, "base", version=version)
90
+ top_path = directory.joinpath("top.json")
91
+ if top_path.is_file():
92
+ res_json = json.loads(top_path.read_text())
93
+ else:
94
+ res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
95
+ top_path.write_text(json.dumps(res_json, indent=2))
96
+ else:
97
+ tqdm.write("No version passed, looking up version from ICD11")
98
+ res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
99
+ version = res_json["releaseId"]
100
+ directory = prefix_directory_join(PREFIX, "base", version=version)
101
+ top_path = directory.joinpath("top.json")
102
+ with top_path.open("w") as file:
103
+ json.dump(res_json, file, indent=2)
104
+
105
+ tqdm.write(f"There are {len(res_json['child'])} top level entities")
106
+
107
+ visited_identifiers: set[str] = set()
108
+ rv: list[Term] = []
109
+ for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
110
+ rv.extend(
111
+ visiter(
112
+ identifier,
113
+ visited_identifiers,
114
+ directory,
115
+ endpoint=ICD11_TOP_LEVEL_URL,
116
+ converter=_extract_icd11,
117
+ )
118
+ )
119
+
120
+ return version, rv
121
+
122
+
123
+ def _extract_icd11(res_json: Mapping[str, Any]) -> Term:
124
+ identifier = res_json["@id"][len(ICD11_TOP_LEVEL_URL) :].lstrip("/")
125
+ if "definition" in res_json:
126
+ definition = res_json["definition"]["@value"]
127
+ definition = definition.strip().replace("\r\n", " ")
128
+ definition = definition.strip().replace("\\n", " ")
129
+ definition = definition.strip().replace("\n", " ")
130
+ else:
131
+ definition = None
132
+ name = res_json["title"]["@value"]
133
+ synonyms = [Synonym(synonym["label"]["@value"]) for synonym in res_json.get("synonym", [])]
134
+ parents = [
135
+ Reference(prefix=PREFIX, identifier=url[len("http://id.who.int/icd/entity/") :])
136
+ for url in res_json["parent"]
137
+ if url[len("http://id.who.int/icd/entity/") :]
138
+ ]
139
+ return Term(
140
+ reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
141
+ definition=definition,
142
+ synonyms=synonyms,
143
+ parents=parents,
144
+ )
145
+
146
+
147
+ if __name__ == "__main__":
148
+ ICD11Getter.cli()