pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. pyobo/__init__.py +0 -2
  2. pyobo/__main__.py +0 -2
  3. pyobo/api/__init__.py +0 -2
  4. pyobo/api/alts.py +6 -7
  5. pyobo/api/hierarchy.py +14 -15
  6. pyobo/api/metadata.py +3 -4
  7. pyobo/api/names.py +31 -32
  8. pyobo/api/properties.py +6 -7
  9. pyobo/api/relations.py +12 -11
  10. pyobo/api/species.py +5 -6
  11. pyobo/api/typedefs.py +1 -3
  12. pyobo/api/utils.py +61 -5
  13. pyobo/api/xrefs.py +4 -5
  14. pyobo/aws.py +3 -5
  15. pyobo/cli/__init__.py +0 -2
  16. pyobo/cli/aws.py +0 -2
  17. pyobo/cli/cli.py +0 -4
  18. pyobo/cli/database.py +1 -3
  19. pyobo/cli/lookup.py +0 -2
  20. pyobo/cli/utils.py +0 -2
  21. pyobo/constants.py +1 -33
  22. pyobo/getters.py +19 -26
  23. pyobo/gilda_utils.py +19 -17
  24. pyobo/identifier_utils.py +10 -10
  25. pyobo/mocks.py +5 -6
  26. pyobo/normalizer.py +24 -24
  27. pyobo/obographs.py +8 -5
  28. pyobo/plugins.py +3 -4
  29. pyobo/py.typed +0 -0
  30. pyobo/reader.py +19 -21
  31. pyobo/registries/__init__.py +0 -2
  32. pyobo/registries/metaregistry.py +6 -8
  33. pyobo/resource_utils.py +1 -3
  34. pyobo/resources/__init__.py +0 -2
  35. pyobo/resources/ncbitaxon.py +2 -3
  36. pyobo/resources/ro.py +2 -4
  37. pyobo/resources/so.py +55 -0
  38. pyobo/resources/so.tsv +2604 -0
  39. pyobo/sources/README.md +15 -0
  40. pyobo/sources/__init__.py +0 -2
  41. pyobo/sources/agrovoc.py +3 -3
  42. pyobo/sources/antibodyregistry.py +2 -3
  43. pyobo/sources/biogrid.py +4 -4
  44. pyobo/sources/ccle.py +3 -4
  45. pyobo/sources/cgnc.py +1 -3
  46. pyobo/sources/chebi.py +2 -4
  47. pyobo/sources/chembl.py +1 -3
  48. pyobo/sources/civic_gene.py +2 -3
  49. pyobo/sources/complexportal.py +57 -20
  50. pyobo/sources/conso.py +2 -4
  51. pyobo/sources/cpt.py +1 -3
  52. pyobo/sources/credit.py +1 -1
  53. pyobo/sources/cvx.py +1 -3
  54. pyobo/sources/depmap.py +3 -4
  55. pyobo/sources/dictybase_gene.py +15 -12
  56. pyobo/sources/drugbank.py +6 -7
  57. pyobo/sources/drugbank_salt.py +3 -4
  58. pyobo/sources/drugcentral.py +9 -8
  59. pyobo/sources/expasy.py +33 -16
  60. pyobo/sources/famplex.py +3 -5
  61. pyobo/sources/flybase.py +5 -6
  62. pyobo/sources/geonames.py +1 -1
  63. pyobo/sources/gmt_utils.py +5 -6
  64. pyobo/sources/go.py +4 -6
  65. pyobo/sources/gwascentral_phenotype.py +1 -3
  66. pyobo/sources/gwascentral_study.py +2 -3
  67. pyobo/sources/hgnc.py +30 -26
  68. pyobo/sources/hgncgenefamily.py +9 -11
  69. pyobo/sources/icd10.py +3 -4
  70. pyobo/sources/icd11.py +3 -4
  71. pyobo/sources/icd_utils.py +6 -7
  72. pyobo/sources/interpro.py +3 -5
  73. pyobo/sources/itis.py +1 -3
  74. pyobo/sources/kegg/__init__.py +0 -2
  75. pyobo/sources/kegg/api.py +3 -4
  76. pyobo/sources/kegg/genes.py +3 -4
  77. pyobo/sources/kegg/genome.py +19 -9
  78. pyobo/sources/kegg/pathway.py +5 -6
  79. pyobo/sources/mesh.py +19 -21
  80. pyobo/sources/mgi.py +1 -3
  81. pyobo/sources/mirbase.py +13 -9
  82. pyobo/sources/mirbase_constants.py +0 -2
  83. pyobo/sources/mirbase_family.py +1 -3
  84. pyobo/sources/mirbase_mature.py +1 -3
  85. pyobo/sources/msigdb.py +4 -5
  86. pyobo/sources/ncbigene.py +3 -5
  87. pyobo/sources/npass.py +2 -4
  88. pyobo/sources/omim_ps.py +1 -3
  89. pyobo/sources/pathbank.py +35 -28
  90. pyobo/sources/pfam.py +1 -3
  91. pyobo/sources/pfam_clan.py +1 -3
  92. pyobo/sources/pid.py +3 -5
  93. pyobo/sources/pombase.py +7 -6
  94. pyobo/sources/pubchem.py +2 -3
  95. pyobo/sources/reactome.py +30 -11
  96. pyobo/sources/rgd.py +3 -4
  97. pyobo/sources/rhea.py +7 -8
  98. pyobo/sources/ror.py +3 -2
  99. pyobo/sources/selventa/__init__.py +0 -2
  100. pyobo/sources/selventa/schem.py +1 -3
  101. pyobo/sources/selventa/scomp.py +1 -3
  102. pyobo/sources/selventa/sdis.py +1 -3
  103. pyobo/sources/selventa/sfam.py +1 -3
  104. pyobo/sources/sgd.py +1 -3
  105. pyobo/sources/slm.py +29 -17
  106. pyobo/sources/umls/__init__.py +0 -2
  107. pyobo/sources/umls/__main__.py +0 -2
  108. pyobo/sources/umls/get_synonym_types.py +1 -1
  109. pyobo/sources/umls/umls.py +2 -4
  110. pyobo/sources/uniprot/__init__.py +0 -2
  111. pyobo/sources/uniprot/uniprot.py +11 -10
  112. pyobo/sources/uniprot/uniprot_ptm.py +6 -5
  113. pyobo/sources/utils.py +3 -5
  114. pyobo/sources/wikipathways.py +1 -3
  115. pyobo/sources/zfin.py +20 -9
  116. pyobo/ssg/__init__.py +3 -2
  117. pyobo/struct/__init__.py +0 -2
  118. pyobo/struct/reference.py +22 -23
  119. pyobo/struct/struct.py +132 -116
  120. pyobo/struct/typedef.py +14 -10
  121. pyobo/struct/utils.py +0 -2
  122. pyobo/utils/__init__.py +0 -2
  123. pyobo/utils/cache.py +14 -6
  124. pyobo/utils/io.py +9 -10
  125. pyobo/utils/iter.py +5 -6
  126. pyobo/utils/misc.py +1 -3
  127. pyobo/utils/ndex_utils.py +6 -7
  128. pyobo/utils/path.py +4 -5
  129. pyobo/version.py +3 -5
  130. pyobo/xrefdb/__init__.py +0 -2
  131. pyobo/xrefdb/canonicalizer.py +27 -18
  132. pyobo/xrefdb/priority.py +0 -2
  133. pyobo/xrefdb/sources/__init__.py +3 -4
  134. pyobo/xrefdb/sources/biomappings.py +0 -2
  135. pyobo/xrefdb/sources/cbms2019.py +0 -2
  136. pyobo/xrefdb/sources/chembl.py +0 -2
  137. pyobo/xrefdb/sources/compath.py +1 -3
  138. pyobo/xrefdb/sources/famplex.py +3 -5
  139. pyobo/xrefdb/sources/gilda.py +0 -2
  140. pyobo/xrefdb/sources/intact.py +5 -5
  141. pyobo/xrefdb/sources/ncit.py +1 -3
  142. pyobo/xrefdb/sources/pubchem.py +2 -5
  143. pyobo/xrefdb/sources/wikidata.py +2 -4
  144. pyobo/xrefdb/xrefs_pipeline.py +15 -16
  145. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
  146. pyobo-0.11.1.dist-info/METADATA +711 -0
  147. pyobo-0.11.1.dist-info/RECORD +173 -0
  148. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
  149. pyobo-0.11.1.dist-info/entry_points.txt +2 -0
  150. pyobo-0.10.12.dist-info/METADATA +0 -499
  151. pyobo-0.10.12.dist-info/RECORD +0 -169
  152. pyobo-0.10.12.dist-info/entry_points.txt +0 -15
  153. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0
pyobo/sources/famplex.py CHANGED
@@ -1,10 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for FamPlex."""
4
2
 
5
3
  import logging
6
4
  from collections import defaultdict
7
- from typing import Iterable, List, Mapping, Tuple
5
+ from collections.abc import Iterable, Mapping
8
6
 
9
7
  import bioregistry
10
8
  from pystow.utils import get_commit
@@ -62,7 +60,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
62
60
  dtype=str,
63
61
  force=force,
64
62
  )
65
- id_to_definition: Mapping[str, Tuple[str, str]] = {
63
+ id_to_definition: Mapping[str, tuple[str, str]] = {
66
64
  identifier: (definition, provenance)
67
65
  for identifier, provenance, definition in definitions_df.values
68
66
  }
@@ -140,7 +138,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
140
138
  yield term
141
139
 
142
140
 
143
- def _get_xref_df(version: str) -> Mapping[str, List[Reference]]:
141
+ def _get_xref_df(version: str) -> Mapping[str, list[Reference]]:
144
142
  base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
145
143
  xrefs_url = f"{base_url}/equivalences.csv"
146
144
  xrefs_df = ensure_df(PREFIX, url=xrefs_url, version=version, header=None, sep=",", dtype=str)
pyobo/sources/flybase.py CHANGED
@@ -1,14 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for FlyBase Genes."""
4
2
 
5
3
  import logging
6
- from typing import Iterable, Mapping, Set
4
+ from collections.abc import Iterable, Mapping
7
5
 
8
6
  import pandas as pd
9
7
  from tqdm.auto import tqdm
10
8
 
11
9
  from pyobo import Reference
10
+ from pyobo.resources.so import get_so_name
12
11
  from pyobo.struct import Obo, Term, from_species, orthologous
13
12
  from pyobo.utils.io import multisetdict
14
13
  from pyobo.utils.path import ensure_df
@@ -68,7 +67,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
68
67
  return dict(df.values)
69
68
 
70
69
 
71
- def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, Set[str]]:
70
+ def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
72
71
  url = (
73
72
  f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
74
73
  f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
@@ -135,7 +134,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
135
134
  "FlyBase gene type is missing mapping to Sequence Ontology (SO): %s", gtype
136
135
  )
137
136
  else:
138
- so[gtype] = Reference.auto("SO", so_id)
137
+ so[gtype] = Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))
139
138
 
140
139
  for _, reference in sorted(so.items()):
141
140
  yield Term(reference=reference)
@@ -155,7 +154,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
155
154
  for hgnc_curie in human_orthologs.get(identifier, []):
156
155
  if not hgnc_curie or pd.isna(hgnc_curie):
157
156
  continue
158
- hgnc_ortholog = Reference.from_curie(hgnc_curie, auto=True)
157
+ hgnc_ortholog = Reference.from_curie(hgnc_curie)
159
158
  if hgnc_ortholog is None:
160
159
  tqdm.write(f"[{PREFIX}] {identifier} had invalid ortholog: {hgnc_curie}")
161
160
  else:
pyobo/sources/geonames.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
- from typing import Collection, Iterable, Mapping
6
+ from collections.abc import Collection, Iterable, Mapping
7
7
 
8
8
  import pandas as pd
9
9
  from pystow.utils import read_zipfile_csv
pyobo/sources/gmt_utils.py CHANGED
@@ -1,12 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """GMT utilities."""
4
2
 
3
+ from collections.abc import Iterable
5
4
  from pathlib import Path
6
- from typing import Iterable, Set, Tuple, Union
5
+ from typing import Union
7
6
 
8
- GMTSummary = Tuple[str, str, Set[str]]
9
- WikiPathwaysGMTSummary = Tuple[str, str, str, str, str, Set[str]]
7
+ GMTSummary = tuple[str, str, set[str]]
8
+ WikiPathwaysGMTSummary = tuple[str, str, str, str, str, set[str]]
10
9
 
11
10
 
12
11
  def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
@@ -20,7 +19,7 @@ def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
20
19
  yield _process_line(line)
21
20
 
22
21
 
23
- def _process_line(line: str) -> Tuple[str, str, Set[str]]:
22
+ def _process_line(line: str) -> tuple[str, str, set[str]]:
24
23
  """Return the pathway name, url, and gene sets associated.
25
24
 
26
25
  :param line: gmt file line
pyobo/sources/go.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Gene Ontology."""
4
2
 
5
3
  from pyobo import get_descendants
@@ -14,13 +12,13 @@ __all__ = [
14
12
  def is_biological_process(identifier: str) -> bool:
15
13
  """Return if the given GO identifier is a biological process.
16
14
 
17
- >>> is_biological_process('0006915')
15
+ >>> is_biological_process("0006915")
18
16
  True
19
- >>> is_biological_process('GO:0006915')
17
+ >>> is_biological_process("GO:0006915")
20
18
  True
21
- >>> is_molecular_function('0006915')
19
+ >>> is_molecular_function("0006915")
22
20
  False
23
- >>> is_cellular_component('0006915')
21
+ >>> is_cellular_component("0006915")
24
22
  False
25
23
  """
26
24
  return _is_descendant(identifier, "0008150")
pyobo/sources/gwascentral_phenotype.py CHANGED
@@ -1,9 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for GWAS Central Phenotypes."""
4
2
 
5
3
  import json
6
- from typing import Iterable
4
+ from collections.abc import Iterable
7
5
 
8
6
  from tqdm.auto import tqdm, trange
9
7
 
pyobo/sources/gwascentral_study.py CHANGED
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for GWAS Central."""
4
2
 
5
3
  import logging
6
4
  import tarfile
7
- from typing import Iterable, Optional
5
+ from collections.abc import Iterable
6
+ from typing import Optional
8
7
  from xml.etree import ElementTree
9
8
 
10
9
  from pyobo.struct import Obo, Reference, Term, has_part
pyobo/sources/hgnc.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for HGNC."""
4
2
 
5
3
  import itertools as itt
@@ -7,13 +5,15 @@ import json
7
5
  import logging
8
6
  import typing
9
7
  from collections import Counter, defaultdict
8
+ from collections.abc import Iterable
10
9
  from operator import attrgetter
11
- from typing import DefaultDict, Dict, Iterable, Optional
10
+ from typing import Optional
12
11
 
13
12
  from tabulate import tabulate
14
13
  from tqdm.auto import tqdm
15
14
 
16
15
  from pyobo.api.utils import get_version
16
+ from pyobo.resources.so import get_so_name
17
17
  from pyobo.struct import (
18
18
  Obo,
19
19
  Reference,
@@ -38,8 +38,8 @@ logger = logging.getLogger(__name__)
38
38
 
39
39
  PREFIX = "hgnc"
40
40
  DEFINITIONS_URL_FMT = (
41
- "http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/"
42
- "archive/monthly/json/hgnc_complete_set_{version}.json"
41
+ "https://storage.googleapis.com/public-download-files/hgnc/archive/archive/monthly/json/"
42
+ "hgnc_complete_set_{version}.json"
43
43
  )
44
44
 
45
45
  previous_symbol_type = SynonymTypeDef.from_text("previous_symbol")
@@ -223,7 +223,7 @@ class HGNCGetter(Obo):
223
223
  alias_symbol_type,
224
224
  ]
225
225
  root_terms = [
226
- Reference(prefix="so", identifier=so_id)
226
+ Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))
227
227
  for so_id in sorted(set(LOCUS_TYPE_TO_SO.values()))
228
228
  if so_id
229
229
  ]
@@ -238,12 +238,12 @@ def get_obo(*, force: bool = False) -> Obo:
238
238
  return HGNCGetter(force=force)
239
239
 
240
240
 
241
- def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901
241
+ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
242
242
  """Get HGNC terms."""
243
243
  if version is None:
244
244
  version = get_version("hgnc")
245
245
  unhandled_entry_keys: typing.Counter[str] = Counter()
246
- unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
246
+ unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict)
247
247
  path = ensure_path(
248
248
  PREFIX,
249
249
  url=DEFINITIONS_URL_FMT.format(version=version),
@@ -257,7 +257,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
257
257
  yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens")
258
258
  yield from sorted(
259
259
  {
260
- Term(reference=Reference.auto("SO", so_id))
260
+ Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
261
261
  for so_id in sorted(LOCUS_TYPE_TO_SO.values())
262
262
  if so_id
263
263
  },
@@ -364,23 +364,25 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
364
364
  xref_identifiers = entry.pop(key, None)
365
365
  if xref_identifiers is None:
366
366
  continue
367
-
368
367
  if isinstance(xref_identifiers, (str, int)):
368
+ xref_identifiers = [str(xref_identifiers)]
369
+
370
+ if xref_prefix == "merops.entry":
371
+ continue
372
+ # e.g., XM02-001 should be rewritten as XM02.001
373
+ xref_identifiers = [i.replace("-", ".") for i in xref_identifiers]
374
+
375
+ if xref_prefix == "refseq":
376
+ # e.g., strip off dots without substantiated record versions like in NM_021728.
377
+ xref_identifiers = [i.strip(".") for i in xref_identifiers]
378
+
379
+ if len(xref_identifiers) == 1:
369
380
  term.append_exact_match(
370
- Reference(prefix=xref_prefix, identifier=str(xref_identifiers))
381
+ Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
371
382
  )
372
- elif isinstance(xref_identifiers, list):
373
- if len(xref_identifiers) == 1:
374
- term.append_exact_match(
375
- Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
376
- )
377
- else:
378
- for xref_identifier in xref_identifiers:
379
- term.append_xref(
380
- Reference(prefix=xref_prefix, identifier=str(xref_identifier))
381
- )
382
383
  else:
383
- raise TypeError
384
+ for xref_identifier in xref_identifiers:
385
+ term.append_xref(Reference(prefix=xref_prefix, identifier=str(xref_identifier)))
384
386
 
385
387
  for pubmed_id in entry.pop("pubmed_id", []):
386
388
  term.append_provenance(Reference(prefix="pubmed", identifier=str(pubmed_id)))
@@ -417,9 +419,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
417
419
  locus_group = entry.pop("locus_group")
418
420
  so_id = LOCUS_TYPE_TO_SO.get(locus_type)
419
421
  if so_id:
420
- term.append_parent(Reference.auto("SO", so_id))
422
+ term.append_parent(Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
421
423
  else:
422
- term.append_parent(Reference.auto("SO", "0000704")) # gene
424
+ term.append_parent(
425
+ Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
426
+ ) # gene
423
427
  unhandle_locus_types[locus_type][identifier] = term
424
428
  term.append_property("locus_type", locus_type)
425
429
  term.append_property("locus_group", locus_group)
@@ -459,8 +463,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
459
463
  headers=["hgnc_id", "name", "obsolete", "link", "provenance"],
460
464
  tablefmt="github",
461
465
  )
462
- print(f"## {k} ({len(v)})", file=file) # noqa: T201
463
- print(t, "\n", file=file) # noqa: T201
466
+ print(f"## {k} ({len(v)})", file=file)
467
+ print(t, "\n", file=file)
464
468
 
465
469
  unhandle_locus_type_counter = Counter(
466
470
  {locus_type: len(d) for locus_type, d in unhandle_locus_types.items()}
pyobo/sources/hgncgenefamily.py CHANGED
@@ -1,9 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for HGNC Gene Families."""
4
2
 
5
3
  from collections import defaultdict
6
- from typing import Iterable, List, Mapping
4
+ from collections.abc import Iterable, Mapping
7
5
 
8
6
  import pandas as pd
9
7
 
@@ -23,13 +21,13 @@ __all__ = [
23
21
  ]
24
22
 
25
23
  PREFIX = "hgnc.genegroup"
26
- FAMILIES_URL = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/csv/genefamily_db_tables/family.csv"
24
+ FAMILIES_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family.csv"
27
25
  # TODO use family_alias.csv
28
- HIERARCHY_URL = (
29
- "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/csv/genefamily_db_tables/hierarchy.csv"
30
- )
26
+ HIERARCHY_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/hierarchy.csv"
31
27
 
32
- symbol_type = SynonymTypeDef.from_text("symbol")
28
+ symbol_type = SynonymTypeDef(
29
+ reference=Reference(prefix="OMO", identifier="0004000", name="has symbol")
30
+ )
33
31
 
34
32
 
35
33
  class HGNCGroupGetter(Obo):
@@ -50,7 +48,7 @@ def get_obo(force: bool = False) -> Obo:
50
48
  return HGNCGroupGetter(force=force)
51
49
 
52
50
 
53
- def get_hierarchy(force: bool = False) -> Mapping[str, List[str]]:
51
+ def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
54
52
  """Get the HGNC Gene Families hierarchy as a dictionary."""
55
53
  path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
56
54
  df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
@@ -80,7 +78,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
80
78
  name=parent.name,
81
79
  )
82
80
  )
83
- gene_group = Reference.auto("SO", "0005855")
81
+ gene_group = Reference(prefix="SO", identifier="0005855", name="gene group")
84
82
  yield Term(reference=gene_group)
85
83
  for term in terms:
86
84
  if not term.parents:
@@ -100,7 +98,7 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
100
98
  definition=definition,
101
99
  )
102
100
  if pubmed_ids and pd.notna(pubmed_ids):
103
- for s in pubmed_ids.split(","):
101
+ for s in pubmed_ids.replace(" ", ",").split(","):
104
102
  term.append_provenance(Reference(prefix="pubmed", identifier=s.strip()))
105
103
  if desc_go and pd.notna(desc_go):
106
104
  go_id = desc_go[len("http://purl.uniprot.org/go/") :]
pyobo/sources/icd10.py CHANGED
@@ -1,12 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Convert ICD-10 to OBO.
4
2
 
5
3
  Run with python -m pyobo.sources.icd10 -v
6
4
  """
7
5
 
8
6
  import logging
9
- from typing import Any, Iterable, Mapping, Set
7
+ from collections.abc import Iterable, Mapping
8
+ from typing import Any
10
9
 
11
10
  import click
12
11
  from more_click import verbose_option
@@ -57,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
57
56
  chapter_urls = res_json["child"]
58
57
  tqdm.write(f"there are {len(chapter_urls)} chapters")
59
58
 
60
- visited_identifiers: Set[str] = set()
59
+ visited_identifiers: set[str] = set()
61
60
  for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
62
61
  yield from visiter(
63
62
  identifier,
pyobo/sources/icd11.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Convert ICD11 to OBO.
4
2
 
5
3
  Run with python -m pyobo.sources.icd11 -v
@@ -8,7 +6,8 @@ Run with python -m pyobo.sources.icd11 -v
8
6
  import json
9
7
  import logging
10
8
  import os
11
- from typing import Any, Iterable, Mapping, Set
9
+ from collections.abc import Iterable, Mapping
10
+ from typing import Any
12
11
 
13
12
  import click
14
13
  from more_click import verbose_option
@@ -67,7 +66,7 @@ def iterate_icd11() -> Iterable[Term]:
67
66
 
68
67
  tqdm.write(f'There are {len(res_json["child"])} top level entities')
69
68
 
70
- visited_identifiers: Set[str] = set()
69
+ visited_identifiers: set[str] = set()
71
70
  for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
72
71
  yield from visiter(
73
72
  identifier,
pyobo/sources/icd_utils.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities or interacting with the ICD API.
4
2
 
5
3
  Want to get your own API cliend ID and client secret?
@@ -11,8 +9,9 @@ Want to get your own API cliend ID and client secret?
11
9
  import datetime
12
10
  import json
13
11
  import os
12
+ from collections.abc import Iterable, Mapping
14
13
  from pathlib import Path
15
- from typing import Any, Callable, Iterable, List, Mapping, Set, Union
14
+ from typing import Any, Callable, Union
16
15
 
17
16
  import pystow
18
17
  import requests
@@ -20,7 +19,7 @@ from cachier import cachier
20
19
  from pystow.config_api import ConfigError
21
20
  from tqdm.auto import tqdm
22
21
 
23
- from ..getters import NoBuild
22
+ from ..getters import NoBuildError
24
23
  from ..struct import Term
25
24
 
26
25
  TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105
@@ -43,7 +42,7 @@ def _get_entity(endpoint: str, identifier: str):
43
42
  return res.json()
44
43
 
45
44
 
46
- def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> List[str]:
45
+ def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
47
46
  """Ge the child identifiers."""
48
47
  return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
49
48
 
@@ -55,7 +54,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
55
54
  icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
56
55
  icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
57
56
  except ConfigError as e:
58
- raise NoBuild from e
57
+ raise NoBuildError from e
59
58
 
60
59
  grant_type = "client_credentials"
61
60
  body_params = {"grant_type": grant_type}
@@ -73,7 +72,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
73
72
 
74
73
  def visiter(
75
74
  identifier: str,
76
- visited_identifiers: Set[str],
75
+ visited_identifiers: set[str],
77
76
  directory: Union[str, Path],
78
77
  *,
79
78
  endpoint: str,
pyobo/sources/interpro.py CHANGED
@@ -1,9 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for InterPro."""
4
2
 
5
3
  from collections import defaultdict
6
- from typing import DefaultDict, Iterable, List, Mapping, Set, Tuple
4
+ from collections.abc import Iterable, Mapping
7
5
 
8
6
  from .utils import get_go_mapping
9
7
  from ..struct import Obo, Reference, Term
@@ -82,7 +80,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
82
80
  yield term
83
81
 
84
82
 
85
- def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, Set[Tuple[str, str]]]:
83
+ def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, set[tuple[str, str]]]:
86
84
  """Get InterPro to Gene Ontology molecular function mapping."""
87
85
  url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/interpro2go"
88
86
  path = ensure_path(PREFIX, url=url, name="interpro2go.tsv", version=version, force=force)
@@ -98,7 +96,7 @@ def get_interpro_tree(version: str, force: bool = False):
98
96
 
99
97
 
100
98
  def _parse_tree_helper(lines: Iterable[str]):
101
- rv1: DefaultDict[str, List[str]] = defaultdict(list)
99
+ rv1: defaultdict[str, list[str]] = defaultdict(list)
102
100
  previous_depth, previous_id = 0, ""
103
101
  stack = [previous_id]
104
102
 
pyobo/sources/itis.py CHANGED
@@ -1,13 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for the Integrated Taxonomic Information System (ITIS)."""
4
2
 
5
3
  import os
6
4
  import shutil
7
5
  import sqlite3
8
6
  import zipfile
7
+ from collections.abc import Iterable
9
8
  from contextlib import closing
10
- from typing import Iterable
11
9
 
12
10
  from pyobo.struct import Obo, Reference, Term
13
11
  from pyobo.utils.io import multidict
pyobo/sources/kegg/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """KEGG Databases."""
4
2
 
5
3
  from .genes import KEGGGeneGetter
pyobo/sources/kegg/api.py CHANGED
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """API utilities for KEGG."""
4
2
 
5
3
  import urllib.error
4
+ from collections.abc import Mapping
6
5
  from dataclasses import dataclass
7
- from typing import Mapping, Optional
6
+ from typing import Optional
8
7
 
9
8
  from pyobo import Reference, Term, ensure_path
10
9
  from pyobo.struct import from_species
@@ -132,7 +131,7 @@ def _ensure_conv_genome_helper(
132
131
  version=version,
133
132
  )
134
133
  with path_rv.open("w") as file:
135
- print(file=file) # noqa: T201
134
+ print(file=file)
136
135
  return path_rv.as_posix()
137
136
  except FileNotFoundError:
138
137
  return None
pyobo/sources/kegg/genes.py CHANGED
@@ -1,12 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Convert KEGG Genes to OBO.
4
2
 
5
3
  Run with ``python -m pyobo.sources.kegg.genes``
6
4
  """
7
5
 
8
6
  import logging
9
- from typing import Iterable, Optional
7
+ from collections.abc import Iterable
8
+ from typing import Optional
10
9
 
11
10
  import click
12
11
  from more_click import verbose_option
@@ -90,7 +89,7 @@ def _make_terms(
90
89
  )
91
90
  continue
92
91
  if ";" in line:
93
- *_extras, name = [part.strip() for part in extras.split(";")]
92
+ *_extras, name = (part.strip() for part in extras.split(";"))
94
93
  else:
95
94
  name = extras
96
95
 
pyobo/sources/kegg/genome.py CHANGED
@@ -1,12 +1,12 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Convert KEGG Genome to OBO.
4
2
 
5
3
  Run with ``python -m pyobo.sources.kegg.genome``
6
4
  """
7
5
 
6
+ from __future__ import annotations
7
+
8
8
  import logging
9
- from typing import Iterable
9
+ from collections.abc import Iterable
10
10
 
11
11
  from tqdm.auto import tqdm
12
12
 
@@ -48,8 +48,11 @@ def get_obo() -> Obo:
48
48
  return KEGGGenomeGetter()
49
49
 
50
50
 
51
- def parse_genome_line(line: str) -> KEGGGenome:
51
+ def parse_genome_line(line: str) -> KEGGGenome | None:
52
52
  """Parse a line from the KEGG Genome database."""
53
+ if not line.startswith("T"):
54
+ # This is for an NCBI Taxonomy
55
+ return None
53
56
  line = line.strip()
54
57
  identifier, rest = _s(line, "\t")
55
58
  identifier = identifier[len("gn:") :]
@@ -96,6 +99,8 @@ def iter_kegg_genomes(version: str, desc: str) -> Iterable[KEGGGenome]:
96
99
  it = tqdm(lines, desc=desc, unit_scale=True, unit="genome")
97
100
  for line in it:
98
101
  yv = parse_genome_line(line)
102
+ if yv is None:
103
+ continue
99
104
  it.set_postfix({"id": yv.identifier, "name": yv.name})
100
105
  yield yv
101
106
 
@@ -107,11 +112,16 @@ def iter_terms(version: str) -> Iterable[Term]:
107
112
  for kegg_genome in iter_kegg_genomes(version=version, desc="KEGG Genomes"):
108
113
  if kegg_genome.identifier in SKIP:
109
114
  continue
110
- term = Term.from_triple(
111
- prefix=KEGG_GENOME_PREFIX,
112
- identifier=kegg_genome.identifier,
113
- name=kegg_genome.name,
114
- )
115
+
116
+ try:
117
+ reference = Reference(
118
+ prefix=KEGG_GENOME_PREFIX, identifier=kegg_genome.identifier, name=kegg_genome.name
119
+ )
120
+ except ValueError:
121
+ tqdm.write(f"[{KEGG_GENOME_PREFIX}] invalid identifier: {kegg_genome}")
122
+ continue
123
+
124
+ term = Term(reference=reference)
115
125
  if kegg_genome.taxonomy_id is not None:
116
126
  taxonomy_name = get_ncbitaxon_name(kegg_genome.taxonomy_id)
117
127
  if taxonomy_name is None:
pyobo/sources/kegg/pathway.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Convert KEGG Pathways to OBO.
4
2
 
5
3
  Run with ``python -m pyobo.sources.kegg.pathway``
@@ -8,8 +6,9 @@ Run with ``python -m pyobo.sources.kegg.pathway``
8
6
  import logging
9
7
  import urllib.error
10
8
  from collections import defaultdict
9
+ from collections.abc import Iterable, Mapping
11
10
  from functools import partial
12
- from typing import Iterable, List, Mapping, Tuple, Union
11
+ from typing import Union
13
12
 
14
13
  from tqdm.auto import tqdm
15
14
  from tqdm.contrib.concurrent import thread_map
@@ -76,7 +75,7 @@ def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
76
75
  )
77
76
 
78
77
 
79
- def _get_link_pathway_map(path: str) -> Mapping[str, List[str]]:
78
+ def _get_link_pathway_map(path: str) -> Mapping[str, list[str]]:
80
79
  rv = defaultdict(list)
81
80
  with open(path) as file:
82
81
  for line in file:
@@ -110,7 +109,7 @@ def _iter_genome_terms(
110
109
  list_pathway_lines = [line.strip() for line in file]
111
110
  for line in list_pathway_lines:
112
111
  line = line.strip()
113
- pathway_id, name = [part.strip() for part in line.split("\t")]
112
+ pathway_id, name = (part.strip() for part in line.split("\t"))
114
113
  pathway_id = pathway_id[len("path:") :]
115
114
 
116
115
  terms[pathway_id] = term = Term.from_triple(
@@ -149,7 +148,7 @@ def _iter_genome_terms(
149
148
 
150
149
  def iter_kegg_pathway_paths(
151
150
  version: str, skip_missing: bool = True
152
- ) -> Iterable[Union[Tuple[KEGGGenome, str, str], Tuple[None, None, None]]]:
151
+ ) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]:
153
152
  """Get paths for the KEGG Pathway files."""
154
153
  genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways"))
155
154
  func = partial(_process_genome, version=version, skip_missing=skip_missing)