pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. pyobo/__init__.py +0 -2
  2. pyobo/__main__.py +0 -2
  3. pyobo/api/__init__.py +0 -2
  4. pyobo/api/alts.py +6 -7
  5. pyobo/api/hierarchy.py +14 -15
  6. pyobo/api/metadata.py +3 -4
  7. pyobo/api/names.py +31 -32
  8. pyobo/api/properties.py +6 -7
  9. pyobo/api/relations.py +12 -11
  10. pyobo/api/species.py +5 -6
  11. pyobo/api/typedefs.py +1 -3
  12. pyobo/api/utils.py +61 -5
  13. pyobo/api/xrefs.py +4 -5
  14. pyobo/aws.py +3 -5
  15. pyobo/cli/__init__.py +0 -2
  16. pyobo/cli/aws.py +0 -2
  17. pyobo/cli/cli.py +0 -4
  18. pyobo/cli/database.py +1 -3
  19. pyobo/cli/lookup.py +0 -2
  20. pyobo/cli/utils.py +0 -2
  21. pyobo/constants.py +1 -33
  22. pyobo/getters.py +19 -26
  23. pyobo/gilda_utils.py +19 -17
  24. pyobo/identifier_utils.py +10 -10
  25. pyobo/mocks.py +5 -6
  26. pyobo/normalizer.py +24 -24
  27. pyobo/obographs.py +8 -5
  28. pyobo/plugins.py +3 -4
  29. pyobo/py.typed +0 -0
  30. pyobo/reader.py +19 -21
  31. pyobo/registries/__init__.py +0 -2
  32. pyobo/registries/metaregistry.py +6 -8
  33. pyobo/resource_utils.py +1 -3
  34. pyobo/resources/__init__.py +0 -2
  35. pyobo/resources/ncbitaxon.py +2 -3
  36. pyobo/resources/ro.py +2 -4
  37. pyobo/resources/so.py +55 -0
  38. pyobo/resources/so.tsv +2604 -0
  39. pyobo/sources/README.md +15 -0
  40. pyobo/sources/__init__.py +0 -2
  41. pyobo/sources/agrovoc.py +3 -3
  42. pyobo/sources/antibodyregistry.py +2 -3
  43. pyobo/sources/biogrid.py +4 -4
  44. pyobo/sources/ccle.py +3 -4
  45. pyobo/sources/cgnc.py +1 -3
  46. pyobo/sources/chebi.py +2 -4
  47. pyobo/sources/chembl.py +1 -3
  48. pyobo/sources/civic_gene.py +2 -3
  49. pyobo/sources/complexportal.py +57 -20
  50. pyobo/sources/conso.py +2 -4
  51. pyobo/sources/cpt.py +1 -3
  52. pyobo/sources/credit.py +1 -1
  53. pyobo/sources/cvx.py +1 -3
  54. pyobo/sources/depmap.py +3 -4
  55. pyobo/sources/dictybase_gene.py +15 -12
  56. pyobo/sources/drugbank.py +6 -7
  57. pyobo/sources/drugbank_salt.py +3 -4
  58. pyobo/sources/drugcentral.py +9 -8
  59. pyobo/sources/expasy.py +33 -16
  60. pyobo/sources/famplex.py +3 -5
  61. pyobo/sources/flybase.py +5 -6
  62. pyobo/sources/geonames.py +1 -1
  63. pyobo/sources/gmt_utils.py +5 -6
  64. pyobo/sources/go.py +4 -6
  65. pyobo/sources/gwascentral_phenotype.py +1 -3
  66. pyobo/sources/gwascentral_study.py +2 -3
  67. pyobo/sources/hgnc.py +30 -26
  68. pyobo/sources/hgncgenefamily.py +9 -11
  69. pyobo/sources/icd10.py +3 -4
  70. pyobo/sources/icd11.py +3 -4
  71. pyobo/sources/icd_utils.py +6 -7
  72. pyobo/sources/interpro.py +3 -5
  73. pyobo/sources/itis.py +1 -3
  74. pyobo/sources/kegg/__init__.py +0 -2
  75. pyobo/sources/kegg/api.py +3 -4
  76. pyobo/sources/kegg/genes.py +3 -4
  77. pyobo/sources/kegg/genome.py +19 -9
  78. pyobo/sources/kegg/pathway.py +5 -6
  79. pyobo/sources/mesh.py +19 -21
  80. pyobo/sources/mgi.py +1 -3
  81. pyobo/sources/mirbase.py +13 -9
  82. pyobo/sources/mirbase_constants.py +0 -2
  83. pyobo/sources/mirbase_family.py +1 -3
  84. pyobo/sources/mirbase_mature.py +1 -3
  85. pyobo/sources/msigdb.py +4 -5
  86. pyobo/sources/ncbigene.py +3 -5
  87. pyobo/sources/npass.py +2 -4
  88. pyobo/sources/omim_ps.py +1 -3
  89. pyobo/sources/pathbank.py +35 -28
  90. pyobo/sources/pfam.py +1 -3
  91. pyobo/sources/pfam_clan.py +1 -3
  92. pyobo/sources/pid.py +3 -5
  93. pyobo/sources/pombase.py +7 -6
  94. pyobo/sources/pubchem.py +2 -3
  95. pyobo/sources/reactome.py +30 -11
  96. pyobo/sources/rgd.py +3 -4
  97. pyobo/sources/rhea.py +7 -8
  98. pyobo/sources/ror.py +3 -2
  99. pyobo/sources/selventa/__init__.py +0 -2
  100. pyobo/sources/selventa/schem.py +1 -3
  101. pyobo/sources/selventa/scomp.py +1 -3
  102. pyobo/sources/selventa/sdis.py +1 -3
  103. pyobo/sources/selventa/sfam.py +1 -3
  104. pyobo/sources/sgd.py +1 -3
  105. pyobo/sources/slm.py +29 -17
  106. pyobo/sources/umls/__init__.py +0 -2
  107. pyobo/sources/umls/__main__.py +0 -2
  108. pyobo/sources/umls/get_synonym_types.py +1 -1
  109. pyobo/sources/umls/umls.py +2 -4
  110. pyobo/sources/uniprot/__init__.py +0 -2
  111. pyobo/sources/uniprot/uniprot.py +11 -10
  112. pyobo/sources/uniprot/uniprot_ptm.py +6 -5
  113. pyobo/sources/utils.py +3 -5
  114. pyobo/sources/wikipathways.py +1 -3
  115. pyobo/sources/zfin.py +20 -9
  116. pyobo/ssg/__init__.py +3 -2
  117. pyobo/struct/__init__.py +0 -2
  118. pyobo/struct/reference.py +22 -23
  119. pyobo/struct/struct.py +132 -116
  120. pyobo/struct/typedef.py +14 -10
  121. pyobo/struct/utils.py +0 -2
  122. pyobo/utils/__init__.py +0 -2
  123. pyobo/utils/cache.py +14 -6
  124. pyobo/utils/io.py +9 -10
  125. pyobo/utils/iter.py +5 -6
  126. pyobo/utils/misc.py +1 -3
  127. pyobo/utils/ndex_utils.py +6 -7
  128. pyobo/utils/path.py +4 -5
  129. pyobo/version.py +3 -5
  130. pyobo/xrefdb/__init__.py +0 -2
  131. pyobo/xrefdb/canonicalizer.py +27 -18
  132. pyobo/xrefdb/priority.py +0 -2
  133. pyobo/xrefdb/sources/__init__.py +3 -4
  134. pyobo/xrefdb/sources/biomappings.py +0 -2
  135. pyobo/xrefdb/sources/cbms2019.py +0 -2
  136. pyobo/xrefdb/sources/chembl.py +0 -2
  137. pyobo/xrefdb/sources/compath.py +1 -3
  138. pyobo/xrefdb/sources/famplex.py +3 -5
  139. pyobo/xrefdb/sources/gilda.py +0 -2
  140. pyobo/xrefdb/sources/intact.py +5 -5
  141. pyobo/xrefdb/sources/ncit.py +1 -3
  142. pyobo/xrefdb/sources/pubchem.py +2 -5
  143. pyobo/xrefdb/sources/wikidata.py +2 -4
  144. pyobo/xrefdb/xrefs_pipeline.py +15 -16
  145. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
  146. pyobo-0.11.1.dist-info/METADATA +711 -0
  147. pyobo-0.11.1.dist-info/RECORD +173 -0
  148. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
  149. pyobo-0.11.1.dist-info/entry_points.txt +2 -0
  150. pyobo-0.10.12.dist-info/METADATA +0 -499
  151. pyobo-0.10.12.dist-info/RECORD +0 -169
  152. pyobo-0.10.12.dist-info/entry_points.txt +0 -15
  153. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0
pyobo/aws.py CHANGED
@@ -1,10 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Interface for caching data on AWS S3."""
4
2
 
5
3
  import logging
6
4
  import os
7
- from typing import Optional, Set
5
+ from typing import Optional
8
6
 
9
7
  import boto3
10
8
  import humanize
@@ -57,8 +55,8 @@ def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
57
55
 
58
56
  def upload_artifacts(
59
57
  bucket: str,
60
- whitelist: Optional[Set[str]] = None,
61
- blacklist: Optional[Set[str]] = None,
58
+ whitelist: Optional[set[str]] = None,
59
+ blacklist: Optional[set[str]] = None,
62
60
  s3_client=None,
63
61
  ) -> None:
64
62
  """Upload all artifacts to AWS."""
pyobo/cli/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO."""
4
2
 
5
3
  from .cli import main
pyobo/cli/aws.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO's interface to S3."""
4
2
 
5
3
  import click
pyobo/cli/cli.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO."""
4
2
 
5
3
  import logging
@@ -15,7 +13,6 @@ from tabulate import tabulate
15
13
  from .aws import main as aws_main
16
14
  from .database import main as database_main
17
15
  from .lookup import lookup
18
- from ..apps.cli import main as apps_main
19
16
  from ..constants import RAW_DIRECTORY
20
17
  from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
21
18
  from ..registries import iter_cached_obo
@@ -116,7 +113,6 @@ def remapping(file):
116
113
 
117
114
 
118
115
  main.add_command(lookup)
119
- main.add_command(apps_main)
120
116
  main.add_command(aws_main)
121
117
  main.add_command(database_main)
122
118
 
pyobo/cli/database.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO Database Generation."""
4
2
 
5
3
  import logging
@@ -317,7 +315,7 @@ def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
317
315
  @zenodo_option
318
316
  @force_option
319
317
  @no_strict_option
320
- def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool): # noqa: D202
318
+ def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
321
319
  """Make the prefix-identifier-xref dump."""
322
320
  with logging_redirect_tqdm():
323
321
  paths = db_output_helper(
pyobo/cli/lookup.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO lookups."""
4
2
 
5
3
  import json
pyobo/cli/utils.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for the CLI."""
4
2
 
5
3
  import datetime
pyobo/constants.py CHANGED
@@ -1,10 +1,6 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Constants for PyOBO."""
4
2
 
5
- import json
6
3
  import logging
7
- import os
8
4
  import re
9
5
 
10
6
  import pystow
@@ -13,7 +9,6 @@ __all__ = [
13
9
  "RAW_DIRECTORY",
14
10
  "DATABASE_DIRECTORY",
15
11
  "SPECIES_REMAPPING",
16
- "VERSION_PINS",
17
12
  ]
18
13
 
19
14
  logger = logging.getLogger(__name__)
@@ -34,6 +29,7 @@ GLOBAL_SKIP = {
34
29
  "resid", # deprecated
35
30
  "adw", # deprecated
36
31
  }
32
+ GLOBAL_CHECK_IDS = False
37
33
 
38
34
  #: Default prefix
39
35
  DEFAULT_PREFIX = "debio"
@@ -101,31 +97,3 @@ PROVENANCE_PREFIXES = {
101
97
  "isbn",
102
98
  "issn",
103
99
  }
104
-
105
- # Load version pin dictionary from the environmental variable VERSION_PINS
106
- try:
107
- VERSION_PINS_STR = os.getenv("VERSION_PINS")
108
- if not VERSION_PINS_STR:
109
- VERSION_PINS = {}
110
- else:
111
- VERSION_PINS = json.loads(VERSION_PINS_STR)
112
- for k, v in VERSION_PINS.items():
113
- if not isinstance(k, str) or not isinstance(v, str):
114
- logger.error("The prefix and version name must both be " "strings")
115
- VERSION_PINS = {}
116
- break
117
- except ValueError as e:
118
- logger.error(
119
- "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
120
- )
121
- VERSION_PINS = {}
122
-
123
- if VERSION_PINS:
124
- logger.debug(
125
- f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
126
- f"\nPyobo will download the latest version of a resource if it's "
127
- f"not pinned.\nIf you want to use a specific version of a "
128
- f"resource, edit your VERSION_PINS environmental "
129
- f"variable which is a JSON string to include a prefix and version "
130
- f"name."
131
- )
pyobo/getters.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for OBO files."""
4
2
 
5
3
  import datetime
@@ -11,16 +9,11 @@ import subprocess
11
9
  import typing
12
10
  import urllib.error
13
11
  from collections import Counter
12
+ from collections.abc import Iterable, Mapping, Sequence
14
13
  from pathlib import Path
15
14
  from typing import (
16
15
  Callable,
17
- Iterable,
18
- List,
19
- Mapping,
20
16
  Optional,
21
- Sequence,
22
- Set,
23
- Tuple,
24
17
  TypeVar,
25
18
  Union,
26
19
  )
@@ -30,7 +23,7 @@ from bioontologies import robot
30
23
  from tqdm.auto import tqdm
31
24
 
32
25
  from .constants import DATABASE_DIRECTORY
33
- from .identifier_utils import MissingPrefix, wrap_norm_prefix
26
+ from .identifier_utils import MissingPrefixError, wrap_norm_prefix
34
27
  from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
35
28
  from .struct import Obo
36
29
  from .utils.io import get_writer
@@ -39,17 +32,17 @@ from .version import get_git_hash, get_version
39
32
 
40
33
  __all__ = [
41
34
  "get_ontology",
42
- "NoBuild",
35
+ "NoBuildError",
43
36
  ]
44
37
 
45
38
  logger = logging.getLogger(__name__)
46
39
 
47
40
 
48
- class NoBuild(RuntimeError):
41
+ class NoBuildError(RuntimeError):
49
42
  """Base exception for being unable to build."""
50
43
 
51
44
 
52
- class UnhandledFormat(NoBuild):
45
+ class UnhandledFormatError(NoBuildError):
53
46
  """Only OWL is available."""
54
47
 
55
48
 
@@ -117,7 +110,7 @@ def get_ontology(
117
110
 
118
111
  ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
119
112
  if path is None:
120
- raise NoBuild(prefix)
113
+ raise NoBuildError(prefix)
121
114
  elif ontology_format == "obo":
122
115
  pass # all gucci
123
116
  elif ontology_format == "owl":
@@ -127,7 +120,7 @@ def get_ontology(
127
120
  robot.convert(path, _converted_obo_path, check=robot_check)
128
121
  path = _converted_obo_path
129
122
  else:
130
- raise UnhandledFormat(f"[{prefix}] unhandled ontology file format: {path.suffix}")
123
+ raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
131
124
 
132
125
  from .reader import from_obo_path
133
126
 
@@ -147,8 +140,8 @@ def get_ontology(
147
140
 
148
141
  def _ensure_ontology_path(
149
142
  prefix: str, force, version
150
- ) -> Union[Tuple[str, Path], Tuple[None, None]]:
151
- for ontology_format, url in [ # noqa:B007
143
+ ) -> Union[tuple[str, Path], tuple[None, None]]:
144
+ for ontology_format, url in [
152
145
  ("obo", bioregistry.get_obo_download(prefix)),
153
146
  ("owl", bioregistry.get_owl_download(prefix)),
154
147
  ("json", bioregistry.get_json_download(prefix)),
@@ -246,7 +239,7 @@ def iter_helper(
246
239
  leave: bool = False,
247
240
  strict: bool = True,
248
241
  **kwargs,
249
- ) -> Iterable[Tuple[str, str, X]]:
242
+ ) -> Iterable[tuple[str, str, X]]:
250
243
  """Yield all mappings extracted from each database given."""
251
244
  for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs):
252
245
  it = tqdm(
@@ -266,7 +259,7 @@ def _prefixes(
266
259
  skip_below: Optional[str] = None,
267
260
  skip_below_inclusive: bool = True,
268
261
  skip_pyobo: bool = False,
269
- skip_set: Optional[Set[str]] = None,
262
+ skip_set: Optional[set[str]] = None,
270
263
  ) -> Iterable[str]:
271
264
  for prefix, resource in sorted(bioregistry.read_registry().items()):
272
265
  if resource.no_own_terms:
@@ -299,10 +292,10 @@ def iter_helper_helper(
299
292
  skip_below: Optional[str] = None,
300
293
  skip_below_inclusive: bool = True,
301
294
  skip_pyobo: bool = False,
302
- skip_set: Optional[Set[str]] = None,
295
+ skip_set: Optional[set[str]] = None,
303
296
  strict: bool = True,
304
297
  **kwargs,
305
- ) -> Iterable[Tuple[str, X]]:
298
+ ) -> Iterable[tuple[str, X]]:
306
299
  """Yield all mappings extracted from each database given.
307
300
 
308
301
  :param f: A function that takes a prefix and gives back something that will be used by an outer function.
@@ -342,13 +335,13 @@ def iter_helper_helper(
342
335
  logger.warning("[%s] unable to download", prefix)
343
336
  if strict and not bioregistry.is_deprecated(prefix):
344
337
  raise
345
- except MissingPrefix as e:
338
+ except MissingPrefixError as e:
346
339
  logger.warning("[%s] missing prefix: %s", prefix, e)
347
340
  if strict and not bioregistry.is_deprecated(prefix):
348
341
  raise e
349
342
  except subprocess.CalledProcessError:
350
343
  logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
351
- except UnhandledFormat as e:
344
+ except UnhandledFormatError as e:
352
345
  logger.warning("[%s] %s", prefix, e)
353
346
  except ValueError as e:
354
347
  if _is_xml(e):
@@ -390,7 +383,7 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
390
383
 
391
384
 
392
385
  def db_output_helper(
393
- f: Callable[..., Iterable[Tuple[str, ...]]],
386
+ f: Callable[..., Iterable[tuple[str, ...]]],
394
387
  db_name: str,
395
388
  columns: Sequence[str],
396
389
  *,
@@ -399,7 +392,7 @@ def db_output_helper(
399
392
  use_gzip: bool = True,
400
393
  summary_detailed: Optional[Sequence[int]] = None,
401
394
  **kwargs,
402
- ) -> List[pathlib.Path]:
395
+ ) -> list[pathlib.Path]:
403
396
  """Help output database builds.
404
397
 
405
398
  :param f: A function that takes a prefix and gives back something that will be used by an outer function.
@@ -413,7 +406,7 @@ def db_output_helper(
413
406
  directory = _prep_dir(directory)
414
407
 
415
408
  c: typing.Counter[str] = Counter()
416
- c_detailed: typing.Counter[Tuple[str, ...]] = Counter()
409
+ c_detailed: typing.Counter[tuple[str, ...]] = Counter()
417
410
 
418
411
  if use_gzip:
419
412
  db_path = directory.joinpath(f"{db_name}.tsv.gz")
@@ -475,7 +468,7 @@ def db_output_helper(
475
468
  indent=2,
476
469
  )
477
470
 
478
- rv: List[pathlib.Path] = [
471
+ rv: list[pathlib.Path] = [
479
472
  db_metadata_path,
480
473
  db_path,
481
474
  db_sample_path,
pyobo/gilda_utils.py CHANGED
@@ -1,10 +1,10 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """PyOBO's Gilda utilities."""
4
2
 
3
+ from __future__ import annotations
4
+
5
5
  import logging
6
+ from collections.abc import Iterable
6
7
  from subprocess import CalledProcessError
7
- from typing import Iterable, List, Optional, Tuple, Type, Union
8
8
 
9
9
  import bioregistry
10
10
  import gilda.api
@@ -22,7 +22,7 @@ from pyobo import (
22
22
  get_ids,
23
23
  get_obsolete,
24
24
  )
25
- from pyobo.getters import NoBuild
25
+ from pyobo.getters import NoBuildError
26
26
  from pyobo.utils.io import multidict
27
27
 
28
28
  __all__ = [
@@ -38,10 +38,10 @@ def iter_gilda_prediction_tuples(
38
38
  prefix: str,
39
39
  relation: str = "skos:exactMatch",
40
40
  *,
41
- grounder: Optional[Grounder] = None,
41
+ grounder: Grounder | None = None,
42
42
  identifiers_are_names: bool = False,
43
43
  strict: bool = False,
44
- ) -> Iterable[Tuple[str, str, str, str, str, str, str, str, float]]:
44
+ ) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
45
45
  """Iterate over prediction tuples for a given prefix."""
46
46
  if grounder is None:
47
47
  grounder = gilda.api.grounder
@@ -91,11 +91,11 @@ def normalize_identifier(prefix: str, identifier: str) -> str:
91
91
 
92
92
 
93
93
  def get_grounder(
94
- prefixes: Union[str, Iterable[str]],
94
+ prefixes: str | Iterable[str],
95
95
  *,
96
- unnamed: Optional[Iterable[str]] = None,
97
- grounder_cls: Optional[Type[Grounder]] = None,
98
- versions: Union[None, str, Iterable[Union[str, None]]] = None,
96
+ unnamed: Iterable[str] | None = None,
97
+ grounder_cls: type[Grounder] | None = None,
98
+ versions: None | str | Iterable[str | None] | dict[str, str] = None,
99
99
  strict: bool = True,
100
100
  skip_obsolete: bool = False,
101
101
  progress: bool = True,
@@ -110,12 +110,14 @@ def get_grounder(
110
110
  versions = [None] * len(prefixes)
111
111
  elif isinstance(versions, str):
112
112
  versions = [versions]
113
+ elif isinstance(versions, dict):
114
+ versions = [versions.get(prefix) for prefix in prefixes]
113
115
  else:
114
116
  versions = list(versions)
115
117
  if len(prefixes) != len(versions):
116
118
  raise ValueError
117
119
 
118
- terms: List[gilda.term.Term] = []
120
+ terms: list[gilda.term.Term] = []
119
121
  for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
120
122
  try:
121
123
  p_terms = list(
@@ -128,7 +130,7 @@ def get_grounder(
128
130
  progress=progress,
129
131
  )
130
132
  )
131
- except (NoBuild, CalledProcessError):
133
+ except (NoBuildError, CalledProcessError):
132
134
  continue
133
135
  else:
134
136
  terms.extend(p_terms)
@@ -147,8 +149,8 @@ def _fast_term(
147
149
  identifier: str,
148
150
  name: str,
149
151
  status: str,
150
- organism: Optional[str] = None,
151
- ) -> Optional[gilda.term.Term]:
152
+ organism: str | None = None,
153
+ ) -> gilda.term.Term | None:
152
154
  try:
153
155
  term = gilda.term.Term(
154
156
  norm_text=normalize(text),
@@ -169,7 +171,7 @@ def get_gilda_terms(
169
171
  prefix: str,
170
172
  *,
171
173
  identifiers_are_names: bool = False,
172
- version: Optional[str] = None,
174
+ version: str | None = None,
173
175
  strict: bool = True,
174
176
  skip_obsolete: bool = False,
175
177
  progress: bool = True,
@@ -251,7 +253,7 @@ def get_gilda_terms(
251
253
 
252
254
 
253
255
  def get_gilda_term_subset(
254
- source: str, ancestors: Union[str, List[str]], **kwargs
256
+ source: str, ancestors: str | list[str], **kwargs
255
257
  ) -> Iterable[gilda.term.Term]:
256
258
  """Get a subset of terms."""
257
259
  subset = {
@@ -264,7 +266,7 @@ def get_gilda_term_subset(
264
266
  yield term
265
267
 
266
268
 
267
- def _ensure_list(s: Union[str, List[str]]) -> List[str]:
269
+ def _ensure_list(s: str | list[str]) -> list[str]:
268
270
  if isinstance(s, str):
269
271
  return [s]
270
272
  return s
pyobo/identifier_utils.py CHANGED
@@ -1,12 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for handling prefixes."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
7
5
  import logging
8
6
  from functools import wraps
9
- from typing import Optional, Tuple, Union
10
7
 
11
8
  import bioregistry
12
9
  from curies import Reference, ReferenceTuple
@@ -28,10 +25,15 @@ __all__ = [
28
25
  logger = logging.getLogger(__name__)
29
26
 
30
27
 
31
- class MissingPrefix(ValueError):
28
+ class MissingPrefixError(ValueError):
32
29
  """Raised on a missing prefix."""
33
30
 
34
- def __init__(self, prefix, curie, xref=None, ontology=None):
31
+ reference: Reference | None
32
+
33
+ def __init__(
34
+ self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
35
+ ):
36
+ """Initialize the error."""
35
37
  self.prefix = prefix
36
38
  self.curie = curie
37
39
  self.xref = xref
@@ -50,13 +52,13 @@ class MissingPrefix(ValueError):
50
52
  return s
51
53
 
52
54
 
53
- def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> Optional[str]:
55
+ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
54
56
  """Normalize a namespace and return, if possible."""
55
57
  norm_prefix = bioregistry.normalize_prefix(prefix)
56
58
  if norm_prefix is not None:
57
59
  return norm_prefix
58
60
  elif strict:
59
- raise MissingPrefix(prefix=prefix, curie=curie, xref=xref)
61
+ raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
60
62
  else:
61
63
  return None
62
64
 
@@ -64,9 +66,7 @@ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True
64
66
  BAD_CURIES = set()
65
67
 
66
68
 
67
- def normalize_curie(
68
- curie: str, *, strict: bool = True
69
- ) -> Union[Tuple[str, str], Tuple[None, None]]:
69
+ def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
70
70
  """Parse a string that looks like a CURIE.
71
71
 
72
72
  :param curie: A compact uniform resource identifier (CURIE)
pyobo/mocks.py CHANGED
@@ -1,8 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Mocks for PyOBO."""
4
2
 
5
- from typing import List, Mapping, Optional, Tuple, TypeVar, Union
3
+ from collections.abc import Mapping
4
+ from typing import Optional, TypeVar, Union
6
5
  from unittest import mock
7
6
 
8
7
  import pandas as pd
@@ -25,7 +24,7 @@ def get_mock_id_name_mapping(data: Mapping[str, Mapping[str, str]]) -> mock._pat
25
24
  return _replace_mapping_getter("pyobo.api.names.get_id_name_mapping", data)
26
25
 
27
26
 
28
- def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch:
27
+ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
29
28
  """Mock the :func:`pyobo.extract.get_id_synonyms_mapping` function.
30
29
 
31
30
  :param data: A mapping from prefix to mappings of identifier to lists of synonyms.
@@ -33,7 +32,7 @@ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) ->
33
32
  return _replace_mapping_getter("pyobo.api.names.get_id_synonyms_mapping", data)
34
33
 
35
34
 
36
- def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch:
35
+ def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
37
36
  """Mock the :func:`pyobo.extract.get_id_to_alts` function.
38
37
 
39
38
  :param data: A mapping from prefix to mappings of identifier to lists of alternative identifiers.
@@ -52,7 +51,7 @@ def _replace_mapping_getter(name: str, data: Mapping[str, Mapping[str, X]]) -> m
52
51
 
53
52
 
54
53
  def get_mock_get_xrefs_df(
55
- df: Union[List[Tuple[str, str, str, str, str]], pd.DataFrame]
54
+ df: Union[list[tuple[str, str, str, str, str]], pd.DataFrame],
56
55
  ) -> mock._patch:
57
56
  """Mock the :func:`pyobo.xrefsdb.xrefs_pipeline.get_xref_df` function.
58
57
 
pyobo/normalizer.py CHANGED
@@ -1,12 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Use synonyms from OBO to normalize names."""
4
2
 
5
3
  import logging
6
4
  from abc import ABC, abstractmethod
5
+ from collections.abc import Iterable, Mapping
7
6
  from dataclasses import dataclass
8
7
  from functools import lru_cache
9
- from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union
8
+ from typing import Optional, Union
10
9
 
11
10
  import bioregistry
12
11
 
@@ -23,29 +22,29 @@ __all__ = [
23
22
 
24
23
  logger = logging.getLogger(__name__)
25
24
 
26
- NormalizationSuccess = Tuple[str, str, str]
27
- NormalizationFailure = Tuple[None, None, str]
25
+ NormalizationSuccess = tuple[str, str, str]
26
+ NormalizationFailure = tuple[None, None, str]
28
27
  NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
29
28
 
30
29
 
31
30
  class Normalizer(ABC):
32
31
  """A normalizer."""
33
32
 
34
- id_to_name: Dict[str, str]
35
- id_to_synonyms: Dict[str, List[str]]
33
+ id_to_name: dict[str, str]
34
+ id_to_synonyms: dict[str, list[str]]
36
35
 
37
36
  #: A mapping from all synonyms to the set of identifiers that they point to.
38
37
  #: In a perfect world, each would only be a single element.
39
- synonym_to_identifiers_mapping: Dict[str, Set[str]]
38
+ synonym_to_identifiers_mapping: dict[str, set[str]]
40
39
  #: A mapping from normalized names to the actual ones that they came from
41
- norm_name_to_name: Dict[str, Set[str]]
40
+ norm_name_to_name: dict[str, set[str]]
42
41
 
43
42
  def __init__(
44
43
  self,
45
- id_to_name: Dict[str, str],
46
- id_to_synonyms: Dict[str, List[str]],
44
+ id_to_name: dict[str, str],
45
+ id_to_synonyms: dict[str, list[str]],
47
46
  remove_prefix: Optional[str] = None,
48
- ) -> None: # noqa: D107
47
+ ) -> None:
49
48
  """Initialize the normalizer.
50
49
 
51
50
  :param id_to_name: An identifier to name dictionary.
@@ -64,7 +63,7 @@ class Normalizer(ABC):
64
63
  self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
65
64
 
66
65
  @classmethod
67
- def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> Dict[str, Set[str]]:
66
+ def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
68
67
  return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
69
68
 
70
69
  @staticmethod
@@ -81,7 +80,7 @@ class Normalizer(ABC):
81
80
  id_to_name: Mapping[str, str],
82
81
  id_to_synonyms: Mapping[str, Iterable[str]],
83
82
  remove_prefix: Optional[str] = None,
84
- ) -> Iterable[Tuple[str, str]]:
83
+ ) -> Iterable[tuple[str, str]]:
85
84
  if remove_prefix is not None:
86
85
  remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
87
86
 
@@ -101,7 +100,7 @@ class Normalizer(ABC):
101
100
  # it might overwrite but this is probably always due to alternate ids
102
101
  yield synonym, identifier
103
102
 
104
- def get_names(self, query: str) -> List[str]:
103
+ def get_names(self, query: str) -> list[str]:
105
104
  """Get all names to which the query text maps."""
106
105
  norm_text = self._normalize_text(query)
107
106
  return list(self.norm_name_to_name.get(norm_text, []))
@@ -112,7 +111,7 @@ class Normalizer(ABC):
112
111
  raise NotImplementedError
113
112
 
114
113
 
115
- @lru_cache()
114
+ @lru_cache
116
115
  def get_normalizer(prefix: str) -> Normalizer:
117
116
  """Get an OBO normalizer."""
118
117
  norm_prefix = bioregistry.normalize_prefix(prefix)
@@ -149,7 +148,8 @@ def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult
149
148
  class OboNormalizer(Normalizer):
150
149
  """A utility for normalizing by names."""
151
150
 
152
- def __init__(self, prefix: str) -> None: # noqa: D107
151
+ def __init__(self, prefix: str) -> None:
152
+ """Initialize the normalizer by an ontology's Bioregistry prefix."""
153
153
  self.prefix = prefix
154
154
  self._len_prefix = len(prefix)
155
155
  id_to_name = names.get_id_name_mapping(prefix)
@@ -160,7 +160,7 @@ class OboNormalizer(Normalizer):
160
160
  remove_prefix=prefix,
161
161
  )
162
162
 
163
- def __repr__(self) -> str: # noqa: D105
163
+ def __repr__(self) -> str:
164
164
  return f'OboNormalizer(prefix="{self.prefix}")'
165
165
 
166
166
  def normalize(self, query: str) -> NormalizationResult:
@@ -188,20 +188,20 @@ class MultiNormalizer:
188
188
  If you're looking for taxa of exotic plants, you might use:
189
189
 
190
190
  >>> from pyobo.normalizer import MultiNormalizer
191
- >>> normalizer = MultiNormalizer(prefixes=['ncbitaxon', 'itis'])
192
- >>> normalizer.normalize('Homo sapiens')
191
+ >>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
192
+ >>> normalizer.normalize("Homo sapiens")
193
193
  ('ncbitaxon', '9606', 'Homo sapiens')
194
- >>> normalizer.normalize('Abies bifolia') # variety not listed in NCBI
194
+ >>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
195
195
  ('itis', '507501', 'Abies bifolia')
196
- >>> normalizer.normalize('vulcan') # nice try, nerds
196
+ >>> normalizer.normalize("vulcan") # nice try, nerds
197
197
  (None, None, None)
198
198
  """
199
199
 
200
200
  #: The normalizers for each prefix
201
- normalizers: List[Normalizer]
201
+ normalizers: list[Normalizer]
202
202
 
203
203
  @staticmethod
204
- def from_prefixes(prefixes: List[str]) -> "MultiNormalizer":
204
+ def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
205
205
  """Instantiate normalizers based on the given prefixes, in preferred order.."""
206
206
  return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
207
207
 
pyobo/obographs.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Convert PyOBO into OBO Graph."""
2
2
 
3
- from typing import Iterable, List
3
+ from collections.abc import Iterable
4
4
 
5
5
  import bioregistry
6
6
  import curies
@@ -17,6 +17,7 @@ from bioontologies.obograph import (
17
17
  Xref,
18
18
  )
19
19
  from bioontologies.robot import ParseResults
20
+ from tqdm import tqdm
20
21
 
21
22
  from pyobo.struct import Obo, Reference, Term
22
23
  from pyobo.struct.typedef import definition_source, is_a
@@ -33,11 +34,13 @@ def parse_results_from_obo(obo: Obo) -> ParseResults:
33
34
  return ParseResults(graph_document=GraphDocument(graphs=[graph]))
34
35
 
35
36
 
36
- def graph_from_obo(obo: Obo) -> Graph:
37
+ def graph_from_obo(obo: Obo, use_tqdm: bool = True) -> Graph:
37
38
  """Get an OBO Graph object from a PyOBO object."""
38
- nodes: List[Node] = []
39
- edges: List[Edge] = []
40
- for term in obo:
39
+ nodes: list[Node] = []
40
+ edges: list[Edge] = []
41
+ for term in tqdm(
42
+ obo, disable=not use_tqdm, unit="term", unit_scale=True, desc=f"[{obo.ontology}] to JSON"
43
+ ):
41
44
  nodes.append(_get_class_node(term))
42
45
  edges.extend(_iter_edges(term))
43
46
  return Graph(