pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. pyobo/__init__.py +0 -2
  2. pyobo/__main__.py +0 -2
  3. pyobo/api/__init__.py +0 -2
  4. pyobo/api/alts.py +6 -7
  5. pyobo/api/hierarchy.py +14 -15
  6. pyobo/api/metadata.py +3 -4
  7. pyobo/api/names.py +51 -31
  8. pyobo/api/properties.py +6 -7
  9. pyobo/api/relations.py +12 -11
  10. pyobo/api/species.py +5 -6
  11. pyobo/api/typedefs.py +1 -3
  12. pyobo/api/utils.py +63 -2
  13. pyobo/api/xrefs.py +4 -5
  14. pyobo/aws.py +3 -5
  15. pyobo/cli/__init__.py +0 -2
  16. pyobo/cli/aws.py +0 -2
  17. pyobo/cli/cli.py +0 -4
  18. pyobo/cli/database.py +1 -3
  19. pyobo/cli/lookup.py +2 -4
  20. pyobo/cli/utils.py +0 -2
  21. pyobo/constants.py +0 -3
  22. pyobo/getters.py +19 -26
  23. pyobo/gilda_utils.py +28 -8
  24. pyobo/identifier_utils.py +32 -15
  25. pyobo/mocks.py +5 -6
  26. pyobo/normalizer.py +24 -24
  27. pyobo/obographs.py +3 -3
  28. pyobo/plugins.py +3 -4
  29. pyobo/py.typed +0 -0
  30. pyobo/reader.py +19 -21
  31. pyobo/registries/__init__.py +0 -2
  32. pyobo/registries/metaregistry.py +6 -8
  33. pyobo/resource_utils.py +1 -3
  34. pyobo/resources/__init__.py +0 -2
  35. pyobo/resources/ncbitaxon.py +2 -3
  36. pyobo/resources/ro.py +2 -4
  37. pyobo/sources/README.md +15 -0
  38. pyobo/sources/__init__.py +2 -2
  39. pyobo/sources/agrovoc.py +3 -3
  40. pyobo/sources/antibodyregistry.py +4 -5
  41. pyobo/sources/biogrid.py +7 -7
  42. pyobo/sources/ccle.py +3 -4
  43. pyobo/sources/cgnc.py +1 -3
  44. pyobo/sources/chebi.py +2 -4
  45. pyobo/sources/chembl.py +1 -3
  46. pyobo/sources/civic_gene.py +2 -3
  47. pyobo/sources/complexportal.py +3 -5
  48. pyobo/sources/conso.py +2 -4
  49. pyobo/sources/cpt.py +1 -3
  50. pyobo/sources/credit.py +68 -0
  51. pyobo/sources/cvx.py +1 -3
  52. pyobo/sources/depmap.py +3 -4
  53. pyobo/sources/dictybase_gene.py +1 -3
  54. pyobo/sources/drugbank.py +6 -7
  55. pyobo/sources/drugbank_salt.py +3 -4
  56. pyobo/sources/drugcentral.py +5 -7
  57. pyobo/sources/expasy.py +11 -12
  58. pyobo/sources/famplex.py +3 -5
  59. pyobo/sources/flybase.py +2 -4
  60. pyobo/sources/geonames.py +28 -10
  61. pyobo/sources/gmt_utils.py +5 -6
  62. pyobo/sources/go.py +4 -6
  63. pyobo/sources/gwascentral_phenotype.py +1 -3
  64. pyobo/sources/gwascentral_study.py +2 -3
  65. pyobo/sources/hgnc.py +8 -9
  66. pyobo/sources/hgncgenefamily.py +2 -4
  67. pyobo/sources/icd10.py +3 -4
  68. pyobo/sources/icd11.py +3 -4
  69. pyobo/sources/icd_utils.py +6 -7
  70. pyobo/sources/interpro.py +3 -5
  71. pyobo/sources/itis.py +1 -3
  72. pyobo/sources/kegg/__init__.py +0 -2
  73. pyobo/sources/kegg/api.py +3 -4
  74. pyobo/sources/kegg/genes.py +3 -4
  75. pyobo/sources/kegg/genome.py +1 -3
  76. pyobo/sources/kegg/pathway.py +5 -6
  77. pyobo/sources/mesh.py +19 -21
  78. pyobo/sources/mgi.py +1 -3
  79. pyobo/sources/mirbase.py +4 -6
  80. pyobo/sources/mirbase_constants.py +0 -2
  81. pyobo/sources/mirbase_family.py +1 -3
  82. pyobo/sources/mirbase_mature.py +1 -3
  83. pyobo/sources/msigdb.py +4 -5
  84. pyobo/sources/ncbigene.py +3 -5
  85. pyobo/sources/npass.py +2 -4
  86. pyobo/sources/omim_ps.py +1 -3
  87. pyobo/sources/pathbank.py +3 -5
  88. pyobo/sources/pfam.py +1 -3
  89. pyobo/sources/pfam_clan.py +1 -3
  90. pyobo/sources/pid.py +3 -5
  91. pyobo/sources/pombase.py +1 -3
  92. pyobo/sources/pubchem.py +5 -6
  93. pyobo/sources/reactome.py +2 -4
  94. pyobo/sources/rgd.py +3 -4
  95. pyobo/sources/rhea.py +9 -10
  96. pyobo/sources/ror.py +69 -22
  97. pyobo/sources/selventa/__init__.py +0 -2
  98. pyobo/sources/selventa/schem.py +1 -3
  99. pyobo/sources/selventa/scomp.py +1 -3
  100. pyobo/sources/selventa/sdis.py +1 -3
  101. pyobo/sources/selventa/sfam.py +1 -3
  102. pyobo/sources/sgd.py +1 -3
  103. pyobo/sources/slm.py +1 -3
  104. pyobo/sources/umls/__init__.py +0 -2
  105. pyobo/sources/umls/__main__.py +0 -2
  106. pyobo/sources/umls/get_synonym_types.py +1 -1
  107. pyobo/sources/umls/umls.py +2 -4
  108. pyobo/sources/uniprot/__init__.py +0 -2
  109. pyobo/sources/uniprot/uniprot.py +6 -6
  110. pyobo/sources/uniprot/uniprot_ptm.py +6 -5
  111. pyobo/sources/utils.py +3 -5
  112. pyobo/sources/wikipathways.py +1 -3
  113. pyobo/sources/zfin.py +2 -3
  114. pyobo/ssg/__init__.py +3 -2
  115. pyobo/struct/__init__.py +0 -2
  116. pyobo/struct/reference.py +13 -15
  117. pyobo/struct/struct.py +106 -99
  118. pyobo/struct/typedef.py +19 -10
  119. pyobo/struct/utils.py +0 -2
  120. pyobo/utils/__init__.py +0 -2
  121. pyobo/utils/cache.py +14 -6
  122. pyobo/utils/io.py +9 -10
  123. pyobo/utils/iter.py +5 -6
  124. pyobo/utils/misc.py +1 -3
  125. pyobo/utils/ndex_utils.py +6 -7
  126. pyobo/utils/path.py +5 -5
  127. pyobo/version.py +3 -5
  128. pyobo/xrefdb/__init__.py +0 -2
  129. pyobo/xrefdb/canonicalizer.py +27 -18
  130. pyobo/xrefdb/priority.py +0 -2
  131. pyobo/xrefdb/sources/__init__.py +9 -7
  132. pyobo/xrefdb/sources/biomappings.py +0 -2
  133. pyobo/xrefdb/sources/cbms2019.py +0 -2
  134. pyobo/xrefdb/sources/chembl.py +5 -7
  135. pyobo/xrefdb/sources/compath.py +1 -3
  136. pyobo/xrefdb/sources/famplex.py +3 -5
  137. pyobo/xrefdb/sources/gilda.py +0 -2
  138. pyobo/xrefdb/sources/intact.py +5 -5
  139. pyobo/xrefdb/sources/ncit.py +1 -3
  140. pyobo/xrefdb/sources/pubchem.py +2 -4
  141. pyobo/xrefdb/sources/wikidata.py +10 -5
  142. pyobo/xrefdb/xrefs_pipeline.py +15 -16
  143. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
  144. pyobo-0.11.0.dist-info/METADATA +723 -0
  145. pyobo-0.11.0.dist-info/RECORD +171 -0
  146. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
  147. pyobo-0.11.0.dist-info/entry_points.txt +2 -0
  148. pyobo/xrefdb/bengo.py +0 -44
  149. pyobo-0.10.11.dist-info/METADATA +0 -499
  150. pyobo-0.10.11.dist-info/RECORD +0 -169
  151. pyobo-0.10.11.dist-info/entry_points.txt +0 -15
  152. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/api/xrefs.py CHANGED
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """High-level API for synonyms."""
4
2
 
5
3
  import logging
4
+ from collections.abc import Mapping
6
5
  from functools import lru_cache
7
- from typing import List, Mapping, Optional, Tuple, Union
6
+ from typing import Optional, Union
8
7
 
9
8
  import pandas as pd
10
9
  from tqdm.auto import tqdm
@@ -43,7 +42,7 @@ def get_xref(
43
42
  return filtered_xrefs.get(identifier)
44
43
 
45
44
 
46
- @lru_cache()
45
+ @lru_cache
47
46
  @wrap_norm_prefix
48
47
  def get_filtered_xrefs(
49
48
  prefix: str,
@@ -147,7 +146,7 @@ def get_sssom_df(
147
146
  prefix = prefix.ontology
148
147
  else:
149
148
  df = get_xrefs_df(prefix=prefix, **kwargs)
150
- rows: List[Tuple[str, ...]] = []
149
+ rows: list[tuple[str, ...]] = []
151
150
  with logging_redirect_tqdm():
152
151
  for source_id, target_prefix, target_id in tqdm(
153
152
  df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
pyobo/aws.py CHANGED
@@ -1,10 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Interface for caching data on AWS S3."""
4
2
 
5
3
  import logging
6
4
  import os
7
- from typing import Optional, Set
5
+ from typing import Optional
8
6
 
9
7
  import boto3
10
8
  import humanize
@@ -57,8 +55,8 @@ def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
57
55
 
58
56
  def upload_artifacts(
59
57
  bucket: str,
60
- whitelist: Optional[Set[str]] = None,
61
- blacklist: Optional[Set[str]] = None,
58
+ whitelist: Optional[set[str]] = None,
59
+ blacklist: Optional[set[str]] = None,
62
60
  s3_client=None,
63
61
  ) -> None:
64
62
  """Upload all artifacts to AWS."""
pyobo/cli/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO."""
4
2
 
5
3
  from .cli import main
pyobo/cli/aws.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO's interface to S3."""
4
2
 
5
3
  import click
pyobo/cli/cli.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO."""
4
2
 
5
3
  import logging
@@ -15,7 +13,6 @@ from tabulate import tabulate
15
13
  from .aws import main as aws_main
16
14
  from .database import main as database_main
17
15
  from .lookup import lookup
18
- from ..apps.cli import main as apps_main
19
16
  from ..constants import RAW_DIRECTORY
20
17
  from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
21
18
  from ..registries import iter_cached_obo
@@ -116,7 +113,6 @@ def remapping(file):
116
113
 
117
114
 
118
115
  main.add_command(lookup)
119
- main.add_command(apps_main)
120
116
  main.add_command(aws_main)
121
117
  main.add_command(database_main)
122
118
 
pyobo/cli/database.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO Database Generation."""
4
2
 
5
3
  import logging
@@ -317,7 +315,7 @@ def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
317
315
  @zenodo_option
318
316
  @force_option
319
317
  @no_strict_option
320
- def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool): # noqa: D202
318
+ def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
321
319
  """Make the prefix-identifier-xref dump."""
322
320
  with logging_redirect_tqdm():
323
321
  paths = db_output_helper(
pyobo/cli/lookup.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for PyOBO lookups."""
4
2
 
5
3
  import json
@@ -282,7 +280,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
282
280
  """Look up ancestors."""
283
281
  curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
284
282
  for curie in sorted(curies or []):
285
- click.echo(f"{curie}\t{get_name_by_curie(curie)}")
283
+ click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
286
284
 
287
285
 
288
286
  @lookup.command()
@@ -295,7 +293,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
295
293
  """Look up descendants."""
296
294
  curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
297
295
  for curie in sorted(curies or []):
298
- click.echo(f"{curie}\t{get_name_by_curie(curie)}")
296
+ click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
299
297
 
300
298
 
301
299
  @lookup.command()
pyobo/cli/utils.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for the CLI."""
4
2
 
5
3
  import datetime
pyobo/constants.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Constants for PyOBO."""
4
2
 
5
3
  import logging
@@ -80,7 +78,6 @@ TYPEDEFS_FILE = "typedefs.tsv.gz"
80
78
  SPECIES_RECORD = "5334738"
81
79
  SPECIES_FILE = "species.tsv.gz"
82
80
 
83
-
84
81
  NCBITAXON_PREFIX = "NCBITaxon"
85
82
  DATE_FORMAT = "%d:%m:%Y %H:%M"
86
83
  PROVENANCE_PREFIXES = {
pyobo/getters.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for OBO files."""
4
2
 
5
3
  import datetime
@@ -11,16 +9,11 @@ import subprocess
11
9
  import typing
12
10
  import urllib.error
13
11
  from collections import Counter
12
+ from collections.abc import Iterable, Mapping, Sequence
14
13
  from pathlib import Path
15
14
  from typing import (
16
15
  Callable,
17
- Iterable,
18
- List,
19
- Mapping,
20
16
  Optional,
21
- Sequence,
22
- Set,
23
- Tuple,
24
17
  TypeVar,
25
18
  Union,
26
19
  )
@@ -30,7 +23,7 @@ from bioontologies import robot
30
23
  from tqdm.auto import tqdm
31
24
 
32
25
  from .constants import DATABASE_DIRECTORY
33
- from .identifier_utils import MissingPrefix, wrap_norm_prefix
26
+ from .identifier_utils import MissingPrefixError, wrap_norm_prefix
34
27
  from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
35
28
  from .struct import Obo
36
29
  from .utils.io import get_writer
@@ -39,17 +32,17 @@ from .version import get_git_hash, get_version
39
32
 
40
33
  __all__ = [
41
34
  "get_ontology",
42
- "NoBuild",
35
+ "NoBuildError",
43
36
  ]
44
37
 
45
38
  logger = logging.getLogger(__name__)
46
39
 
47
40
 
48
- class NoBuild(RuntimeError):
41
+ class NoBuildError(RuntimeError):
49
42
  """Base exception for being unable to build."""
50
43
 
51
44
 
52
- class UnhandledFormat(NoBuild):
45
+ class UnhandledFormatError(NoBuildError):
53
46
  """Only OWL is available."""
54
47
 
55
48
 
@@ -117,7 +110,7 @@ def get_ontology(
117
110
 
118
111
  ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
119
112
  if path is None:
120
- raise NoBuild(prefix)
113
+ raise NoBuildError(prefix)
121
114
  elif ontology_format == "obo":
122
115
  pass # all gucci
123
116
  elif ontology_format == "owl":
@@ -127,7 +120,7 @@ def get_ontology(
127
120
  robot.convert(path, _converted_obo_path, check=robot_check)
128
121
  path = _converted_obo_path
129
122
  else:
130
- raise UnhandledFormat(f"[{prefix}] unhandled ontology file format: {path.suffix}")
123
+ raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
131
124
 
132
125
  from .reader import from_obo_path
133
126
 
@@ -147,8 +140,8 @@ def get_ontology(
147
140
 
148
141
  def _ensure_ontology_path(
149
142
  prefix: str, force, version
150
- ) -> Union[Tuple[str, Path], Tuple[None, None]]:
151
- for ontology_format, url in [ # noqa:B007
143
+ ) -> Union[tuple[str, Path], tuple[None, None]]:
144
+ for ontology_format, url in [
152
145
  ("obo", bioregistry.get_obo_download(prefix)),
153
146
  ("owl", bioregistry.get_owl_download(prefix)),
154
147
  ("json", bioregistry.get_json_download(prefix)),
@@ -246,7 +239,7 @@ def iter_helper(
246
239
  leave: bool = False,
247
240
  strict: bool = True,
248
241
  **kwargs,
249
- ) -> Iterable[Tuple[str, str, X]]:
242
+ ) -> Iterable[tuple[str, str, X]]:
250
243
  """Yield all mappings extracted from each database given."""
251
244
  for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs):
252
245
  it = tqdm(
@@ -266,7 +259,7 @@ def _prefixes(
266
259
  skip_below: Optional[str] = None,
267
260
  skip_below_inclusive: bool = True,
268
261
  skip_pyobo: bool = False,
269
- skip_set: Optional[Set[str]] = None,
262
+ skip_set: Optional[set[str]] = None,
270
263
  ) -> Iterable[str]:
271
264
  for prefix, resource in sorted(bioregistry.read_registry().items()):
272
265
  if resource.no_own_terms:
@@ -299,10 +292,10 @@ def iter_helper_helper(
299
292
  skip_below: Optional[str] = None,
300
293
  skip_below_inclusive: bool = True,
301
294
  skip_pyobo: bool = False,
302
- skip_set: Optional[Set[str]] = None,
295
+ skip_set: Optional[set[str]] = None,
303
296
  strict: bool = True,
304
297
  **kwargs,
305
- ) -> Iterable[Tuple[str, X]]:
298
+ ) -> Iterable[tuple[str, X]]:
306
299
  """Yield all mappings extracted from each database given.
307
300
 
308
301
  :param f: A function that takes a prefix and gives back something that will be used by an outer function.
@@ -342,13 +335,13 @@ def iter_helper_helper(
342
335
  logger.warning("[%s] unable to download", prefix)
343
336
  if strict and not bioregistry.is_deprecated(prefix):
344
337
  raise
345
- except MissingPrefix as e:
338
+ except MissingPrefixError as e:
346
339
  logger.warning("[%s] missing prefix: %s", prefix, e)
347
340
  if strict and not bioregistry.is_deprecated(prefix):
348
341
  raise e
349
342
  except subprocess.CalledProcessError:
350
343
  logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
351
- except UnhandledFormat as e:
344
+ except UnhandledFormatError as e:
352
345
  logger.warning("[%s] %s", prefix, e)
353
346
  except ValueError as e:
354
347
  if _is_xml(e):
@@ -390,7 +383,7 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
390
383
 
391
384
 
392
385
  def db_output_helper(
393
- f: Callable[..., Iterable[Tuple[str, ...]]],
386
+ f: Callable[..., Iterable[tuple[str, ...]]],
394
387
  db_name: str,
395
388
  columns: Sequence[str],
396
389
  *,
@@ -399,7 +392,7 @@ def db_output_helper(
399
392
  use_gzip: bool = True,
400
393
  summary_detailed: Optional[Sequence[int]] = None,
401
394
  **kwargs,
402
- ) -> List[pathlib.Path]:
395
+ ) -> list[pathlib.Path]:
403
396
  """Help output database builds.
404
397
 
405
398
  :param f: A function that takes a prefix and gives back something that will be used by an outer function.
@@ -413,7 +406,7 @@ def db_output_helper(
413
406
  directory = _prep_dir(directory)
414
407
 
415
408
  c: typing.Counter[str] = Counter()
416
- c_detailed: typing.Counter[Tuple[str, ...]] = Counter()
409
+ c_detailed: typing.Counter[tuple[str, ...]] = Counter()
417
410
 
418
411
  if use_gzip:
419
412
  db_path = directory.joinpath(f"{db_name}.tsv.gz")
@@ -475,7 +468,7 @@ def db_output_helper(
475
468
  indent=2,
476
469
  )
477
470
 
478
- rv: List[pathlib.Path] = [
471
+ rv: list[pathlib.Path] = [
479
472
  db_metadata_path,
480
473
  db_path,
481
474
  db_sample_path,
pyobo/gilda_utils.py CHANGED
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """PyOBO's Gilda utilities."""
4
2
 
5
3
  import logging
4
+ from collections.abc import Iterable
6
5
  from subprocess import CalledProcessError
7
- from typing import Iterable, List, Optional, Tuple, Type, Union
6
+ from typing import Optional, Union
8
7
 
9
8
  import bioregistry
10
9
  import gilda.api
@@ -15,13 +14,14 @@ from gilda.term import filter_out_duplicates
15
14
  from tqdm.auto import tqdm
16
15
 
17
16
  from pyobo import (
17
+ get_descendants,
18
18
  get_id_name_mapping,
19
19
  get_id_species_mapping,
20
20
  get_id_synonyms_mapping,
21
21
  get_ids,
22
22
  get_obsolete,
23
23
  )
24
- from pyobo.getters import NoBuild
24
+ from pyobo.getters import NoBuildError
25
25
  from pyobo.utils.io import multidict
26
26
 
27
27
  __all__ = [
@@ -40,7 +40,7 @@ def iter_gilda_prediction_tuples(
40
40
  grounder: Optional[Grounder] = None,
41
41
  identifiers_are_names: bool = False,
42
42
  strict: bool = False,
43
- ) -> Iterable[Tuple[str, str, str, str, str, str, str, str, float]]:
43
+ ) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
44
44
  """Iterate over prediction tuples for a given prefix."""
45
45
  if grounder is None:
46
46
  grounder = gilda.api.grounder
@@ -93,7 +93,7 @@ def get_grounder(
93
93
  prefixes: Union[str, Iterable[str]],
94
94
  *,
95
95
  unnamed: Optional[Iterable[str]] = None,
96
- grounder_cls: Optional[Type[Grounder]] = None,
96
+ grounder_cls: Optional[type[Grounder]] = None,
97
97
  versions: Union[None, str, Iterable[Union[str, None]]] = None,
98
98
  strict: bool = True,
99
99
  skip_obsolete: bool = False,
@@ -114,7 +114,7 @@ def get_grounder(
114
114
  if len(prefixes) != len(versions):
115
115
  raise ValueError
116
116
 
117
- terms: List[gilda.term.Term] = []
117
+ terms: list[gilda.term.Term] = []
118
118
  for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
119
119
  try:
120
120
  p_terms = list(
@@ -127,7 +127,7 @@ def get_grounder(
127
127
  progress=progress,
128
128
  )
129
129
  )
130
- except (NoBuild, CalledProcessError):
130
+ except (NoBuildError, CalledProcessError):
131
131
  continue
132
132
  else:
133
133
  terms.extend(p_terms)
@@ -247,3 +247,23 @@ def get_gilda_terms(
247
247
  )
248
248
  if term is not None:
249
249
  yield term
250
+
251
+
252
+ def get_gilda_term_subset(
253
+ source: str, ancestors: Union[str, list[str]], **kwargs
254
+ ) -> Iterable[gilda.term.Term]:
255
+ """Get a subset of terms."""
256
+ subset = {
257
+ descendant
258
+ for parent_curie in _ensure_list(ancestors)
259
+ for descendant in get_descendants(*parent_curie.split(":")) or []
260
+ }
261
+ for term in get_gilda_terms(source, **kwargs):
262
+ if bioregistry.curie_to_str(term.db, term.id) in subset:
263
+ yield term
264
+
265
+
266
+ def _ensure_list(s: Union[str, list[str]]) -> list[str]:
267
+ if isinstance(s, str):
268
+ return [s]
269
+ return s
pyobo/identifier_utils.py CHANGED
@@ -1,12 +1,12 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for handling prefixes."""
4
2
 
3
+ from __future__ import annotations
4
+
5
5
  import logging
6
6
  from functools import wraps
7
- from typing import Optional, Tuple, Union
8
7
 
9
8
  import bioregistry
9
+ from curies import Reference, ReferenceTuple
10
10
 
11
11
  from .registries import (
12
12
  curie_has_blacklisted_prefix,
@@ -25,10 +25,15 @@ __all__ = [
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
- class MissingPrefix(ValueError):
28
+ class MissingPrefixError(ValueError):
29
29
  """Raised on a missing prefix."""
30
30
 
31
- def __init__(self, prefix, curie, xref=None, ontology=None):
31
+ reference: Reference | None
32
+
33
+ def __init__(
34
+ self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
35
+ ):
36
+ """Initialize the error."""
32
37
  self.prefix = prefix
33
38
  self.curie = curie
34
39
  self.xref = xref
@@ -47,13 +52,13 @@ class MissingPrefix(ValueError):
47
52
  return s
48
53
 
49
54
 
50
- def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> Optional[str]:
55
+ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
51
56
  """Normalize a namespace and return, if possible."""
52
57
  norm_prefix = bioregistry.normalize_prefix(prefix)
53
58
  if norm_prefix is not None:
54
59
  return norm_prefix
55
60
  elif strict:
56
- raise MissingPrefix(prefix=prefix, curie=curie, xref=xref)
61
+ raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
57
62
  else:
58
63
  return None
59
64
 
@@ -61,9 +66,7 @@ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True
61
66
  BAD_CURIES = set()
62
67
 
63
68
 
64
- def normalize_curie(
65
- curie: str, *, strict: bool = True
66
- ) -> Union[Tuple[str, str], Tuple[None, None]]:
69
+ def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
67
70
  """Parse a string that looks like a CURIE.
68
71
 
69
72
  :param curie: A compact uniform resource identifier (CURIE)
@@ -108,11 +111,25 @@ def wrap_norm_prefix(f):
108
111
  """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
109
112
 
110
113
  @wraps(f)
111
- def _wrapped(prefix, *args, **kwargs):
112
- norm_prefix = bioregistry.normalize_prefix(prefix)
113
- if norm_prefix is None:
114
- raise ValueError(f"Invalid prefix: {prefix}")
115
- return f(norm_prefix, *args, **kwargs)
114
+ def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
115
+ if isinstance(prefix, str):
116
+ norm_prefix = bioregistry.normalize_prefix(prefix)
117
+ if norm_prefix is None:
118
+ raise ValueError(f"Invalid prefix: {prefix}")
119
+ prefix = norm_prefix
120
+ elif isinstance(prefix, Reference):
121
+ norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
122
+ if norm_prefix is None:
123
+ raise ValueError(f"Invalid prefix: {prefix.prefix}")
124
+ prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
125
+ elif isinstance(prefix, ReferenceTuple):
126
+ norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
127
+ if norm_prefix is None:
128
+ raise ValueError(f"Invalid prefix: {prefix.prefix}")
129
+ prefix = ReferenceTuple(norm_prefix, prefix.identifier)
130
+ else:
131
+ raise TypeError
132
+ return f(prefix, *args, **kwargs)
116
133
 
117
134
  return _wrapped
118
135
 
pyobo/mocks.py CHANGED
@@ -1,8 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Mocks for PyOBO."""
4
2
 
5
- from typing import List, Mapping, Optional, Tuple, TypeVar, Union
3
+ from collections.abc import Mapping
4
+ from typing import Optional, TypeVar, Union
6
5
  from unittest import mock
7
6
 
8
7
  import pandas as pd
@@ -25,7 +24,7 @@ def get_mock_id_name_mapping(data: Mapping[str, Mapping[str, str]]) -> mock._pat
25
24
  return _replace_mapping_getter("pyobo.api.names.get_id_name_mapping", data)
26
25
 
27
26
 
28
- def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch:
27
+ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
29
28
  """Mock the :func:`pyobo.extract.get_id_synonyms_mapping` function.
30
29
 
31
30
  :param data: A mapping from prefix to mappings of identifier to lists of synonyms.
@@ -33,7 +32,7 @@ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) ->
33
32
  return _replace_mapping_getter("pyobo.api.names.get_id_synonyms_mapping", data)
34
33
 
35
34
 
36
- def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch:
35
+ def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
37
36
  """Mock the :func:`pyobo.extract.get_id_to_alts` function.
38
37
 
39
38
  :param data: A mapping from prefix to mappings of identifier to lists of alternative identifiers.
@@ -52,7 +51,7 @@ def _replace_mapping_getter(name: str, data: Mapping[str, Mapping[str, X]]) -> m
52
51
 
53
52
 
54
53
  def get_mock_get_xrefs_df(
55
- df: Union[List[Tuple[str, str, str, str, str]], pd.DataFrame]
54
+ df: Union[list[tuple[str, str, str, str, str]], pd.DataFrame],
56
55
  ) -> mock._patch:
57
56
  """Mock the :func:`pyobo.xrefsdb.xrefs_pipeline.get_xref_df` function.
58
57
 
pyobo/normalizer.py CHANGED
@@ -1,12 +1,11 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Use synonyms from OBO to normalize names."""
4
2
 
5
3
  import logging
6
4
  from abc import ABC, abstractmethod
5
+ from collections.abc import Iterable, Mapping
7
6
  from dataclasses import dataclass
8
7
  from functools import lru_cache
9
- from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union
8
+ from typing import Optional, Union
10
9
 
11
10
  import bioregistry
12
11
 
@@ -23,29 +22,29 @@ __all__ = [
23
22
 
24
23
  logger = logging.getLogger(__name__)
25
24
 
26
- NormalizationSuccess = Tuple[str, str, str]
27
- NormalizationFailure = Tuple[None, None, str]
25
+ NormalizationSuccess = tuple[str, str, str]
26
+ NormalizationFailure = tuple[None, None, str]
28
27
  NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
29
28
 
30
29
 
31
30
  class Normalizer(ABC):
32
31
  """A normalizer."""
33
32
 
34
- id_to_name: Dict[str, str]
35
- id_to_synonyms: Dict[str, List[str]]
33
+ id_to_name: dict[str, str]
34
+ id_to_synonyms: dict[str, list[str]]
36
35
 
37
36
  #: A mapping from all synonyms to the set of identifiers that they point to.
38
37
  #: In a perfect world, each would only be a single element.
39
- synonym_to_identifiers_mapping: Dict[str, Set[str]]
38
+ synonym_to_identifiers_mapping: dict[str, set[str]]
40
39
  #: A mapping from normalized names to the actual ones that they came from
41
- norm_name_to_name: Dict[str, Set[str]]
40
+ norm_name_to_name: dict[str, set[str]]
42
41
 
43
42
  def __init__(
44
43
  self,
45
- id_to_name: Dict[str, str],
46
- id_to_synonyms: Dict[str, List[str]],
44
+ id_to_name: dict[str, str],
45
+ id_to_synonyms: dict[str, list[str]],
47
46
  remove_prefix: Optional[str] = None,
48
- ) -> None: # noqa: D107
47
+ ) -> None:
49
48
  """Initialize the normalizer.
50
49
 
51
50
  :param id_to_name: An identifier to name dictionary.
@@ -64,7 +63,7 @@ class Normalizer(ABC):
64
63
  self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
65
64
 
66
65
  @classmethod
67
- def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> Dict[str, Set[str]]:
66
+ def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
68
67
  return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
69
68
 
70
69
  @staticmethod
@@ -81,7 +80,7 @@ class Normalizer(ABC):
81
80
  id_to_name: Mapping[str, str],
82
81
  id_to_synonyms: Mapping[str, Iterable[str]],
83
82
  remove_prefix: Optional[str] = None,
84
- ) -> Iterable[Tuple[str, str]]:
83
+ ) -> Iterable[tuple[str, str]]:
85
84
  if remove_prefix is not None:
86
85
  remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
87
86
 
@@ -101,7 +100,7 @@ class Normalizer(ABC):
101
100
  # it might overwrite but this is probably always due to alternate ids
102
101
  yield synonym, identifier
103
102
 
104
- def get_names(self, query: str) -> List[str]:
103
+ def get_names(self, query: str) -> list[str]:
105
104
  """Get all names to which the query text maps."""
106
105
  norm_text = self._normalize_text(query)
107
106
  return list(self.norm_name_to_name.get(norm_text, []))
@@ -112,7 +111,7 @@ class Normalizer(ABC):
112
111
  raise NotImplementedError
113
112
 
114
113
 
115
- @lru_cache()
114
+ @lru_cache
116
115
  def get_normalizer(prefix: str) -> Normalizer:
117
116
  """Get an OBO normalizer."""
118
117
  norm_prefix = bioregistry.normalize_prefix(prefix)
@@ -149,7 +148,8 @@ def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult
149
148
  class OboNormalizer(Normalizer):
150
149
  """A utility for normalizing by names."""
151
150
 
152
- def __init__(self, prefix: str) -> None: # noqa: D107
151
+ def __init__(self, prefix: str) -> None:
152
+ """Initialize the normalizer by an ontology's Bioregistry prefix."""
153
153
  self.prefix = prefix
154
154
  self._len_prefix = len(prefix)
155
155
  id_to_name = names.get_id_name_mapping(prefix)
@@ -160,7 +160,7 @@ class OboNormalizer(Normalizer):
160
160
  remove_prefix=prefix,
161
161
  )
162
162
 
163
- def __repr__(self) -> str: # noqa: D105
163
+ def __repr__(self) -> str:
164
164
  return f'OboNormalizer(prefix="{self.prefix}")'
165
165
 
166
166
  def normalize(self, query: str) -> NormalizationResult:
@@ -188,20 +188,20 @@ class MultiNormalizer:
188
188
  If you're looking for taxa of exotic plants, you might use:
189
189
 
190
190
  >>> from pyobo.normalizer import MultiNormalizer
191
- >>> normalizer = MultiNormalizer(prefixes=['ncbitaxon', 'itis'])
192
- >>> normalizer.normalize('Homo sapiens')
191
+ >>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
192
+ >>> normalizer.normalize("Homo sapiens")
193
193
  ('ncbitaxon', '9606', 'Homo sapiens')
194
- >>> normalizer.normalize('Abies bifolia') # variety not listed in NCBI
194
+ >>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
195
195
  ('itis', '507501', 'Abies bifolia')
196
- >>> normalizer.normalize('vulcan') # nice try, nerds
196
+ >>> normalizer.normalize("vulcan") # nice try, nerds
197
197
  (None, None, None)
198
198
  """
199
199
 
200
200
  #: The normalizers for each prefix
201
- normalizers: List[Normalizer]
201
+ normalizers: list[Normalizer]
202
202
 
203
203
  @staticmethod
204
- def from_prefixes(prefixes: List[str]) -> "MultiNormalizer":
204
+ def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
205
205
  """Instantiate normalizers based on the given prefixes, in preferred order.."""
206
206
  return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
207
207
 
pyobo/obographs.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Convert PyOBO into OBO Graph."""
2
2
 
3
- from typing import Iterable, List
3
+ from collections.abc import Iterable
4
4
 
5
5
  import bioregistry
6
6
  import curies
@@ -35,8 +35,8 @@ def parse_results_from_obo(obo: Obo) -> ParseResults:
35
35
 
36
36
  def graph_from_obo(obo: Obo) -> Graph:
37
37
  """Get an OBO Graph object from a PyOBO object."""
38
- nodes: List[Node] = []
39
- edges: List[Edge] = []
38
+ nodes: list[Node] = []
39
+ edges: list[Edge] = []
40
40
  for term in obo:
41
41
  nodes.append(_get_class_node(term))
42
42
  edges.extend(_iter_edges(term))