pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. pyobo/__init__.py +0 -2
  2. pyobo/__main__.py +0 -2
  3. pyobo/api/__init__.py +0 -2
  4. pyobo/api/alts.py +6 -7
  5. pyobo/api/hierarchy.py +14 -15
  6. pyobo/api/metadata.py +3 -4
  7. pyobo/api/names.py +51 -31
  8. pyobo/api/properties.py +6 -7
  9. pyobo/api/relations.py +12 -11
  10. pyobo/api/species.py +5 -6
  11. pyobo/api/typedefs.py +1 -3
  12. pyobo/api/utils.py +63 -2
  13. pyobo/api/xrefs.py +4 -5
  14. pyobo/aws.py +3 -5
  15. pyobo/cli/__init__.py +0 -2
  16. pyobo/cli/aws.py +0 -2
  17. pyobo/cli/cli.py +0 -4
  18. pyobo/cli/database.py +1 -3
  19. pyobo/cli/lookup.py +2 -4
  20. pyobo/cli/utils.py +0 -2
  21. pyobo/constants.py +0 -3
  22. pyobo/getters.py +19 -26
  23. pyobo/gilda_utils.py +28 -8
  24. pyobo/identifier_utils.py +32 -15
  25. pyobo/mocks.py +5 -6
  26. pyobo/normalizer.py +24 -24
  27. pyobo/obographs.py +3 -3
  28. pyobo/plugins.py +3 -4
  29. pyobo/py.typed +0 -0
  30. pyobo/reader.py +19 -21
  31. pyobo/registries/__init__.py +0 -2
  32. pyobo/registries/metaregistry.py +6 -8
  33. pyobo/resource_utils.py +1 -3
  34. pyobo/resources/__init__.py +0 -2
  35. pyobo/resources/ncbitaxon.py +2 -3
  36. pyobo/resources/ro.py +2 -4
  37. pyobo/sources/README.md +15 -0
  38. pyobo/sources/__init__.py +2 -2
  39. pyobo/sources/agrovoc.py +3 -3
  40. pyobo/sources/antibodyregistry.py +4 -5
  41. pyobo/sources/biogrid.py +7 -7
  42. pyobo/sources/ccle.py +3 -4
  43. pyobo/sources/cgnc.py +1 -3
  44. pyobo/sources/chebi.py +2 -4
  45. pyobo/sources/chembl.py +1 -3
  46. pyobo/sources/civic_gene.py +2 -3
  47. pyobo/sources/complexportal.py +3 -5
  48. pyobo/sources/conso.py +2 -4
  49. pyobo/sources/cpt.py +1 -3
  50. pyobo/sources/credit.py +68 -0
  51. pyobo/sources/cvx.py +1 -3
  52. pyobo/sources/depmap.py +3 -4
  53. pyobo/sources/dictybase_gene.py +1 -3
  54. pyobo/sources/drugbank.py +6 -7
  55. pyobo/sources/drugbank_salt.py +3 -4
  56. pyobo/sources/drugcentral.py +5 -7
  57. pyobo/sources/expasy.py +11 -12
  58. pyobo/sources/famplex.py +3 -5
  59. pyobo/sources/flybase.py +2 -4
  60. pyobo/sources/geonames.py +28 -10
  61. pyobo/sources/gmt_utils.py +5 -6
  62. pyobo/sources/go.py +4 -6
  63. pyobo/sources/gwascentral_phenotype.py +1 -3
  64. pyobo/sources/gwascentral_study.py +2 -3
  65. pyobo/sources/hgnc.py +8 -9
  66. pyobo/sources/hgncgenefamily.py +2 -4
  67. pyobo/sources/icd10.py +3 -4
  68. pyobo/sources/icd11.py +3 -4
  69. pyobo/sources/icd_utils.py +6 -7
  70. pyobo/sources/interpro.py +3 -5
  71. pyobo/sources/itis.py +1 -3
  72. pyobo/sources/kegg/__init__.py +0 -2
  73. pyobo/sources/kegg/api.py +3 -4
  74. pyobo/sources/kegg/genes.py +3 -4
  75. pyobo/sources/kegg/genome.py +1 -3
  76. pyobo/sources/kegg/pathway.py +5 -6
  77. pyobo/sources/mesh.py +19 -21
  78. pyobo/sources/mgi.py +1 -3
  79. pyobo/sources/mirbase.py +4 -6
  80. pyobo/sources/mirbase_constants.py +0 -2
  81. pyobo/sources/mirbase_family.py +1 -3
  82. pyobo/sources/mirbase_mature.py +1 -3
  83. pyobo/sources/msigdb.py +4 -5
  84. pyobo/sources/ncbigene.py +3 -5
  85. pyobo/sources/npass.py +2 -4
  86. pyobo/sources/omim_ps.py +1 -3
  87. pyobo/sources/pathbank.py +3 -5
  88. pyobo/sources/pfam.py +1 -3
  89. pyobo/sources/pfam_clan.py +1 -3
  90. pyobo/sources/pid.py +3 -5
  91. pyobo/sources/pombase.py +1 -3
  92. pyobo/sources/pubchem.py +5 -6
  93. pyobo/sources/reactome.py +2 -4
  94. pyobo/sources/rgd.py +3 -4
  95. pyobo/sources/rhea.py +9 -10
  96. pyobo/sources/ror.py +69 -22
  97. pyobo/sources/selventa/__init__.py +0 -2
  98. pyobo/sources/selventa/schem.py +1 -3
  99. pyobo/sources/selventa/scomp.py +1 -3
  100. pyobo/sources/selventa/sdis.py +1 -3
  101. pyobo/sources/selventa/sfam.py +1 -3
  102. pyobo/sources/sgd.py +1 -3
  103. pyobo/sources/slm.py +1 -3
  104. pyobo/sources/umls/__init__.py +0 -2
  105. pyobo/sources/umls/__main__.py +0 -2
  106. pyobo/sources/umls/get_synonym_types.py +1 -1
  107. pyobo/sources/umls/umls.py +2 -4
  108. pyobo/sources/uniprot/__init__.py +0 -2
  109. pyobo/sources/uniprot/uniprot.py +6 -6
  110. pyobo/sources/uniprot/uniprot_ptm.py +6 -5
  111. pyobo/sources/utils.py +3 -5
  112. pyobo/sources/wikipathways.py +1 -3
  113. pyobo/sources/zfin.py +2 -3
  114. pyobo/ssg/__init__.py +3 -2
  115. pyobo/struct/__init__.py +0 -2
  116. pyobo/struct/reference.py +13 -15
  117. pyobo/struct/struct.py +106 -99
  118. pyobo/struct/typedef.py +19 -10
  119. pyobo/struct/utils.py +0 -2
  120. pyobo/utils/__init__.py +0 -2
  121. pyobo/utils/cache.py +14 -6
  122. pyobo/utils/io.py +9 -10
  123. pyobo/utils/iter.py +5 -6
  124. pyobo/utils/misc.py +1 -3
  125. pyobo/utils/ndex_utils.py +6 -7
  126. pyobo/utils/path.py +5 -5
  127. pyobo/version.py +3 -5
  128. pyobo/xrefdb/__init__.py +0 -2
  129. pyobo/xrefdb/canonicalizer.py +27 -18
  130. pyobo/xrefdb/priority.py +0 -2
  131. pyobo/xrefdb/sources/__init__.py +9 -7
  132. pyobo/xrefdb/sources/biomappings.py +0 -2
  133. pyobo/xrefdb/sources/cbms2019.py +0 -2
  134. pyobo/xrefdb/sources/chembl.py +5 -7
  135. pyobo/xrefdb/sources/compath.py +1 -3
  136. pyobo/xrefdb/sources/famplex.py +3 -5
  137. pyobo/xrefdb/sources/gilda.py +0 -2
  138. pyobo/xrefdb/sources/intact.py +5 -5
  139. pyobo/xrefdb/sources/ncit.py +1 -3
  140. pyobo/xrefdb/sources/pubchem.py +2 -4
  141. pyobo/xrefdb/sources/wikidata.py +10 -5
  142. pyobo/xrefdb/xrefs_pipeline.py +15 -16
  143. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
  144. pyobo-0.11.0.dist-info/METADATA +723 -0
  145. pyobo-0.11.0.dist-info/RECORD +171 -0
  146. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
  147. pyobo-0.11.0.dist-info/entry_points.txt +2 -0
  148. pyobo/xrefdb/bengo.py +0 -44
  149. pyobo-0.10.11.dist-info/METADATA +0 -499
  150. pyobo-0.10.11.dist-info/RECORD +0 -169
  151. pyobo-0.10.11.dist-info/entry_points.txt +0 -15
  152. {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/plugins.py CHANGED
@@ -1,9 +1,8 @@
- # -*- coding: utf-8 -*-
-
  """Tools for loading entry points."""

+ from collections.abc import Iterable, Mapping
  from functools import lru_cache
- from typing import Callable, Iterable, Mapping, Optional
+ from typing import Callable, Optional

  from .struct import Obo

@@ -14,7 +13,7 @@ __all__ = [
  ]


- @lru_cache()
+ @lru_cache
  def _get_nomenclature_plugins() -> Mapping[str, Callable[[], Obo]]:
  from .sources import ontology_resolver

pyobo/py.typed ADDED
File without changes (empty marker file)
pyobo/reader.py CHANGED
@@ -1,11 +1,10 @@
- # -*- coding: utf-8 -*-
-
  """OBO Readers."""

  import logging
+ from collections.abc import Iterable, Mapping
  from datetime import datetime
  from pathlib import Path
- from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
+ from typing import Any, Optional, Union

  import bioregistry
  import networkx as nx
@@ -13,7 +12,7 @@ from more_itertools import pairwise
  from tqdm.auto import tqdm

  from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
- from .identifier_utils import MissingPrefix, normalize_curie
+ from .identifier_utils import MissingPrefixError, normalize_curie
  from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix
  from .struct import (
  Obo,
@@ -39,7 +38,7 @@ logger = logging.getLogger(__name__)

  # FIXME use bioontologies
  # RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
- RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = {
+ RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
  "part_of": part_of.pair,
  "has_part": has_part.pair,
  "develops_from": develops_from.pair,
@@ -75,7 +74,7 @@ def from_obo_path(
  return from_obonet(graph, strict=strict, **kwargs)


- def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noqa:C901
+ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
  """Get all of the terms from a OBO graph."""
  _ontology = graph.graph["ontology"]
  ontology = bioregistry.normalize_prefix(_ontology) # probably always okay
@@ -126,12 +125,12 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
  )
  for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict)
  )
- references: Mapping[Tuple[str, str], Reference] = {
+ references: Mapping[tuple[str, str], Reference] = {
  reference.pair: reference for reference in reference_it
  }

  #: CURIEs to typedefs
- typedefs: Mapping[Tuple[str, str], TypeDef] = {
+ typedefs: Mapping[tuple[str, str], TypeDef] = {
  typedef.pair: typedef for typedef in iterate_graph_typedefs(graph, ontology)
  }

@@ -152,7 +151,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq

  try:
  node_xrefs = list(iterate_node_xrefs(prefix=prefix, data=data, strict=strict))
- except MissingPrefix as e:
+ except MissingPrefixError as e:
  e.reference = reference
  raise e
  xrefs, provenance = [], []
@@ -171,7 +170,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq

  try:
  alt_ids = list(iterate_node_alt_ids(data, strict=strict))
- except MissingPrefix as e:
+ except MissingPrefixError as e:
  e.reference = reference
  raise e
  n_alt_ids += len(alt_ids)
@@ -185,7 +184,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
  strict=strict,
  )
  )
- except MissingPrefix as e:
+ except MissingPrefixError as e:
  e.reference = reference
  raise e
  n_parents += len(parents)
@@ -220,7 +219,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
  strict=strict,
  )
  )
- except MissingPrefix as e:
+ except MissingPrefixError as e:
  e.reference = reference
  raise e
  for relation, reference in relations_references:
@@ -278,7 +277,7 @@ def _iter_obo_graph(
  graph: nx.MultiDiGraph,
  *,
  strict: bool = True,
- ) -> Iterable[Tuple[str, str, Mapping[str, Any]]]:
+ ) -> Iterable[tuple[str, str, Mapping[str, Any]]]:
  """Iterate over the nodes in the graph with the prefix stripped (if it's there)."""
  for node, data in graph.nodes(data=True):
  prefix, identifier = normalize_curie(node, strict=strict)
@@ -366,7 +365,8 @@ def iterate_graph_typedefs(

  def get_definition(
  data, *, prefix: str, identifier: str
- ) -> Union[Tuple[None, None], Tuple[str, List[Reference]]]:
+ ) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
+ """Extract the definition from the data."""
  definition = data.get("def") # it's allowed not to have a definition
  if not definition:
  return None, None
@@ -379,7 +379,7 @@ def _extract_definition(
  prefix: str,
  identifier: str,
  strict: bool = False,
- ) -> Union[Tuple[None, None], Tuple[str, List[Reference]]]:
+ ) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
  """Extract the definitions."""
  if not s.startswith('"'):
  raise ValueError("definition does not start with a quote")
@@ -405,7 +405,7 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
  return None


- def _quote_split(s: str) -> Tuple[str, str]:
+ def _quote_split(s: str) -> tuple[str, str]:
  s = s.lstrip('"')
  i = _get_first_nonquoted(s)
  if i is None:
@@ -416,9 +416,7 @@ def _quote_split(s: str) -> Tuple[str, str]:
  def _clean_definition(s: str) -> str:
  # if '\t' in s:
  # logger.warning('has tab')
- return (
- s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace("\d", "") # noqa:W605
- )
+ return s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "")


  def _extract_synonym(
@@ -516,7 +514,7 @@ HANDLED_PROPERTY_TYPES = {

  def iterate_node_properties(
  data: Mapping[str, Any], *, property_prefix: Optional[str] = None, term=None
- ) -> Iterable[Tuple[str, str]]:
+ ) -> Iterable[tuple[str, str]]:
  """Extract properties from a :mod:`obonet` node's data."""
  for prop_value_type in data.get("property_value", []):
  try:
@@ -568,7 +566,7 @@ def iterate_node_relationships(
  prefix: str,
  identifier: str,
  strict: bool = True,
- ) -> Iterable[Tuple[Reference, Reference]]:
+ ) -> Iterable[tuple[Reference, Reference]]:
  """Extract relationships from a :mod:`obonet` node's data."""
  for s in data.get("relationship", []):
  relation_curie, target_curie = s.split(" ")
pyobo/registries/__init__.py CHANGED
@@ -1,5 +1,3 @@
- # -*- coding: utf-8 -*-
-
  """Extract registry information."""

  from .metaregistry import ( # noqa: F401
pyobo/registries/metaregistry.py CHANGED
@@ -1,13 +1,11 @@
- # -*- coding: utf-8 -*-
-
  """Load the manually curated metaregistry."""

  import itertools as itt
  import json
  import os
+ from collections.abc import Iterable, Mapping
  from functools import lru_cache
  from pathlib import Path
- from typing import Iterable, Mapping, Set, Tuple

  import bioregistry

@@ -25,7 +23,7 @@ def has_no_download(prefix: str) -> bool:


  @lru_cache(maxsize=1)
- def _no_download() -> Set[str]:
+ def _no_download() -> set[str]:
  """Get the list of prefixes not available as OBO."""
  return {
  prefix
@@ -41,7 +39,7 @@ def curie_has_blacklisted_prefix(curie: str) -> bool:


  @lru_cache(maxsize=1)
- def get_xrefs_prefix_blacklist() -> Set[str]:
+ def get_xrefs_prefix_blacklist() -> set[str]:
  """Get the set of blacklisted xref prefixes."""
  #: Xrefs starting with these prefixes will be ignored
  prefixes = set(
@@ -65,7 +63,7 @@ def curie_has_blacklisted_suffix(curie: str) -> bool:


  @lru_cache(maxsize=1)
- def get_xrefs_suffix_blacklist() -> Set[str]:
+ def get_xrefs_suffix_blacklist() -> set[str]:
  """Get the set of blacklisted xref suffixes."""
  #: Xrefs ending with these suffixes will be ignored
  return set(CURATED_REGISTRY["blacklists"]["suffix"])
@@ -77,7 +75,7 @@ def curie_is_blacklisted(curie: str) -> bool:


  @lru_cache(maxsize=1)
- def get_xrefs_blacklist() -> Set[str]:
+ def get_xrefs_blacklist() -> set[str]:
  """Get the set of blacklisted xrefs."""
  rv = set()
  for x in CURATED_REGISTRY["blacklists"]["full"]:
@@ -123,7 +121,7 @@ def remap_prefix(curie: str) -> str:
  return curie


- def iter_cached_obo() -> Iterable[Tuple[str, str]]:
+ def iter_cached_obo() -> Iterable[tuple[str, str]]:
  """Iterate over cached OBO paths."""
  for prefix in os.listdir(RAW_DIRECTORY):
  if prefix in GLOBAL_SKIP or has_no_download(prefix) or bioregistry.is_deprecated(prefix):
pyobo/resource_utils.py CHANGED
@@ -1,9 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """Resource utilities for PyOBO."""

+ from collections.abc import Sequence
  from functools import lru_cache
- from typing import Sequence

  import click
  import pandas as pd
pyobo/resources/__init__.py CHANGED
@@ -1,3 +1 @@
- # -*- coding: utf-8 -*-
-
  """Pre-cached resources for PyOBO."""
pyobo/resources/ncbitaxon.py CHANGED
@@ -1,12 +1,11 @@
- # -*- coding: utf-8 -*-
-
  """Loading of the NCBI Taxonomy names."""

  import csv
  import gzip
+ from collections.abc import Mapping
  from functools import lru_cache
  from pathlib import Path
- from typing import Mapping, Optional, Union
+ from typing import Optional, Union

  import requests

pyobo/resources/ro.py CHANGED
@@ -1,11 +1,9 @@
- # -*- coding: utf-8 -*-
-
  """Loading of the relations ontology names."""

  import csv
  import os
+ from collections.abc import Mapping
  from functools import lru_cache
- from typing import Mapping, Tuple

  import requests

@@ -20,7 +18,7 @@ PREFIX = "http://purl.obolibrary.org/obo/"


  @lru_cache(maxsize=1)
- def load_ro() -> Mapping[Tuple[str, str], str]:
+ def load_ro() -> Mapping[tuple[str, str], str]:
  """Load the relation ontology names."""
  if not os.path.exists(PATH):
  download()
pyobo/sources/README.md ADDED
@@ -0,0 +1,15 @@
+ # Sources
+
+ 1. Create a new module in `pyobo.sources` named with the prefix for the resource you're ontologizing
+ 2. Make sure your resource has a corresponding prefix in [the Bioregistry](https://github.com/biopragmatics/bioregistry)
+ 3. Subclass the `pyobo.Obo` class to represent your resource
+ 4. Add your resource to the list in `pyobo.sources.__init__`
+
+ ## What is in scope?
+
+ 1. Biomedical, semantic web, bibliographic, life sciences, and related natural sciences resources are welcome
+ 2. The source you want to ontologize should be an identifier resource, i.e., it mints its own identifiers. If you want
+ to ontologize some database that reuses some other identifier resource's identifiers, then this isn't the right
+ place.
+ 3. Resources that are not possible to download automatically are not in scope for PyOBO. Reproducibility and reusability
+ are core values of this software
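The README above describes the four-step workflow in prose. As an illustration only, here is a minimal sketch of what such a source module could look like, modeled on the `CreditGetter` module added in this release; the `example` prefix, identifier, and term are hypothetical placeholders rather than a real resource:

```python
"""Converter for a hypothetical example resource (illustrative sketch only)."""

from collections.abc import Iterable

from pyobo.struct import Obo, Term

__all__ = ["ExampleGetter"]

# Hypothetical prefix; a real source must use a prefix registered in the Bioregistry (step 2)
PREFIX = "example"


class ExampleGetter(Obo):
    """An ontology representation of the hypothetical example resource."""

    ontology = PREFIX
    static_version = "1.0"

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms in the ontology."""
        # A real converter would download and parse the resource here, e.g. with
        # pyobo.utils.path.ensure_path (see pyobo/sources/credit.py in this diff for a worked example).
        yield Term.from_triple(prefix=PREFIX, identifier="0000001", name="example term")


if __name__ == "__main__":
    ExampleGetter(force=True).write_default(write_obo=True)
```

Step 4 then amounts to importing the getter in `pyobo/sources/__init__.py` and adding it to `__all__`, exactly as the next two hunks do for `CreditGetter`.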
pyobo/sources/__init__.py CHANGED
@@ -1,5 +1,3 @@
- # -*- coding: utf-8 -*-
-
  """Sources of OBO content."""

  from class_resolver import ClassResolver
@@ -12,6 +10,7 @@ from .civic_gene import CIVICGeneGetter
  from .complexportal import ComplexPortalGetter
  from .conso import CONSOGetter
  from .cpt import CPTGetter
+ from .credit import CreditGetter
  from .cvx import CVXGetter
  from .depmap import DepMapGetter
  from .dictybase_gene import DictybaseGetter
@@ -69,6 +68,7 @@ __all__ = [
  "CVXGetter",
  "ChEMBLCompoundGetter",
  "ComplexPortalGetter",
+ "CreditGetter",
  "DepMapGetter",
  "DictybaseGetter",
  "DrugBankGetter",
pyobo/sources/agrovoc.py CHANGED
@@ -1,5 +1,3 @@
- # -*- coding: utf-8 -*-
-
  """Converter for AGROVOC."""

  import pystow
@@ -11,6 +9,8 @@ __all__ = [
  "ensure_agrovoc_graph",
  ]

+ PREFIX = "agrovoc"
+

  def ensure_agrovoc_graph(version: str) -> Graph:
  """Download and parse the given version of AGROVOC."""
@@ -20,5 +20,5 @@ def ensure_agrovoc_graph(version: str) -> Graph:
  graph.bind("skosxl", "http://www.w3.org/2008/05/skos-xl#")
  graph.bind("skos", SKOS)
  graph.bind("dcterms", DCTERMS)
- graph.bind("agrovoc", "http://aims.fao.org/aos/agrontology#")
+ graph.bind(PREFIX, "http://aims.fao.org/aos/agrontology#")
  return graph
pyobo/sources/antibodyregistry.py CHANGED
@@ -1,16 +1,15 @@
- # -*- coding: utf-8 -*-
-
  """Converter for the Antibody Registry."""

  import logging
- from typing import Iterable, Mapping, Optional
+ from collections.abc import Iterable, Mapping
+ from typing import Optional

- import bioversions
  import pandas as pd
  from bioregistry.utils import removeprefix
  from tqdm.auto import tqdm

  from pyobo import Obo, Term
+ from pyobo.api.utils import get_version
  from pyobo.utils.path import ensure_df

  __all__ = [
@@ -27,7 +26,7 @@ CHUNKSIZE = 20_000
  def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
  """Get the BioGRID identifiers mapping dataframe."""
  if version is None:
- version = bioversions.get_version(PREFIX)
+ version = get_version(PREFIX)
  df = ensure_df(
  PREFIX,
  url=URL,
pyobo/sources/biogrid.py CHANGED
@@ -1,13 +1,12 @@
- # -*- coding: utf-8 -*-
-
  """Extract and convert BioGRID identifiers."""

+ from collections.abc import Mapping
  from functools import partial
- from typing import Mapping, Optional
+ from typing import Optional

- import bioversions
  import pandas as pd

+ from pyobo.api.utils import get_version
  from pyobo.resources.ncbitaxon import get_ncbitaxon_id
  from pyobo.utils.cache import cached_mapping
  from pyobo.utils.path import ensure_df, prefix_directory_join
@@ -52,7 +51,7 @@ def _lookup(name: str) -> Optional[str]:

  def get_df() -> pd.DataFrame:
  """Get the BioGRID identifiers mapping dataframe."""
- version = bioversions.get_version("biogrid")
+ version = get_version("biogrid")
  url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
  df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
  df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
@@ -65,7 +64,7 @@ def get_df() -> pd.DataFrame:
  "cache",
  "xrefs",
  name="ncbigene.tsv",
- version=partial(bioversions.get_version, PREFIX),
+ version=partial(get_version, PREFIX),
  ),
  header=["biogrid_id", "ncbigene_id"],
  )
@@ -77,7 +76,8 @@ def get_ncbigene_mapping() -> Mapping[str, str]:
  .. code-block:: python

  from pyobo import get_filtered_xrefs
- biogrid_ncbigene_mapping = get_filtered_xrefs('biogrid', 'ncbigene')
+
+ biogrid_ncbigene_mapping = get_filtered_xrefs("biogrid", "ncbigene")
  """
  df = get_df()
  df = df.loc[df["IDENTIFIER_TYPE"] == "ENTREZ_GENE", ["BIOGRID_ID", "IDENTIFIER_VALUE"]]
pyobo/sources/ccle.py CHANGED
@@ -1,10 +1,9 @@
- # -*- coding: utf-8 -*-
-
  """Get the CCLE Cells, provided by cBioPortal."""

  import tarfile
+ from collections.abc import Iterable
  from pathlib import Path
- from typing import Iterable, Optional
+ from typing import Optional

  import pandas as pd
  import pystow
@@ -25,7 +24,7 @@ class CCLEGetter(Obo):

  ontology = bioregistry_key = PREFIX

- def __post_init__(self): # noqa: D105
+ def __post_init__(self):
  self.data_version = VERSION

  def iter_terms(self, force: bool = False) -> Iterable[Term]:
pyobo/sources/cgnc.py CHANGED
@@ -1,9 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """Converter for CGNC."""

  import logging
- from typing import Iterable
+ from collections.abc import Iterable

  import pandas as pd

pyobo/sources/chebi.py CHANGED
@@ -1,8 +1,6 @@
- # -*- coding: utf-8 -*-
-
  """Converter for ChEBI."""

- from typing import Mapping, Set, Tuple
+ from collections.abc import Mapping

  from ..api import get_filtered_properties_mapping, get_filtered_relations_df
  from ..struct import Reference, TypeDef
@@ -33,7 +31,7 @@ def get_chebi_smiles_id_mapping() -> Mapping[str, str]:
  has_role = TypeDef(reference=Reference(prefix="chebi", identifier="has_role"))


- def get_chebi_role_to_children() -> Mapping[str, Set[Tuple[str, str]]]:
+ def get_chebi_role_to_children() -> Mapping[str, set[tuple[str, str]]]:
  """Get the ChEBI role to children mapping."""
  df = get_filtered_relations_df("chebi", relation=has_role)
  return multisetdict((role_id, ("chebi", chemical_id)) for chemical_id, _, role_id in df.values)
pyobo/sources/chembl.py CHANGED
@@ -1,13 +1,11 @@
- # -*- coding: utf-8 -*-
-
  """Converter for ChEMBL.

  Run with ``python -m pyobo.sources.chembl -vv``.
  """

  import logging
+ from collections.abc import Iterable
  from contextlib import closing
- from typing import Iterable

  import chembl_downloader

pyobo/sources/civic_gene.py CHANGED
@@ -1,8 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """Converter for CiVIC Genes."""

- from typing import Iterable, Optional
+ from collections.abc import Iterable
+ from typing import Optional

  import pandas as pd

pyobo/sources/complexportal.py CHANGED
@@ -1,9 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """Converter for ComplexPortal."""

  import logging
- from typing import Iterable, List, Tuple
+ from collections.abc import Iterable

  import pandas as pd
  from tqdm.auto import tqdm
@@ -52,7 +50,7 @@ DTYPE = {
  }


- def _parse_members(s) -> List[Tuple[Reference, str]]:
+ def _parse_members(s) -> list[tuple[Reference, str]]:
  if pd.isna(s):
  return []

@@ -68,7 +66,7 @@ def _parse_members(s) -> List[Tuple[Reference, str]]:
  return rv


- def _parse_xrefs(s) -> List[Tuple[Reference, str]]:
+ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
  if pd.isna(s):
  return []

pyobo/sources/conso.py CHANGED
@@ -1,8 +1,6 @@
- # -*- coding: utf-8 -*-
-
  """Converter for CONSO."""

- from typing import Iterable, List
+ from collections.abc import Iterable

  import pandas as pd

@@ -68,7 +66,7 @@ def iter_terms() -> Iterable[Term]:
  for _, row in terms_df.iterrows():
  if row["Name"] == "WITHDRAWN":
  continue
- provenance: List[Reference] = []
+ provenance: list[Reference] = []
  for curie in row["References"].split(","):
  curie = curie.strip()
  if not curie:
pyobo/sources/cpt.py CHANGED
@@ -1,8 +1,6 @@
- # -*- coding: utf-8 -*-
-
  """Converter for CPT."""

- from typing import Iterable
+ from collections.abc import Iterable

  import pandas as pd

pyobo/sources/credit.py ADDED
@@ -0,0 +1,68 @@
+ """Converter for the Contributor Roles Taxonomy."""
+
+ from __future__ import annotations
+
+ import json
+ from collections.abc import Iterable
+
+ from more_itertools import chunked
+
+ from pyobo.struct import Obo, Term
+ from pyobo.utils.path import ensure_path
+
+ __all__ = [
+ "CreditGetter",
+ ]
+
+ url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
+ PREFIX = "credit"
+
+
+ class CreditGetter(Obo):
+ """An ontology representation of the Contributor Roles Taxonomy."""
+
+ ontology = PREFIX
+ static_version = "2022"
+ idspaces = {
+ PREFIX: "https://credit.niso.org/contributor-roles/",
+ }
+
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
+ """Iterate over terms in the ontology."""
+ return get_terms(force=force)
+
+
+ def get_obo(force: bool = False) -> Obo:
+ """Get RGD as OBO."""
+ return CreditGetter(force=force)
+
+
+ def get_terms(force: bool = False) -> list[Term]:
+ """Get terms from the Contributor Roles Taxonomy via GitHub."""
+ path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
+ with open(path) as f:
+ data = json.load(f)
+ terms = []
+ for x in data:
+ name = x["name"].removesuffix(".md").lower()
+
+ pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
+ with open(pp) as f:
+ header, *rest = f.read().splitlines()
+ name = header = header.removeprefix("# Contributor Roles/")
+ dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
+ identifier = (
+ dd["Canonical URL"]
+ .removeprefix("https://credit.niso.org/contributor-roles/")
+ .rstrip("/")
+ )
+ desc = dd["Short definition"]
+ terms.append(
+ Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
+ )
+
+ return terms
+
+
+ if __name__ == "__main__":
+ get_obo(force=True).write_default(write_obo=True)
pyobo/sources/cvx.py CHANGED
@@ -1,9 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """Converter for CVX."""

  from collections import defaultdict
- from typing import Iterable
+ from collections.abc import Iterable

  import pandas as pd

pyobo/sources/depmap.py CHANGED
@@ -1,8 +1,7 @@
- # -*- coding: utf-8 -*-
-
  """DepMap cell lines."""

- from typing import Iterable, Optional
+ from collections.abc import Iterable
+ from typing import Optional

  import pandas as pd
  import pystow
@@ -113,7 +112,7 @@ def ensure(version: str, force: bool = False) -> pd.DataFrame:
  url=get_url(version=version),
  name="sample_info.tsv",
  force=force,
- read_csv_kwargs=dict(sep=",", dtype=str),
+ read_csv_kwargs={"sep": ",", "dtype": str},
  )
