pyobo 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. pyobo/api/alts.py +13 -8
  2. pyobo/api/hierarchy.py +9 -5
  3. pyobo/api/metadata.py +6 -3
  4. pyobo/api/names.py +34 -11
  5. pyobo/api/relations.py +11 -3
  6. pyobo/api/species.py +3 -3
  7. pyobo/api/typedefs.py +6 -2
  8. pyobo/api/utils.py +5 -0
  9. pyobo/api/xrefs.py +10 -3
  10. pyobo/aws.py +12 -7
  11. pyobo/cli/lookup.py +5 -4
  12. pyobo/constants.py +31 -10
  13. pyobo/gilda_utils.py +21 -0
  14. pyobo/identifier_utils.py +22 -5
  15. pyobo/reader.py +1 -1
  16. pyobo/sources/__init__.py +2 -0
  17. pyobo/sources/antibodyregistry.py +7 -6
  18. pyobo/sources/biogrid.py +8 -4
  19. pyobo/sources/ccle.py +5 -5
  20. pyobo/sources/credit.py +68 -0
  21. pyobo/sources/geonames.py +27 -9
  22. pyobo/sources/hgnc.py +2 -2
  23. pyobo/sources/mesh.py +9 -7
  24. pyobo/sources/msigdb.py +1 -1
  25. pyobo/sources/npass.py +1 -1
  26. pyobo/sources/pubchem.py +3 -3
  27. pyobo/sources/rgd.py +1 -1
  28. pyobo/sources/rhea.py +2 -2
  29. pyobo/sources/ror.py +67 -21
  30. pyobo/sources/uniprot/uniprot.py +2 -2
  31. pyobo/struct/struct.py +4 -3
  32. pyobo/struct/typedef.py +10 -0
  33. pyobo/utils/path.py +2 -1
  34. pyobo/version.py +1 -1
  35. pyobo/xrefdb/sources/__init__.py +6 -3
  36. pyobo/xrefdb/sources/chembl.py +5 -5
  37. pyobo/xrefdb/sources/pubchem.py +3 -2
  38. pyobo/xrefdb/sources/wikidata.py +8 -1
  39. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/METADATA +23 -23
  40. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/RECORD +44 -44
  41. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/WHEEL +1 -1
  42. pyobo/xrefdb/bengo.py +0 -44
  43. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/LICENSE +0 -0
  44. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/entry_points.txt +0 -0
  45. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/top_level.txt +0 -0
pyobo/api/alts.py CHANGED
@@ -28,12 +28,15 @@ NO_ALTS = {
28
28
 
29
29
  @lru_cache()
30
30
  @wrap_norm_prefix
31
- def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
31
+ def get_id_to_alts(
32
+ prefix: str, *, force: bool = False, version: Optional[str] = None
33
+ ) -> Mapping[str, List[str]]:
32
34
  """Get alternate identifiers."""
33
35
  if prefix in NO_ALTS:
34
36
  return {}
35
37
 
36
- version = get_version(prefix)
38
+ if version is None:
39
+ version = get_version(prefix)
37
40
  path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
38
41
  header = [f"{prefix}_id", "alt_id"]
39
42
 
@@ -51,26 +54,28 @@ def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
51
54
 
52
55
  @lru_cache()
53
56
  @wrap_norm_prefix
54
- def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]:
57
+ def get_alts_to_id(
58
+ prefix: str, *, force: bool = False, version: Optional[str] = None
59
+ ) -> Mapping[str, str]:
55
60
  """Get alternative id to primary id mapping."""
56
61
  return {
57
62
  alt: primary
58
- for primary, alts in get_id_to_alts(prefix, force=force).items()
63
+ for primary, alts in get_id_to_alts(prefix, force=force, version=version).items()
59
64
  for alt in alts
60
65
  }
61
66
 
62
67
 
63
- def get_primary_curie(curie: str) -> Optional[str]:
68
+ def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
64
69
  """Get the primary curie for an entity."""
65
70
  prefix, identifier = normalize_curie(curie)
66
- primary_identifier = get_primary_identifier(prefix, identifier)
71
+ primary_identifier = get_primary_identifier(prefix, identifier, version=version)
67
72
  if primary_identifier is not None:
68
73
  return f"{prefix}:{primary_identifier}"
69
74
  return None
70
75
 
71
76
 
72
77
  @wrap_norm_prefix
73
- def get_primary_identifier(prefix: str, identifier: str) -> str:
78
+ def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str:
74
79
  """Get the primary identifier for an entity.
75
80
 
76
81
  :param prefix: The name of the resource
@@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str:
82
87
  if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
83
88
  return identifier
84
89
 
85
- alts_to_id = get_alts_to_id(prefix)
90
+ alts_to_id = get_alts_to_id(prefix, version=version)
86
91
  if alts_to_id and identifier in alts_to_id:
87
92
  return alts_to_id[identifier]
88
93
  return identifier
pyobo/api/hierarchy.py CHANGED
@@ -13,6 +13,7 @@ from .properties import get_filtered_properties_mapping
13
13
  from .relations import get_filtered_relations_df
14
14
  from ..identifier_utils import wrap_norm_prefix
15
15
  from ..struct import TypeDef, has_member, is_a, part_of
16
+ from ..struct.reference import Reference
16
17
 
17
18
  __all__ = [
18
19
  "get_hierarchy",
@@ -24,7 +25,6 @@ __all__ = [
24
25
  "get_children",
25
26
  ]
26
27
 
27
- from ..struct.reference import Reference
28
28
 
29
29
  logger = logging.getLogger(__name__)
30
30
 
@@ -154,14 +154,16 @@ def _get_hierarchy_helper(
154
154
  return rv
155
155
 
156
156
 
157
- def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
157
+ def is_descendent(
158
+ prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
159
+ ) -> bool:
158
160
  """Check that the first identifier has the second as a descendent.
159
161
 
160
162
  Check that go:0070246 ! natural killer cell apoptotic process is a
161
163
  descendant of go:0006915 ! apoptotic process::
162
164
  >>> assert is_descendent('go', '0070246', 'go', '0006915')
163
165
  """
164
- descendants = get_descendants(ancestor_prefix, ancestor_identifier)
166
+ descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version)
165
167
  return descendants is not None and f"{prefix}:{identifier}" in descendants
166
168
 
167
169
 
@@ -224,13 +226,15 @@ def get_children(
224
226
  return set(hierarchy.predecessors(curie))
225
227
 
226
228
 
227
- def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
229
+ def has_ancestor(
230
+ prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
231
+ ) -> bool:
228
232
  """Check that the first identifier has the second as an ancestor.
229
233
 
230
234
  Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process::
231
235
  >>> assert has_ancestor('go', '0006915', 'go', '0008219')
232
236
  """
233
- ancestors = get_ancestors(prefix, identifier)
237
+ ancestors = get_ancestors(prefix, identifier, version=version)
234
238
  return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors
235
239
 
236
240
 
pyobo/api/metadata.py CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  import logging
6
6
  from functools import lru_cache
7
- from typing import Mapping
7
+ from typing import Mapping, Optional
8
8
 
9
9
  from .utils import get_version
10
10
  from ..getters import get_ontology
@@ -21,9 +21,12 @@ logger = logging.getLogger(__name__)
21
21
 
22
22
  @lru_cache()
23
23
  @wrap_norm_prefix
24
- def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]:
24
+ def get_metadata(
25
+ prefix: str, *, force: bool = False, version: Optional[str] = None
26
+ ) -> Mapping[str, str]:
25
27
  """Get metadata for the ontology."""
26
- version = get_version(prefix)
28
+ if version is None:
29
+ version = get_version(prefix)
27
30
  path = prefix_cache_join(prefix, name="metadata.json", version=version)
28
31
 
29
32
  @cached_json(path=path, force=force)
pyobo/api/names.py CHANGED
@@ -2,11 +2,15 @@
2
2
 
3
3
  """High-level API for nomenclature."""
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  import subprocess
7
9
  from functools import lru_cache
8
10
  from typing import Callable, List, Mapping, Optional, Set, TypeVar
9
11
 
12
+ from curies import Reference, ReferenceTuple
13
+
10
14
  from .alts import get_primary_identifier
11
15
  from .utils import get_version
12
16
  from ..getters import NoBuild, get_ontology
@@ -32,6 +36,8 @@ logger = logging.getLogger(__name__)
32
36
 
33
37
  def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
34
38
  """Get the name for a CURIE, if possible."""
39
+ if version is None:
40
+ version = get_version(curie.split(":")[0])
35
41
  prefix, identifier = normalize_curie(curie)
36
42
  if prefix and identifier:
37
43
  return get_name(prefix, identifier, version=version)
@@ -40,7 +46,8 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[
40
46
 
41
47
  X = TypeVar("X")
42
48
 
43
- NO_BUILD_PREFIXES = set()
49
+ NO_BUILD_PREFIXES: Set[str] = set()
50
+ NO_BUILD_LOGGED: Set = set()
44
51
 
45
52
 
46
53
  def _help_get(
@@ -59,8 +66,10 @@ def _help_get(
59
66
  logger.warning("[%s] unable to look up results with %s", prefix, f)
60
67
  NO_BUILD_PREFIXES.add(prefix)
61
68
  return None
62
- except ValueError:
63
- logger.warning("[%s] unable to look up results with %s", prefix, f)
69
+ except ValueError as e:
70
+ if prefix not in NO_BUILD_PREFIXES:
71
+ logger.warning("[%s] value error while looking up results with %s: %s", prefix, f, e)
72
+ NO_BUILD_PREFIXES.add(prefix)
64
73
  return None
65
74
 
66
75
  if not mapping:
@@ -69,20 +78,28 @@ def _help_get(
69
78
  NO_BUILD_PREFIXES.add(prefix)
70
79
  return None
71
80
 
72
- primary_id = get_primary_identifier(prefix, identifier)
81
+ primary_id = get_primary_identifier(prefix, identifier, version=version)
73
82
  return mapping.get(primary_id)
74
83
 
75
84
 
76
85
  @wrap_norm_prefix
77
- def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
86
+ def get_name(
87
+ prefix: str | Reference | ReferenceTuple,
88
+ identifier: Optional[str] = None,
89
+ /,
90
+ *,
91
+ version: Optional[str] = None,
92
+ ) -> Optional[str]:
78
93
  """Get the name for an entity."""
79
- return _help_get(get_id_name_mapping, prefix, identifier, version=version)
94
+ if isinstance(prefix, (ReferenceTuple, Reference)):
95
+ prefix, identifier = prefix.prefix, prefix.identifier
96
+ return _help_get(get_id_name_mapping, prefix, identifier, version=version) # type:ignore
80
97
 
81
98
 
82
99
  @lru_cache()
83
100
  @wrap_norm_prefix
84
101
  def get_ids(
85
- prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None
102
+ prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None
86
103
  ) -> Set[str]:
87
104
  """Get the set of identifiers for this prefix."""
88
105
  if prefix == "ncbigene":
@@ -150,16 +167,22 @@ def get_id_name_mapping(
150
167
 
151
168
  @lru_cache()
152
169
  @wrap_norm_prefix
153
- def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]:
170
+ def get_name_id_mapping(
171
+ prefix: str, *, force: bool = False, version: Optional[str] = None
172
+ ) -> Mapping[str, str]:
154
173
  """Get a name to identifier mapping for the OBO file."""
155
- id_name = get_id_name_mapping(prefix=prefix, force=force)
174
+ id_name = get_id_name_mapping(prefix=prefix, force=force, version=version)
156
175
  return {v: k for k, v in id_name.items()}
157
176
 
158
177
 
159
178
  @wrap_norm_prefix
160
- def get_definition(prefix: str, identifier: str) -> Optional[str]:
179
+ def get_definition(
180
+ prefix: str, identifier: str | None = None, *, version: Optional[str] = None
181
+ ) -> Optional[str]:
161
182
  """Get the definition for an entity."""
162
- return _help_get(get_id_definition_mapping, prefix, identifier)
183
+ if identifier is None:
184
+ prefix, _, identifier = prefix.rpartition(":")
185
+ return _help_get(get_id_definition_mapping, prefix, identifier, version=version)
163
186
 
164
187
 
165
188
  def get_id_definition_mapping(
pyobo/api/relations.py CHANGED
@@ -48,9 +48,11 @@ def get_relations_df(
48
48
  force: bool = False,
49
49
  wide: bool = False,
50
50
  strict: bool = True,
51
+ version: Optional[str] = None,
51
52
  ) -> pd.DataFrame:
52
53
  """Get all relations from the OBO."""
53
- version = get_version(prefix)
54
+ if version is None:
55
+ version = get_version(prefix)
54
56
  path = prefix_cache_join(prefix, name="relations.tsv", version=version)
55
57
 
56
58
  @cached_df(path=path, dtype=str, force=force)
@@ -118,9 +120,11 @@ def get_id_multirelations_mapping(
118
120
  *,
119
121
  use_tqdm: bool = False,
120
122
  force: bool = False,
123
+ version: Optional[str] = None,
121
124
  ) -> Mapping[str, List[Reference]]:
122
125
  """Get the OBO file and output a synonym dictionary."""
123
- version = get_version(prefix)
126
+ if version is None:
127
+ version = get_version(prefix)
124
128
  ontology = get_ontology(prefix, force=force, version=version)
125
129
  return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)
126
130
 
@@ -134,6 +138,7 @@ def get_relation_mapping(
134
138
  *,
135
139
  use_tqdm: bool = False,
136
140
  force: bool = False,
141
+ version: Optional[str] = None,
137
142
  ) -> Mapping[str, str]:
138
143
  """Get relations from identifiers in the source prefix to target prefix with the given relation.
139
144
 
@@ -147,7 +152,8 @@ def get_relation_mapping(
147
152
  >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
148
153
  >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
149
154
  """
150
- version = get_version(prefix)
155
+ if version is None:
156
+ version = get_version(prefix)
151
157
  ontology = get_ontology(prefix, force=force, version=version)
152
158
  return ontology.get_relation_mapping(
153
159
  relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -163,6 +169,7 @@ def get_relation(
163
169
  *,
164
170
  use_tqdm: bool = False,
165
171
  force: bool = False,
172
+ **kwargs,
166
173
  ) -> Optional[str]:
167
174
  """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
168
175
 
@@ -181,6 +188,7 @@ def get_relation(
181
188
  target_prefix=target_prefix,
182
189
  use_tqdm=use_tqdm,
183
190
  force=force,
191
+ **kwargs,
184
192
  )
185
193
  return relation_mapping.get(source_identifier)
186
194
 
pyobo/api/species.py CHANGED
@@ -22,13 +22,13 @@ logger = logging.getLogger(__name__)
22
22
 
23
23
 
24
24
  @wrap_norm_prefix
25
- def get_species(prefix: str, identifier: str) -> Optional[str]:
25
+ def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
26
26
  """Get the species."""
27
27
  if prefix == "uniprot":
28
28
  raise NotImplementedError
29
29
 
30
30
  try:
31
- id_species = get_id_species_mapping(prefix)
31
+ id_species = get_id_species_mapping(prefix, version=version)
32
32
  except NoBuild:
33
33
  logger.warning("unable to look up species for prefix %s", prefix)
34
34
  return None
@@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]:
37
37
  logger.warning("no results produced for prefix %s", prefix)
38
38
  return None
39
39
 
40
- primary_id = get_primary_identifier(prefix, identifier)
40
+ primary_id = get_primary_identifier(prefix, identifier, version=version)
41
41
  return id_species.get(primary_id)
42
42
 
43
43
 
pyobo/api/typedefs.py CHANGED
@@ -4,6 +4,7 @@
4
4
 
5
5
  import logging
6
6
  from functools import lru_cache
7
+ from typing import Optional
7
8
 
8
9
  import pandas as pd
9
10
 
@@ -22,9 +23,12 @@ logger = logging.getLogger(__name__)
22
23
 
23
24
  @lru_cache()
24
25
  @wrap_norm_prefix
25
- def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame:
26
+ def get_typedef_df(
27
+ prefix: str, *, force: bool = False, version: Optional[str] = None
28
+ ) -> pd.DataFrame:
26
29
  """Get an identifier to name mapping for the typedefs in an OBO file."""
27
- version = get_version(prefix)
30
+ if version is None:
31
+ version = get_version(prefix)
28
32
  path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)
29
33
 
30
34
  @cached_df(path=path, dtype=str, force=force)
pyobo/api/utils.py CHANGED
@@ -7,6 +7,7 @@ from typing import Optional
7
7
 
8
8
  import bioversions
9
9
 
10
+ from ..constants import VERSION_PINS
10
11
  from ..utils.path import prefix_directory_join
11
12
 
12
13
  __all__ = [
@@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]:
25
26
  :param prefix: the resource name
26
27
  :return: The version if available else None
27
28
  """
29
+ # Prioritize loaded environmental variable VERSION_PINS dictionary
30
+ version = VERSION_PINS.get(prefix)
31
+ if version:
32
+ return version
28
33
  try:
29
34
  version = bioversions.get_version(prefix)
30
35
  except KeyError:
pyobo/api/xrefs.py CHANGED
@@ -30,9 +30,16 @@ logger = logging.getLogger(__name__)
30
30
 
31
31
 
32
32
  @wrap_norm_prefix
33
- def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]:
33
+ def get_xref(
34
+ prefix: str,
35
+ identifier: str,
36
+ new_prefix: str,
37
+ *,
38
+ flip: bool = False,
39
+ version: Optional[str] = None,
40
+ ) -> Optional[str]:
34
41
  """Get the xref with the new prefix if a direct path exists."""
35
- filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip)
42
+ filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
36
43
  return filtered_xrefs.get(identifier)
37
44
 
38
45
 
@@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False)
41
48
  def get_filtered_xrefs(
42
49
  prefix: str,
43
50
  xref_prefix: str,
44
- flip: bool = False,
45
51
  *,
52
+ flip: bool = False,
46
53
  use_tqdm: bool = False,
47
54
  force: bool = False,
48
55
  strict: bool = False,
pyobo/aws.py CHANGED
@@ -77,14 +77,19 @@ def upload_artifacts(
77
77
  upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)
78
78
 
79
79
 
80
- def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
80
+ def upload_artifacts_for_prefix(
81
+ *, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
82
+ ):
81
83
  """Upload compiled parts for the given prefix to AWS."""
82
84
  if s3_client is None:
83
85
  s3_client = boto3.client("s3")
84
86
 
87
+ if version is None:
88
+ version = get_version(prefix)
89
+
85
90
  logger.info("[%s] getting id->name mapping", prefix)
86
91
  get_id_name_mapping(prefix)
87
- id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
92
+ id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
88
93
  if not id_name_path.exists():
89
94
  raise FileNotFoundError
90
95
  id_name_key = os.path.join(prefix, "cache", "names.tsv")
@@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
93
98
 
94
99
  logger.info("[%s] getting id->synonyms mapping", prefix)
95
100
  get_id_synonyms_mapping(prefix)
96
- id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
101
+ id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
97
102
  if not id_synonyms_path.exists():
98
103
  raise FileNotFoundError
99
104
  id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
@@ -102,7 +107,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
102
107
 
103
108
  logger.info("[%s] getting xrefs", prefix)
104
109
  get_xrefs_df(prefix)
105
- xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
110
+ xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
106
111
  if not xrefs_path.exists():
107
112
  raise FileNotFoundError
108
113
  xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
@@ -111,7 +116,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
111
116
 
112
117
  logger.info("[%s] getting relations", prefix)
113
118
  get_relations_df(prefix)
114
- relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
119
+ relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
115
120
  if not relations_path.exists():
116
121
  raise FileNotFoundError
117
122
  relations_key = os.path.join(prefix, "cache", "relations.tsv")
@@ -120,7 +125,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
120
125
 
121
126
  logger.info("[%s] getting properties", prefix)
122
127
  get_properties_df(prefix)
123
- properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
128
+ properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
124
129
  if not properties_path.exists():
125
130
  raise FileNotFoundError
126
131
  properties_key = os.path.join(prefix, "cache", "properties.tsv")
@@ -129,7 +134,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
129
134
 
130
135
  logger.info("[%s] getting alternative identifiers", prefix)
131
136
  get_id_to_alts(prefix)
132
- alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
137
+ alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
133
138
  if not alts_path.exists():
134
139
  raise FileNotFoundError
135
140
  alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
pyobo/cli/lookup.py CHANGED
@@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio
76
76
  @prefix_argument
77
77
  @verbose_option
78
78
  @force_option
79
- def metadata(prefix: str, force: bool):
79
+ @version_option
80
+ def metadata(prefix: str, force: bool, version: Optional[str]):
80
81
  """Print the metadata for the given namespace."""
81
- metadata = get_metadata(prefix, force=force)
82
+ metadata = get_metadata(prefix, force=force, version=version)
82
83
  click.echo(json.dumps(metadata, indent=2))
83
84
 
84
85
 
@@ -281,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
281
282
  """Look up ancestors."""
282
283
  curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
283
284
  for curie in sorted(curies or []):
284
- click.echo(f"{curie}\t{get_name_by_curie(curie)}")
285
+ click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
285
286
 
286
287
 
287
288
  @lookup.command()
@@ -294,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
294
295
  """Look up descendants."""
295
296
  curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
296
297
  for curie in sorted(curies or []):
297
- click.echo(f"{curie}\t{get_name_by_curie(curie)}")
298
+ click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
298
299
 
299
300
 
300
301
  @lookup.command()
pyobo/constants.py CHANGED
@@ -2,19 +2,18 @@
2
2
 
3
3
  """Constants for PyOBO."""
4
4
 
5
+ import json
5
6
  import logging
7
+ import os
6
8
  import re
7
- from functools import partial
8
- from typing import Callable
9
9
 
10
- import bioversions
11
10
  import pystow
12
11
 
13
12
  __all__ = [
14
13
  "RAW_DIRECTORY",
15
14
  "DATABASE_DIRECTORY",
16
15
  "SPECIES_REMAPPING",
17
- "version_getter",
16
+ "VERSION_PINS",
18
17
  ]
19
18
 
20
19
  logger = logging.getLogger(__name__)
@@ -84,12 +83,6 @@ TYPEDEFS_FILE = "typedefs.tsv.gz"
84
83
  SPECIES_RECORD = "5334738"
85
84
  SPECIES_FILE = "species.tsv.gz"
86
85
 
87
-
88
- def version_getter(name: str) -> Callable[[], str]:
89
- """Make a function appropriate for getting versions."""
90
- return partial(bioversions.get_version, name)
91
-
92
-
93
86
  NCBITAXON_PREFIX = "NCBITaxon"
94
87
  DATE_FORMAT = "%d:%m:%Y %H:%M"
95
88
  PROVENANCE_PREFIXES = {
@@ -108,3 +101,31 @@ PROVENANCE_PREFIXES = {
108
101
  "isbn",
109
102
  "issn",
110
103
  }
104
+
105
+ # Load version pin dictionary from the environmental variable VERSION_PINS
106
+ try:
107
+ VERSION_PINS_STR = os.getenv("VERSION_PINS")
108
+ if not VERSION_PINS_STR:
109
+ VERSION_PINS = {}
110
+ else:
111
+ VERSION_PINS = json.loads(VERSION_PINS_STR)
112
+ for k, v in VERSION_PINS.items():
113
+ if not isinstance(k, str) or not isinstance(v, str):
114
+ logger.error("The prefix and version name must both be " "strings")
115
+ VERSION_PINS = {}
116
+ break
117
+ except ValueError as e:
118
+ logger.error(
119
+ "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
120
+ )
121
+ VERSION_PINS = {}
122
+
123
+ if VERSION_PINS:
124
+ logger.debug(
125
+ f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
126
+ f"\nPyobo will download the latest version of a resource if it's "
127
+ f"not pinned.\nIf you want to use a specific version of a "
128
+ f"resource, edit your VERSION_PINS environmental "
129
+ f"variable which is a JSON string to include a prefix and version "
130
+ f"name."
131
+ )
pyobo/gilda_utils.py CHANGED
@@ -15,6 +15,7 @@ from gilda.term import filter_out_duplicates
15
15
  from tqdm.auto import tqdm
16
16
 
17
17
  from pyobo import (
18
+ get_descendants,
18
19
  get_id_name_mapping,
19
20
  get_id_species_mapping,
20
21
  get_id_synonyms_mapping,
@@ -247,3 +248,23 @@ def get_gilda_terms(
247
248
  )
248
249
  if term is not None:
249
250
  yield term
251
+
252
+
253
+ def get_gilda_term_subset(
254
+ source: str, ancestors: Union[str, List[str]], **kwargs
255
+ ) -> Iterable[gilda.term.Term]:
256
+ """Get a subset of terms."""
257
+ subset = {
258
+ descendant
259
+ for parent_curie in _ensure_list(ancestors)
260
+ for descendant in get_descendants(*parent_curie.split(":")) or []
261
+ }
262
+ for term in get_gilda_terms(source, **kwargs):
263
+ if bioregistry.curie_to_str(term.db, term.id) in subset:
264
+ yield term
265
+
266
+
267
+ def _ensure_list(s: Union[str, List[str]]) -> List[str]:
268
+ if isinstance(s, str):
269
+ return [s]
270
+ return s
pyobo/identifier_utils.py CHANGED
@@ -2,11 +2,14 @@
2
2
 
3
3
  """Utilities for handling prefixes."""
4
4
 
5
+ from __future__ import annotations
6
+
5
7
  import logging
6
8
  from functools import wraps
7
9
  from typing import Optional, Tuple, Union
8
10
 
9
11
  import bioregistry
12
+ from curies import Reference, ReferenceTuple
10
13
 
11
14
  from .registries import (
12
15
  curie_has_blacklisted_prefix,
@@ -108,11 +111,25 @@ def wrap_norm_prefix(f):
108
111
  """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
109
112
 
110
113
  @wraps(f)
111
- def _wrapped(prefix, *args, **kwargs):
112
- norm_prefix = bioregistry.normalize_prefix(prefix)
113
- if norm_prefix is None:
114
- raise ValueError(f"Invalid prefix: {prefix}")
115
- return f(norm_prefix, *args, **kwargs)
114
+ def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
115
+ if isinstance(prefix, str):
116
+ norm_prefix = bioregistry.normalize_prefix(prefix)
117
+ if norm_prefix is None:
118
+ raise ValueError(f"Invalid prefix: {prefix}")
119
+ prefix = norm_prefix
120
+ elif isinstance(prefix, Reference):
121
+ norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
122
+ if norm_prefix is None:
123
+ raise ValueError(f"Invalid prefix: {prefix.prefix}")
124
+ prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
125
+ elif isinstance(prefix, ReferenceTuple):
126
+ norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
127
+ if norm_prefix is None:
128
+ raise ValueError(f"Invalid prefix: {prefix.prefix}")
129
+ prefix = ReferenceTuple(norm_prefix, prefix.identifier)
130
+ else:
131
+ raise TypeError
132
+ return f(prefix, *args, **kwargs)
116
133
 
117
134
  return _wrapped
118
135
 
pyobo/reader.py CHANGED
@@ -417,7 +417,7 @@ def _clean_definition(s: str) -> str:
417
417
  # if '\t' in s:
418
418
  # logger.warning('has tab')
419
419
  return (
420
- s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace("\d", "") # noqa:W605
420
+ s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "") # noqa:W605
421
421
  )
422
422
 
423
423
 
pyobo/sources/__init__.py CHANGED
@@ -12,6 +12,7 @@ from .civic_gene import CIVICGeneGetter
12
12
  from .complexportal import ComplexPortalGetter
13
13
  from .conso import CONSOGetter
14
14
  from .cpt import CPTGetter
15
+ from .credit import CreditGetter
15
16
  from .cvx import CVXGetter
16
17
  from .depmap import DepMapGetter
17
18
  from .dictybase_gene import DictybaseGetter
@@ -69,6 +70,7 @@ __all__ = [
69
70
  "CVXGetter",
70
71
  "ChEMBLCompoundGetter",
71
72
  "ComplexPortalGetter",
73
+ "CreditGetter",
72
74
  "DepMapGetter",
73
75
  "DictybaseGetter",
74
76
  "DrugBankGetter",