pyobo 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/api/alts.py +13 -8
- pyobo/api/hierarchy.py +9 -5
- pyobo/api/metadata.py +6 -3
- pyobo/api/names.py +34 -11
- pyobo/api/relations.py +11 -3
- pyobo/api/species.py +3 -3
- pyobo/api/typedefs.py +6 -2
- pyobo/api/utils.py +5 -0
- pyobo/api/xrefs.py +10 -3
- pyobo/aws.py +12 -7
- pyobo/cli/lookup.py +5 -4
- pyobo/constants.py +31 -10
- pyobo/gilda_utils.py +21 -0
- pyobo/identifier_utils.py +22 -5
- pyobo/reader.py +1 -1
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/antibodyregistry.py +7 -6
- pyobo/sources/biogrid.py +8 -4
- pyobo/sources/ccle.py +5 -5
- pyobo/sources/credit.py +68 -0
- pyobo/sources/geonames.py +27 -9
- pyobo/sources/hgnc.py +2 -2
- pyobo/sources/mesh.py +9 -7
- pyobo/sources/msigdb.py +1 -1
- pyobo/sources/npass.py +1 -1
- pyobo/sources/pubchem.py +3 -3
- pyobo/sources/rgd.py +1 -1
- pyobo/sources/rhea.py +2 -2
- pyobo/sources/ror.py +67 -21
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/struct/struct.py +4 -3
- pyobo/struct/typedef.py +10 -0
- pyobo/utils/path.py +2 -1
- pyobo/version.py +1 -1
- pyobo/xrefdb/sources/__init__.py +6 -3
- pyobo/xrefdb/sources/chembl.py +5 -5
- pyobo/xrefdb/sources/pubchem.py +3 -2
- pyobo/xrefdb/sources/wikidata.py +8 -1
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/METADATA +23 -23
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/RECORD +44 -44
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/WHEEL +1 -1
- pyobo/xrefdb/bengo.py +0 -44
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/LICENSE +0 -0
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/entry_points.txt +0 -0
- {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/top_level.txt +0 -0
pyobo/api/alts.py
CHANGED
@@ -28,12 +28,15 @@ NO_ALTS = {

 @lru_cache()
 @wrap_norm_prefix
-def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
+def get_id_to_alts(
+    prefix: str, *, force: bool = False, version: Optional[str] = None
+) -> Mapping[str, List[str]]:
     """Get alternate identifiers."""
     if prefix in NO_ALTS:
         return {}

-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
     header = [f"{prefix}_id", "alt_id"]

@@ -51,26 +54,28 @@ def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:

 @lru_cache()
 @wrap_norm_prefix
-def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]:
+def get_alts_to_id(
+    prefix: str, *, force: bool = False, version: Optional[str] = None
+) -> Mapping[str, str]:
     """Get alternative id to primary id mapping."""
     return {
         alt: primary
-        for primary, alts in get_id_to_alts(prefix, force=force).items()
+        for primary, alts in get_id_to_alts(prefix, force=force, version=version).items()
         for alt in alts
     }


-def get_primary_curie(curie: str) -> Optional[str]:
+def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
     """Get the primary curie for an entity."""
     prefix, identifier = normalize_curie(curie)
-    primary_identifier = get_primary_identifier(prefix, identifier)
+    primary_identifier = get_primary_identifier(prefix, identifier, version=version)
     if primary_identifier is not None:
         return f"{prefix}:{primary_identifier}"
     return None


 @wrap_norm_prefix
-def get_primary_identifier(prefix: str, identifier: str) -> str:
+def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str:
     """Get the primary identifier for an entity.

     :param prefix: The name of the resource
@@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str:
     if prefix in NO_ALTS:  # TODO later expand list to other namespaces with no alts
         return identifier

-    alts_to_id = get_alts_to_id(prefix)
+    alts_to_id = get_alts_to_id(prefix, version=version)
     if alts_to_id and identifier in alts_to_id:
         return alts_to_id[identifier]
     return identifier
pyobo/api/hierarchy.py
CHANGED
@@ -13,6 +13,7 @@ from .properties import get_filtered_properties_mapping
 from .relations import get_filtered_relations_df
 from ..identifier_utils import wrap_norm_prefix
 from ..struct import TypeDef, has_member, is_a, part_of
+from ..struct.reference import Reference

 __all__ = [
     "get_hierarchy",
@@ -24,7 +25,6 @@ __all__ = [
     "get_children",
 ]

-from ..struct.reference import Reference

 logger = logging.getLogger(__name__)

@@ -154,14 +154,16 @@ def _get_hierarchy_helper(
     return rv


-def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
+def is_descendent(
+    prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
+) -> bool:
     """Check that the first identifier has the second as a descendent.

     Check that go:0070246 ! natural killer cell apoptotic process is a
     descendant of go:0006915 ! apoptotic process::
     >>> assert is_descendent('go', '0070246', 'go', '0006915')
     """
-    descendants = get_descendants(ancestor_prefix, ancestor_identifier)
+    descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version)
     return descendants is not None and f"{prefix}:{identifier}" in descendants


@@ -224,13 +226,15 @@ def get_children(
     return set(hierarchy.predecessors(curie))


-def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
+def has_ancestor(
+    prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
+) -> bool:
     """Check that the first identifier has the second as an ancestor.

     Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process::
     >>> assert has_ancestor('go', '0006915', 'go', '0008219')
     """
-    ancestors = get_ancestors(prefix, identifier)
+    ancestors = get_ancestors(prefix, identifier, version=version)
     return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors
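
is_descendent and has_ancestor now accept the same optional pin and forward it to get_descendants/get_ancestors. A sketch reusing the GO examples from the docstrings, assuming the top-level re-exports; whether the checks return True depends on the cached GO release, and the version string is a placeholder:

from pyobo import has_ancestor, is_descendent

# Same checks as in the docstrings, evaluated against a pinned GO build (placeholder version).
apoptosis_is_descendant = is_descendent("go", "0070246", "go", "0006915", version="2024-01-17")
cell_death_is_ancestor = has_ancestor("go", "0006915", "go", "0008219", version="2024-01-17")
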
pyobo/api/metadata.py
CHANGED
@@ -4,7 +4,7 @@

 import logging
 from functools import lru_cache
-from typing import Mapping
+from typing import Mapping, Optional

 from .utils import get_version
 from ..getters import get_ontology
@@ -21,9 +21,12 @@ logger = logging.getLogger(__name__)

 @lru_cache()
 @wrap_norm_prefix
-def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]:
+def get_metadata(
+    prefix: str, *, force: bool = False, version: Optional[str] = None
+) -> Mapping[str, str]:
     """Get metadata for the ontology."""
-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     path = prefix_cache_join(prefix, name="metadata.json", version=version)

     @cached_json(path=path, force=force)
pyobo/api/names.py
CHANGED
@@ -2,11 +2,15 @@

 """High-level API for nomenclature."""

+from __future__ import annotations
+
 import logging
 import subprocess
 from functools import lru_cache
 from typing import Callable, List, Mapping, Optional, Set, TypeVar

+from curies import Reference, ReferenceTuple
+
 from .alts import get_primary_identifier
 from .utils import get_version
 from ..getters import NoBuild, get_ontology
@@ -32,6 +36,8 @@ logger = logging.getLogger(__name__)

 def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
     """Get the name for a CURIE, if possible."""
+    if version is None:
+        version = get_version(curie.split(":")[0])
     prefix, identifier = normalize_curie(curie)
     if prefix and identifier:
         return get_name(prefix, identifier, version=version)
@@ -40,7 +46,8 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[

 X = TypeVar("X")

-NO_BUILD_PREFIXES = set()
+NO_BUILD_PREFIXES: Set[str] = set()
+NO_BUILD_LOGGED: Set = set()


 def _help_get(
@@ -59,8 +66,10 @@ def _help_get(
             logger.warning("[%s] unable to look up results with %s", prefix, f)
             NO_BUILD_PREFIXES.add(prefix)
         return None
-    except ValueError:
-
+    except ValueError as e:
+        if prefix not in NO_BUILD_PREFIXES:
+            logger.warning("[%s] value error while looking up results with %s: %s", prefix, f, e)
+            NO_BUILD_PREFIXES.add(prefix)
         return None

     if not mapping:
@@ -69,20 +78,28 @@ def _help_get(
         NO_BUILD_PREFIXES.add(prefix)
         return None

-    primary_id = get_primary_identifier(prefix, identifier)
+    primary_id = get_primary_identifier(prefix, identifier, version=version)
     return mapping.get(primary_id)


 @wrap_norm_prefix
-def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
+def get_name(
+    prefix: str | Reference | ReferenceTuple,
+    identifier: Optional[str] = None,
+    /,
+    *,
+    version: Optional[str] = None,
+) -> Optional[str]:
     """Get the name for an entity."""
-    return _help_get(get_id_name_mapping, prefix, identifier, version=version)
+    if isinstance(prefix, (ReferenceTuple, Reference)):
+        prefix, identifier = prefix.prefix, prefix.identifier
+    return _help_get(get_id_name_mapping, prefix, identifier, version=version)  # type:ignore


 @lru_cache()
 @wrap_norm_prefix
 def get_ids(
-    prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None
+    prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None
 ) -> Set[str]:
     """Get the set of identifiers for this prefix."""
     if prefix == "ncbigene":
@@ -150,16 +167,22 @@ def get_id_name_mapping(

 @lru_cache()
 @wrap_norm_prefix
-def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]:
+def get_name_id_mapping(
+    prefix: str, *, force: bool = False, version: Optional[str] = None
+) -> Mapping[str, str]:
     """Get a name to identifier mapping for the OBO file."""
-    id_name = get_id_name_mapping(prefix=prefix, force=force)
+    id_name = get_id_name_mapping(prefix=prefix, force=force, version=version)
     return {v: k for k, v in id_name.items()}


 @wrap_norm_prefix
-def get_definition(prefix: str, identifier: str) -> Optional[str]:
+def get_definition(
+    prefix: str, identifier: str | None = None, *, version: Optional[str] = None
+) -> Optional[str]:
     """Get the definition for an entity."""
-    return _help_get(get_id_definition_mapping, prefix, identifier)
+    if identifier is None:
+        prefix, _, identifier = prefix.rpartition(":")
+    return _help_get(get_id_definition_mapping, prefix, identifier, version=version)


 def get_id_definition_mapping(
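
Two behavioral additions stand out here: get_name now also accepts a curies.Reference or ReferenceTuple in place of a prefix/identifier pair, and get_definition splits a plain CURIE itself when no identifier is passed. A sketch of the new call styles (the GO identifier is illustrative):

from curies import Reference, ReferenceTuple

import pyobo

# All three calls look up the same entity.
name_a = pyobo.get_name("go", "0006915")
name_b = pyobo.get_name(Reference(prefix="go", identifier="0006915"))
name_c = pyobo.get_name(ReferenceTuple("go", "0006915"))

# A single CURIE string is now accepted as well.
definition = pyobo.get_definition("go:0006915")
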
pyobo/api/relations.py
CHANGED
@@ -48,9 +48,11 @@ def get_relations_df(
     force: bool = False,
     wide: bool = False,
     strict: bool = True,
+    version: Optional[str] = None,
 ) -> pd.DataFrame:
     """Get all relations from the OBO."""
-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     path = prefix_cache_join(prefix, name="relations.tsv", version=version)

     @cached_df(path=path, dtype=str, force=force)
@@ -118,9 +120,11 @@ def get_id_multirelations_mapping(
     *,
     use_tqdm: bool = False,
     force: bool = False,
+    version: Optional[str] = None,
 ) -> Mapping[str, List[Reference]]:
     """Get the OBO file and output a synonym dictionary."""
-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     ontology = get_ontology(prefix, force=force, version=version)
     return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)

@@ -134,6 +138,7 @@ def get_relation_mapping(
     *,
     use_tqdm: bool = False,
     force: bool = False,
+    version: Optional[str] = None,
 ) -> Mapping[str, str]:
     """Get relations from identifiers in the source prefix to target prefix with the given relation.

@@ -147,7 +152,8 @@ def get_relation_mapping(
     >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
     >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
     """
-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     ontology = get_ontology(prefix, force=force, version=version)
     return ontology.get_relation_mapping(
         relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -163,6 +169,7 @@ def get_relation(
     *,
     use_tqdm: bool = False,
     force: bool = False,
+    **kwargs,
 ) -> Optional[str]:
     """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.

@@ -181,6 +188,7 @@ def get_relation(
         target_prefix=target_prefix,
         use_tqdm=use_tqdm,
         force=force,
+        **kwargs,
     )
     return relation_mapping.get(source_identifier)
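
The relation getters gain an explicit version, and get_relation now forwards extra keyword arguments through to get_relation_mapping. A sketch that reuses the orthology example from the docstring, with a placeholder version string:

import pyobo

# Build the HGNC -> MGI orthology mapping from a pinned HGNC build (version value is illustrative).
relations_df = pyobo.get_relations_df("hgnc", version="2024-01-01")
orthologs = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi", version="2024-01-01")
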
pyobo/api/species.py
CHANGED
@@ -22,13 +22,13 @@ logger = logging.getLogger(__name__)


 @wrap_norm_prefix
-def get_species(prefix: str, identifier: str) -> Optional[str]:
+def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
     """Get the species."""
     if prefix == "uniprot":
         raise NotImplementedError

     try:
-        id_species = get_id_species_mapping(prefix)
+        id_species = get_id_species_mapping(prefix, version=version)
     except NoBuild:
         logger.warning("unable to look up species for prefix %s", prefix)
         return None
@@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]:
         logger.warning("no results produced for prefix %s", prefix)
         return None

-    primary_id = get_primary_identifier(prefix, identifier)
+    primary_id = get_primary_identifier(prefix, identifier, version=version)
     return id_species.get(primary_id)
pyobo/api/typedefs.py
CHANGED
@@ -4,6 +4,7 @@

 import logging
 from functools import lru_cache
+from typing import Optional

 import pandas as pd

@@ -22,9 +23,12 @@ logger = logging.getLogger(__name__)

 @lru_cache()
 @wrap_norm_prefix
-def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame:
+def get_typedef_df(
+    prefix: str, *, force: bool = False, version: Optional[str] = None
+) -> pd.DataFrame:
     """Get an identifier to name mapping for the typedefs in an OBO file."""
-    version = get_version(prefix)
+    if version is None:
+        version = get_version(prefix)
     path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)

     @cached_df(path=path, dtype=str, force=force)
pyobo/api/utils.py
CHANGED
@@ -7,6 +7,7 @@ from typing import Optional

 import bioversions

+from ..constants import VERSION_PINS
 from ..utils.path import prefix_directory_join

 __all__ = [
@@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]:
     :param prefix: the resource name
     :return: The version if available else None
     """
+    # Prioritize loaded environmental variable VERSION_PINS dictionary
+    version = VERSION_PINS.get(prefix)
+    if version:
+        return version
     try:
         version = bioversions.get_version(prefix)
     except KeyError:
pyobo/api/xrefs.py
CHANGED
@@ -30,9 +30,16 @@ logger = logging.getLogger(__name__)


 @wrap_norm_prefix
-def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]:
+def get_xref(
+    prefix: str,
+    identifier: str,
+    new_prefix: str,
+    *,
+    flip: bool = False,
+    version: Optional[str] = None,
+) -> Optional[str]:
     """Get the xref with the new prefix if a direct path exists."""
-    filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip)
+    filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
     return filtered_xrefs.get(identifier)


@@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False)
 def get_filtered_xrefs(
     prefix: str,
     xref_prefix: str,
-    flip: bool = False,
     *,
+    flip: bool = False,
     use_tqdm: bool = False,
     force: bool = False,
     strict: bool = False,
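
flip and the new version are now keyword-only on get_xref, and get_filtered_xrefs gets the same reordering. A sketch of the call style; the prefixes, identifier, and version string are illustrative:

import pyobo

# Look up a single xref from a pinned ChEBI cache; flip=True would reverse the direction.
mesh_id = pyobo.get_xref("chebi", "27732", "mesh", version="231")

# Or get the whole chebi -> mesh mapping at once.
chebi_to_mesh = pyobo.get_filtered_xrefs("chebi", "mesh", version="231")
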
pyobo/aws.py
CHANGED
@@ -77,14 +77,19 @@ def upload_artifacts(
         upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)


-def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
+def upload_artifacts_for_prefix(
+    *, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
+):
     """Upload compiled parts for the given prefix to AWS."""
     if s3_client is None:
         s3_client = boto3.client("s3")

+    if version is None:
+        version = get_version(prefix)
+
     logger.info("[%s] getting id->name mapping", prefix)
     get_id_name_mapping(prefix)
-    id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
+    id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
     if not id_name_path.exists():
         raise FileNotFoundError
     id_name_key = os.path.join(prefix, "cache", "names.tsv")
@@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

     logger.info("[%s] getting id->synonyms mapping", prefix)
     get_id_synonyms_mapping(prefix)
-    id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
+    id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
     if not id_synonyms_path.exists():
         raise FileNotFoundError
     id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
@@ -102,7 +107,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

     logger.info("[%s] getting xrefs", prefix)
     get_xrefs_df(prefix)
-    xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
+    xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
     if not xrefs_path.exists():
         raise FileNotFoundError
     xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
@@ -111,7 +116,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

     logger.info("[%s] getting relations", prefix)
     get_relations_df(prefix)
-    relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
+    relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
     if not relations_path.exists():
         raise FileNotFoundError
     relations_key = os.path.join(prefix, "cache", "relations.tsv")
@@ -120,7 +125,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

     logger.info("[%s] getting properties", prefix)
     get_properties_df(prefix)
-    properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
+    properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
     if not properties_path.exists():
         raise FileNotFoundError
     properties_key = os.path.join(prefix, "cache", "properties.tsv")
@@ -129,7 +134,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

     logger.info("[%s] getting alternative identifiers", prefix)
     get_id_to_alts(prefix)
-    alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
+    alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
     if not alts_path.exists():
         raise FileNotFoundError
     alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
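
upload_artifacts_for_prefix now resolves the version once up front and reuses it for every cached artifact path rather than recomputing it per file. A sketch of an invocation; the bucket name and version are placeholders, and real use needs AWS credentials configured for boto3:

from pyobo.aws import upload_artifacts_for_prefix

# Upload the cached names, synonyms, xrefs, relations, properties, and alt_ids
# for one prefix to S3, all built against the same pinned version.
upload_artifacts_for_prefix(prefix="hgnc", bucket="my-obo-bucket", version="2024-01-01")
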
pyobo/cli/lookup.py
CHANGED
@@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio
 @prefix_argument
 @verbose_option
 @force_option
-def metadata(prefix: str, force: bool):
+@version_option
+def metadata(prefix: str, force: bool, version: Optional[str]):
     """Print the metadata for the given namespace."""
-    metadata = get_metadata(prefix, force=force)
+    metadata = get_metadata(prefix, force=force, version=version)
     click.echo(json.dumps(metadata, indent=2))


@@ -281,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
     """Look up ancestors."""
     curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")


 @lookup.command()
@@ -294,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
     """Look up descendants."""
     curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")


 @lookup.command()
pyobo/constants.py
CHANGED
@@ -2,19 +2,18 @@

 """Constants for PyOBO."""

+import json
 import logging
+import os
 import re
-from functools import partial
-from typing import Callable

-import bioversions
 import pystow

 __all__ = [
     "RAW_DIRECTORY",
     "DATABASE_DIRECTORY",
     "SPECIES_REMAPPING",
-    "version_getter",
+    "VERSION_PINS",
 ]

 logger = logging.getLogger(__name__)
@@ -84,12 +83,6 @@ TYPEDEFS_FILE = "typedefs.tsv.gz"
 SPECIES_RECORD = "5334738"
 SPECIES_FILE = "species.tsv.gz"

-
-def version_getter(name: str) -> Callable[[], str]:
-    """Make a function appropriate for getting versions."""
-    return partial(bioversions.get_version, name)
-
-
 NCBITAXON_PREFIX = "NCBITaxon"
 DATE_FORMAT = "%d:%m:%Y %H:%M"
 PROVENANCE_PREFIXES = {
@@ -108,3 +101,31 @@ PROVENANCE_PREFIXES = {
     "isbn",
     "issn",
 }
+
+# Load version pin dictionary from the environmental variable VERSION_PINS
+try:
+    VERSION_PINS_STR = os.getenv("VERSION_PINS")
+    if not VERSION_PINS_STR:
+        VERSION_PINS = {}
+    else:
+        VERSION_PINS = json.loads(VERSION_PINS_STR)
+        for k, v in VERSION_PINS.items():
+            if not isinstance(k, str) or not isinstance(v, str):
+                logger.error("The prefix and version name must both be " "strings")
+                VERSION_PINS = {}
+                break
+except ValueError as e:
+    logger.error(
+        "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
+    )
+    VERSION_PINS = {}
+
+if VERSION_PINS:
+    logger.debug(
+        f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
+        f"\nPyobo will download the latest version of a resource if it's "
+        f"not pinned.\nIf you want to use a specific version of a "
+        f"resource, edit your VERSION_PINS environmental "
+        f"variable which is a JSON string to include a prefix and version "
+        f"name."
+    )
pyobo/gilda_utils.py
CHANGED
@@ -15,6 +15,7 @@ from gilda.term import filter_out_duplicates
 from tqdm.auto import tqdm

 from pyobo import (
+    get_descendants,
     get_id_name_mapping,
     get_id_species_mapping,
     get_id_synonyms_mapping,
@@ -247,3 +248,23 @@ def get_gilda_terms(
         )
         if term is not None:
             yield term
+
+
+def get_gilda_term_subset(
+    source: str, ancestors: Union[str, List[str]], **kwargs
+) -> Iterable[gilda.term.Term]:
+    """Get a subset of terms."""
+    subset = {
+        descendant
+        for parent_curie in _ensure_list(ancestors)
+        for descendant in get_descendants(*parent_curie.split(":")) or []
+    }
+    for term in get_gilda_terms(source, **kwargs):
+        if bioregistry.curie_to_str(term.db, term.id) in subset:
+            yield term
+
+
+def _ensure_list(s: Union[str, List[str]]) -> List[str]:
+    if isinstance(s, str):
+        return [s]
+    return s
pyobo/identifier_utils.py
CHANGED
@@ -2,11 +2,14 @@

 """Utilities for handling prefixes."""

+from __future__ import annotations
+
 import logging
 from functools import wraps
 from typing import Optional, Tuple, Union

 import bioregistry
+from curies import Reference, ReferenceTuple

 from .registries import (
     curie_has_blacklisted_prefix,
@@ -108,11 +111,25 @@ def wrap_norm_prefix(f):
     """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""

     @wraps(f)
-    def _wrapped(prefix, *args, **kwargs):
-        norm_prefix = bioregistry.normalize_prefix(prefix)
-        if norm_prefix is None:
-            raise ValueError(f"Invalid prefix: {prefix}")
-        return f(norm_prefix, *args, **kwargs)
+    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
+        if isinstance(prefix, str):
+            norm_prefix = bioregistry.normalize_prefix(prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix}")
+            prefix = norm_prefix
+        elif isinstance(prefix, Reference):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
+        elif isinstance(prefix, ReferenceTuple):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = ReferenceTuple(norm_prefix, prefix.identifier)
+        else:
+            raise TypeError
+        return f(prefix, *args, **kwargs)

     return _wrapped
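
wrap_norm_prefix now normalizes the prefix whether the first argument is a plain string, a curies.Reference, or a curies.ReferenceTuple, and raises TypeError for anything else. A sketch of the decorator applied to a hypothetical function (the function name is made up for illustration):

from curies import ReferenceTuple

from pyobo.identifier_utils import wrap_norm_prefix


@wrap_norm_prefix
def first_argument(prefix, *args):
    """Echo back the normalized first argument (illustrative helper)."""
    return prefix


# "GO" is normalized to "go" for strings and inside reference objects alike.
assert first_argument("GO") == "go"
assert first_argument(ReferenceTuple("GO", "0006915")) == ReferenceTuple("go", "0006915")
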
pyobo/reader.py
CHANGED
@@ -417,7 +417,7 @@ def _clean_definition(s: str) -> str:
     # if '\t' in s:
     #     logger.warning('has tab')
     return (
-        s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace("\d", "")  # noqa:W605
+        s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "")  # noqa:W605
     )
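
The only change here swaps the string literal for a raw string: both spell a backslash followed by the letter d, but the raw form avoids the invalid-escape-sequence warning that newer CPython versions emit for unrecognized escapes in ordinary string literals. A quick illustration of the equivalence:

# The raw string and the explicitly escaped form denote the same two characters.
assert r"\d" == "\\d"
assert len(r"\d") == 2
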
pyobo/sources/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from .civic_gene import CIVICGeneGetter
 from .complexportal import ComplexPortalGetter
 from .conso import CONSOGetter
 from .cpt import CPTGetter
+from .credit import CreditGetter
 from .cvx import CVXGetter
 from .depmap import DepMapGetter
 from .dictybase_gene import DictybaseGetter
@@ -69,6 +70,7 @@ __all__ = [
     "CVXGetter",
     "ChEMBLCompoundGetter",
     "ComplexPortalGetter",
+    "CreditGetter",
     "DepMapGetter",
     "DictybaseGetter",
     "DrugBankGetter",