pyobo 0.12.4__py3-none-any.whl → 0.12.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +85 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +115 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +135 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/METADATA +5 -5
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/RECORD +41 -35
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/typedef.py
CHANGED
@@ -15,7 +15,9 @@ __all__ = [
     "alternative_term",
     "broad_match",
     "close_match",
+    "contributes_to_condition",
     "default_typedefs",
+    "derives_from_organism",
     "editor_note",
     "enables",
     "exact_match",
@@ -24,10 +26,12 @@ __all__ = [
     "gene_product_member_of",
     "has_contributor",
     "has_dbxref",
+    "has_depiction",
     "has_end_date",
     "has_gene_product",
     "has_homepage",
     "has_inchi",
+    "has_mailbox",
     "has_mature",
     "has_member",
     "has_part",
@@ -103,12 +107,18 @@ has_component = TypeDef(
 derives_from = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
 )
+derives_from_organism = TypeDef(
+    reference=Reference(prefix="CLO", identifier="0037207", name="derives from organism")
+)
 molecularly_interacts_with = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
 )
 located_in = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
 )
+contributes_to_condition = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
+)
 exact_match = TypeDef(reference=v.exact_match, is_metadata_tag=True)
 narrow_match = TypeDef(reference=v.narrow_match, is_metadata_tag=True)
 broad_match = TypeDef(reference=v.broad_match, is_metadata_tag=True)
@@ -257,9 +267,11 @@ has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v
 
 has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
 
-has_homepage = TypeDef(
-
-)
+has_homepage = TypeDef(reference=v.has_homepage, is_metadata_tag=True)
+has_depiction = TypeDef(reference=v.has_depiction, is_metadata_tag=True)
+has_mailbox = TypeDef(reference=v.has_mailbox, is_metadata_tag=True)
+has_mailing_list = TypeDef(reference=v.has_mailing_list, is_metadata_tag=True)
+has_repository = TypeDef(reference=v.has_repository, is_metadata_tag=True)
 
 has_category = TypeDef(
     reference=Reference(prefix="biolink", identifier="category", name="has category"),
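The new metadata typedefs all follow the pattern visible in this hunk: wrap a Reference and set is_metadata_tag=True so the property serializes as an annotation rather than a relationship. A minimal sketch of that pattern, not the module's literal source, assuming only that pyobo.struct exports TypeDef and Reference:

from pyobo.struct import Reference, TypeDef

# a metadata-tag typedef: rendered as an annotation property
# rather than a relationship when the ontology is serialized
has_mailbox = TypeDef(
    reference=Reference(prefix="foaf", identifier="mbox", name="has mailbox"),
    is_metadata_tag=True,
)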
pyobo/struct/vocabulary.py
CHANGED
@@ -90,6 +90,14 @@ has_description = _c(_v.has_description)
 has_license = _c(_v.has_license)
 has_title = _c(_v.has_title)
 
+has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")
+has_logo = Reference(prefix="foaf", identifier="logo", name="has logo")
+has_mailbox = Reference(prefix="foaf", identifier="mbox", name="has mailbox")
+has_depiction = Reference(prefix="foaf", identifier="depicted_by", name="depicted by")
+has_repository = Reference(prefix="doap", identifier="repository", name="has repository")
+has_mailing_list = Reference(prefix="doap", identifier="mailing-list", name="has mailing list")
+has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
+
 has_part = Reference(prefix=BFO_PREFIX, identifier="0000051", name="has part")
 part_of = Reference(prefix=BFO_PREFIX, identifier="0000050", name="part of")
 orthologous = Reference(
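These vocabulary entries are plain Reference objects backing the FOAF/DOAP typedefs added in typedef.py above. A quick sketch of how such a reference behaves, assuming the curies-style Reference API that pyobo builds on:

from pyobo.struct import Reference

has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
print(has_maintainer.curie)  # doap:maintainer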
pyobo/utils/cache.py
CHANGED
@@ -12,8 +12,9 @@ from pystow.cache import CachedCollection as cached_collection  # noqa:N813
 from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
+from pystow.utils import safe_open
 
-from .io import open_map_tsv, open_multimap_tsv,
+from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
     "cached_collection",
@@ -69,13 +70,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"
 
 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with safe_open(path, read=True) as file:
+    with safe_open(path, operation="read") as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with safe_open(path, read=False) as file:
+    with safe_open(path, operation="write") as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
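This swaps pyobo's local safe_open(path, read=...) helper (deleted from pyobo/utils/io.py below) for pystow's, which takes an operation keyword and picks gzip or plain text from the file suffix. A small sketch of the new call style, with a hypothetical path:

from pystow.utils import safe_open

# hypothetical path; gzip mode is chosen automatically from the .gz suffix
with safe_open("graph.json.gz", operation="write") as file:
    file.write("{}")

with safe_open("graph.json.gz", operation="read") as file:
    print(file.read())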
pyobo/utils/io.py
CHANGED
@@ -1,27 +1,24 @@
 """I/O utilities."""
 
 import collections.abc
-import contextlib
-import csv
 import gzip
 import logging
 from collections import defaultdict
 from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Literal, TextIO, TypeVar
+from typing import TypeVar, cast
 
 import pandas as pd
+import pystow.utils
+from pystow.utils import safe_open_reader, safe_open_writer
 from tqdm.auto import tqdm
 
 __all__ = [
-    "get_reader",
     "multidict",
     "multisetdict",
     "open_map_tsv",
     "open_multimap_tsv",
-    "open_reader",
-    "safe_open",
     "safe_open_writer",
     "write_iterable_tsv",
     "write_map_tsv",
@@ -34,35 +31,22 @@ X = TypeVar("X")
 Y = TypeVar("Y")
 
 
-@contextmanager
-def open_reader(path: str | Path, sep: str = "\t"):
-    """Open a file and get a reader for it."""
-    path = Path(path)
-    with safe_open(path, read=True) as file:
-        yield get_reader(file, sep=sep)
-
-
-def get_reader(x, sep: str = "\t"):
-    """Get a :func:`csv.reader` with PyOBO default settings."""
-    return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
     path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-    with safe_open(path, read=True) as file:
+    rv = {}
+    with pystow.utils.safe_open_reader(path) as reader:
         if has_header:
-            next(file)  # throw away header
+            next(reader)  # throw away header
         if use_tqdm:
-            file = tqdm(file, desc=f"loading TSV from {path}")
-        rv = {}
-        for row in get_reader(file):
+            reader = tqdm(reader, desc=f"loading TSV from {path}")
+        for row in reader:
             if len(row) != 2:
                 logger.warning("[%s] malformed row can not be put in dict: %s", path, row)
                 continue
             rv[row[0]] = row[1]
-        return rv
+    return rv
@@ -72,24 +56,27 @@ def open_multimap_tsv(
     has_header: bool = True,
 ) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
-    return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header))
+    with _help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header) as file:
+        return multidict(file)
 
 
+@contextmanager
 def _help_multimap_tsv(
     path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[tuple[str, str]]:
-    with safe_open(path, read=True) as file:
+) -> Generator[Iterable[tuple[str, str]], None, None]:
+    with safe_open_reader(path) as reader:
         if has_header:
             try:
-                next(file)  # throw away header
+                next(reader)  # throw away header
             except gzip.BadGzipFile as e:
                 raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
-            file = tqdm(file, desc=f"loading TSV from {path}")
-        yield from get_reader(file)
+            yield tqdm(reader, desc=f"loading TSV from {path}")
+        else:
+            yield cast(Iterable[tuple[str, str]], reader)
 
 
 def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
@@ -149,28 +136,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-@contextlib.contextmanager
-def safe_open(
-    path: str | Path, read: bool, encoding: str | None = None
-) -> Generator[TextIO, None, None]:
-    """Safely open a file for reading or writing text."""
-    path = Path(path).expanduser().resolve()
-    mode: Literal["rt", "wt"] = "rt" if read else "wt"
-    if path.suffix.endswith(".gz"):
-        with gzip.open(path, mode=mode, encoding=encoding) as file:
-            yield file
-    else:
-        with open(path, mode=mode) as file:
-            yield file
-
-
-@contextlib.contextmanager
-def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
-    """Open a CSV writer, wrapping :func:`csv.writer`."""
-    if isinstance(f, str | Path):
-        with safe_open(f, read=False) as file:
-            yield csv.writer(file, delimiter=delimiter)
-    else:
-        yield csv.writer(f, delimiter=delimiter)
pyobo/utils/misc.py
CHANGED
@@ -1,9 +1,17 @@
 """Miscellaneous utilities."""
 
+from __future__ import annotations
+
 import logging
+from collections.abc import Callable
 from datetime import datetime
 
+import bioversions.utils
+
+from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
+
 __all__ = [
+    "VERSION_GETTERS",
     "cleanup_version",
 ]
 
@@ -15,8 +23,11 @@ BIZARRE_LOGGED = set()
 VERSION_REWRITES = {
     "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
     "http://www.w3.org/2006/time#2016": "2016",  # for time
+    "https://purl.org/ontology/modalia#1.0.0": "1.0.0",  # for dalia
+}
+STATIC_VERSION_REWRITES = {
+    "orth": "2",
 }
-STATIC_VERSION_REWRITES = {"orth": "2"}
 VERSION_PREFIXES = [
     "http://www.orpha.net/version",
     "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
@@ -27,17 +38,34 @@ VERSION_PREFIXES = [
     "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
     "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
     "https://purl.dataone.org/odo/ADCAD/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://nfdi.fiz-karlsruhe.de/ontology/",
+    "http://www.w3.org/ns/prov-",
+    "https://raw.githubusercontent.com/enpadasi/Ontology-for-Nutritional-Studies/releases/download/v",
+    "http://purl.jp/bio/4/ontology/iobc/",  # like http://purl.jp/bio/4/ontology/iobc/1.6.0
+    "http://w3id.org/nfdi4ing/metadata4ing/",  # like http://w3id.org/nfdi4ing/metadata4ing/1.3.1
+    "http://www.semanticweb.com/OntoRxn/",  # like http://www.semanticweb.com/OntoRxn/0.2.5
+    "https://w3id.org/lehrplan/ontology/",  # like in https://w3id.org/lehrplan/ontology/1.0.0-4
 ]
 VERSION_PREFIX_SPLITS = [
     "http://www.ebi.ac.uk/efo/releases/v",
     "http://www.ebi.ac.uk/swo/swo.owl/",
     "http://semanticscience.org/ontology/sio/v",
     "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+    "http://nmrml.org/cv/v",  # as in http://nmrml.org/cv/v1.1.0/nmrCV
+    "http://enanomapper.github.io/ontologies/releases/",  # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
 ]
+BAD = {
+    "http://purl.obolibrary.org/obo",
+    "http://www.bioassayontology.org/bao/bao_complete",
+}
 
 
 def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
+    # in case a literal string that wasn't parsed properly gets put in
+    data_version = data_version.strip('"')
+
     if data_version in VERSION_REWRITES:
         return VERSION_REWRITES[data_version]
@@ -74,3 +102,109 @@ def cleanup_version(data_version: str, prefix: str) -> str:
     logger.debug("[%s] bizarre version: %s", prefix, data_version)
     BIZARRE_LOGGED.add((prefix, data_version))
     return data_version
+
+
+def _get_obo_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_obo_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_owl_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_owl_xml_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_obograph_json_version(prefix: str, url: str) -> str | None:
+    rv = bioversions.utils.get_obograph_json_version(url)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+#: A mapping from data type to version getter function
+VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
+    "obo": _get_obo_version,
+    "owl": _get_owl_version,
+    "json": _get_obograph_json_version,
+}
+
+
+def _prioritize_version(
+    data_version: str | None,
+    ontology_prefix: str,
+    version: str | None,
+    date: datetime | None,
+) -> str | None:
+    """Process version information coming from several sources and normalize them."""
+    if ontology_prefix in STATIC_VERSION_REWRITES:
+        return STATIC_VERSION_REWRITES[ontology_prefix]
+
+    if version:
+        if version in BAD:
+            logger.debug("[%s] had known bad version, returning None: %s", ontology_prefix, version)
+            return None
+
+        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
+        if not data_version:
+            logger.debug(
+                "[%s] did not have a version, overriding with %s",
+                ontology_prefix,
+                clean_injected_version,
+            )
+            return clean_injected_version
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        if clean_data_version != clean_injected_version:
+            # in this case, we're going to trust the one that's passed
+            # through explicitly more than the graph's content
+            logger.debug(
+                "[%s] had version %s, overriding with %s",
+                ontology_prefix,
+                data_version,
+                version,
+            )
+        return clean_injected_version
+
+    if data_version:
+        if data_version in BAD:
+            logger.debug(
+                "[%s] had known bad version, returning None: %s", ontology_prefix, data_version
+            )
+            return None
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
+        return clean_data_version
+
+    if date is not None:
+        derived_date_version = date.strftime("%Y-%m-%d")
+        logger.debug(
+            "[%s] does not report a version. falling back to date: %s",
+            ontology_prefix,
+            derived_date_version,
+        )
+        return derived_date_version
+
+    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
+    return None
+
+
+def _get_version_from_artifact(prefix: str) -> str | None:
+    # assume that all possible files that can be downloaded
+    # are in sync and have the same version
+    for ontology_format, func in ONTOLOGY_GETTERS:
+        url = func(prefix)
+        if url is None:
+            continue
+        # Try to peek into the file to get the version without fully downloading
+        version_func = VERSION_GETTERS.get(ontology_format)
+        if version_func is None:
+            continue
+        version = version_func(prefix, url)
+        if version:
+            return cleanup_version(version, prefix=prefix)
+    return None
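Taken together, cleanup_version strips known version-IRI prefixes (via VERSION_PREFIXES) and applies the rewrite tables, while VERSION_GETTERS lets _get_version_from_artifact peek at a remote OBO/OWL/JSON artifact for its version. A sketch of the prefix-stripping behavior on a hypothetical version IRI:

from pyobo.utils.misc import cleanup_version

# "http://www.w3.org/ns/prov-" is in VERSION_PREFIXES, so only the
# trailing version string should survive (hypothetical input IRI)
print(cleanup_version("http://www.w3.org/ns/prov-20130430", prefix="prov"))
# expected: 20130430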
pyobo/utils/path.py
CHANGED
@@ -1,6 +1,7 @@
 """Utilities for building paths."""
 
 import enum
+import json
 import logging
 from pathlib import Path
 from typing import Any, Literal
@@ -14,6 +15,7 @@ from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTO
 __all__ = [
     "CacheArtifact",
     "ensure_df",
+    "ensure_json",
     "ensure_path",
     "get_cache_path",
     "get_relation_cache_path",
@@ -46,11 +48,13 @@ def ensure_path(
     version: VersionHint = None,
     name: str | None = None,
     force: bool = False,
-    backend: Literal["requests", "urllib"] = "urllib",
+    backend: Literal["requests", "urllib"] | None = None,
     verify: bool = True,
     **download_kwargs: Any,
 ) -> Path:
     """Download a file if it doesn't exist."""
+    if backend is None:
+        backend = "urllib"
     if verify:
         download_kwargs = {"backend": backend}
     else:
@@ -79,7 +83,7 @@ def ensure_df(
     sep: str = "\t",
     dtype=str,
     verify: bool = True,
-    backend: Literal["requests", "urllib"] = "urllib",
+    backend: Literal["requests", "urllib"] | None = None,
     **kwargs,
 ) -> pd.DataFrame:
     """Download a file and open as a dataframe."""
@@ -96,6 +100,30 @@ def ensure_df(
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
+def ensure_json(
+    prefix: str,
+    *parts: str,
+    url: str,
+    version: VersionHint = None,
+    name: str | None = None,
+    force: bool = False,
+    verify: bool = True,
+    backend: Literal["requests", "urllib"] | None = None,
+) -> Any:
+    """Download a file and open as JSON."""
+    _path = ensure_path(
+        prefix,
+        *parts,
+        url=url,
+        version=version,
+        name=name,
+        force=force,
+        verify=verify,
+        backend=backend,
+    )
+    with _path.open() as file:
+        return json.load(file)
+
+
 class CacheArtifact(enum.Enum):
     """An enumeration for cache artifacts."""
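The new ensure_json composes ensure_path (download-if-missing into the pystow-managed cache) with json.load. A usage sketch; the prefix and URL are hypothetical:

from pyobo.utils.path import ensure_json

# hypothetical resource: downloaded once into the PyOBO cache for the
# given prefix, then the local copy is parsed as JSON on every call
data = ensure_json("example", url="https://example.org/data.json")
print(type(data))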
{pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyobo
-Version: 0.12.4
+Version: 0.12.5
 Summary: A python package for handling and generating OBO
 Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
 Author: Charles Tapley Hoyt
@@ -36,9 +36,9 @@ Requires-Dist: more-click>=0.0.2
 Requires-Dist: humanize
 Requires-Dist: tabulate
 Requires-Dist: cachier
-Requires-Dist: pystow>=0.7.
-Requires-Dist: bioversions>=0.8.
-Requires-Dist: bioregistry>=0.12.
+Requires-Dist: pystow>=0.7.5
+Requires-Dist: bioversions>=0.8.101
+Requires-Dist: bioregistry>=0.12.30
 Requires-Dist: bioontologies>=0.7.2
 Requires-Dist: ssslm>=0.0.13
 Requires-Dist: zenodo-client>=0.3.6
@@ -46,7 +46,7 @@ Requires-Dist: class-resolver>=0.6.0
 Requires-Dist: psycopg2-binary
 Requires-Dist: pydantic>=2.0
 Requires-Dist: curies>=0.10.17
-Requires-Dist: curies-processing>=0.1.
+Requires-Dist: curies-processing>=0.1.2
 Requires-Dist: python-dateutil
 Requires-Dist: networkx>=3.4
 Requires-Dist: drugbank-downloader