pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/cli/database.py +7 -1
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/identifier_utils/api.py +3 -1
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/drugcentral.py +2 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +89 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +125 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +142 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/reactome.py
CHANGED
@@ -22,6 +22,7 @@ __all__ = [
 logger = logging.getLogger(__name__)
 
 PREFIX = "reactome"
+ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
 
 
 # TODO alt ids https://reactome.org/download/current/reactome_stable_ids.txt
@@ -32,10 +33,12 @@ class ReactomeGetter(Obo):
 
     ontology = bioversions_key = PREFIX
     typedefs = [from_species, has_participant, has_citation]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from iter_terms(version=self._version_or_raise, force=force)
 
 
 def ensure_participant_df(version: str, force: bool = False) -> pd.DataFrame:
@@ -87,6 +90,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     for parent_id, child_id in hierarchy_df.values:
         terms[child_id].append_parent(terms[parent_id])
 
+    for term in terms.values():
+        if not term.parents:
+            term.append_parent(ROOT)
+
     uniprot_pathway_df = ensure_participant_df(version=version, force=force)
     for uniprot_id, reactome_id in tqdm(
         uniprot_pathway_df.values,
@@ -102,7 +109,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             reference = Reference(prefix="uniprot.isoform", identifier=uniprot_id)
         else:
             reference = Reference(prefix="uniprot", identifier=uniprot_id)
-        terms[reactome_id].
+        terms[reactome_id].annotate_object(has_participant, reference)
 
     chebi_pathway_url = f"https://reactome.org/download/{version}/ChEBI2Reactome_All_Levels.txt"
     chebi_pathway_df = ensure_df(
@@ -122,7 +129,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         if reactome_id not in terms:
             tqdm.write(f"{reactome_id} appears in chebi participants file but not pathways file")
             continue
-        terms[reactome_id].
+        terms[reactome_id].annotate_object(
             has_participant, Reference(prefix="chebi", identifier=chebi_id)
         )
 
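Note: the Reactome changes attach every orphan pathway to a shared Pathway Ontology root (`pw:0000001`) and register it as a root term. A rough sketch of how the new wiring could be spot-checked after a rebuild, reusing `get_ontology` as it appears in the WikiPathways changes below; treating the returned ontology as an iterable of `Term` objects mirrors the `for pw_term in get_ontology("pw")` loop there, but is an assumption for this sketch.

```python
from pyobo.getters import get_ontology
from pyobo.struct import Reference

# the shared root added in this release (see ROOT above)
PW_ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")

# assumption: iterating the ontology object yields its Term objects
terms = list(get_ontology("reactome"))
orphans = [term for term in terms if not term.parents]
top_level = [term for term in terms if PW_ROOT in (term.parents or [])]

print(f"{len(orphans)} terms without parents (expect only the root itself)")
print(f"{len(top_level)} terms attached directly to {PW_ROOT.curie}")
```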
pyobo/sources/spdx.py
ADDED
@@ -0,0 +1,89 @@
+"""Convert SPDX to an ontology."""
+
+from collections.abc import Iterable
+from typing import Any
+
+from pydantic import ValidationError
+from tqdm import tqdm
+
+from pyobo.struct import Obo, Reference, Term, TypeDef
+from pyobo.struct.typedef import see_also
+from pyobo.struct.vocabulary import xsd_boolean
+from pyobo.utils.path import ensure_json
+
+__all__ = [
+    "SPDXLicenseGetter",
+]
+
+DATA_URL = "https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
+LICENSE_PREFIX = "spdx"
+TERM_PREFIX = "spdx.term"
+
+ROOT = Term.from_triple(TERM_PREFIX, "ListedLicense", "listed license")
+IS_OSI = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isOsiApproved", name="is OSI approved"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+IS_FSF = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isFsfLibre", name="is FSF Libre"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+
+
+def get_terms(version: str) -> Iterable[Term]:
+    """Iterate over terms."""
+    yield ROOT
+    data = ensure_json(
+        LICENSE_PREFIX,
+        url=DATA_URL,
+        version=version,
+    )
+    for record in data["licenses"]:
+        if term := _get_term(record):
+            yield term
+
+
+def _get_term(record: dict[str, Any]) -> Term | None:
+    try:
+        reference = Reference(
+            prefix=LICENSE_PREFIX, identifier=record["licenseId"], name=record["name"]
+        )
+    except ValidationError:
+        tqdm.write(f"invalid: {record['licenseId']}")
+        return None
+    term = (
+        Term(
+            reference=reference,
+            is_obsolete=True if record.get("isDeprecatedLicenseId") else None,
+            # type="Instance",
+        )
+        .append_parent(ROOT)
+        .append_synonym(record["licenseId"])
+    )
+    if record.get("isOsiApproved"):
+        term.annotate_boolean(IS_OSI, True)
+    if record.get("isFsfLibre"):
+        term.annotate_boolean(IS_FSF, True)
+    for uri in record.get("seeAlso", []):
+        term.annotate_uri(see_also, uri)
+    return term
+
+
+class SPDXLicenseGetter(Obo):
+    """An ontology representation of the SPDX Licenses."""
+
+    bioversions_key = ontology = LICENSE_PREFIX
+    typedefs = [see_also, IS_FSF, IS_OSI]
+    root_terms = [ROOT.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(version=self._version_or_raise)
+
+
+if __name__ == "__main__":
+    SPDXLicenseGetter.cli()
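Note: the new getter reads the `licenses` array of the upstream SPDX `licenses.json`. For orientation, a sketch of the record shape `_get_term` consumes; the entry below is illustrative rather than copied from the file, and only the keys the code actually reads are shown.

```python
# Illustrative record, following the keys read in _get_term above.
# A real entry comes from the "licenses" array of the SPDX licenses.json file.
record = {
    "licenseId": "MIT",              # becomes the spdx:MIT identifier and a synonym
    "name": "MIT License",           # becomes the term name
    "isOsiApproved": True,           # -> term.annotate_boolean(IS_OSI, True)
    "isFsfLibre": True,              # -> term.annotate_boolean(IS_FSF, True)
    "isDeprecatedLicenseId": False,  # -> term is not marked obsolete
    "seeAlso": [
        # each URI -> term.annotate_uri(see_also, uri); example URL only
        "https://opensource.org/license/mit/",
    ],
}
# _get_term(record) would yield a term parented under spdx.term:ListedLicense
```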
pyobo/sources/uniprot/uniprot.py
CHANGED
@@ -4,6 +4,7 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import cast
 
+from pystow.utils import safe_open_reader
 from tqdm.auto import tqdm
 
 from pyobo import Obo, Reference
@@ -22,7 +23,6 @@ from pyobo.struct import (
     participates_in,
 )
 from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
-from pyobo.utils.io import open_reader
 
 PREFIX = "uniprot"
 BASE_URL = "https://rest.uniprot.org/uniprotkb/stream"
@@ -78,7 +78,7 @@ class UniProtGetter(Obo):
 
 def iter_terms(version: str | None = None) -> Iterable[Term]:
     """Iterate over UniProt Terms."""
-    with
+    with safe_open_reader(ensure(version=version)) as reader:
         _ = next(reader)  # header
         for (
             uniprot_id,
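Note: `open_reader` from `pyobo.utils.io` is replaced here by `safe_open_reader` from `pystow.utils`, used as a context manager that yields a row iterator over the downloaded TSV export. A minimal sketch of the pattern as used above; that the helper transparently handles compression and defaults to tab-delimited rows is an assumption.

```python
from pystow.utils import safe_open_reader


def count_rows(path) -> int:
    """Count data rows in a (possibly gzipped) TSV file, skipping the header."""
    # assumption: the reader yields one sequence of column values per row,
    # as the unpacking loop in iter_terms above implies
    with safe_open_reader(path) as reader:
        _header = next(reader)  # skip the header row, as the UniProt getter does
        return sum(1 for _row in reader)
```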
pyobo/sources/wikipathways.py
CHANGED
@@ -3,13 +3,14 @@
 import logging
 from collections.abc import Iterable
 
-
+import pystow
+from pystow.utils import DownloadError, read_zipfile_rdf
 from tqdm import tqdm
 
 from .gmt_utils import parse_wikipathways_gmt
 from ..constants import SPECIES_REMAPPING
 from ..struct import Obo, Reference, Term, from_species
-from ..struct.typedef import has_participant
+from ..struct.typedef import contributes_to_condition, has_depiction, has_participant, located_in
 from ..utils.path import ensure_path
 
 __all__ = [
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 PREFIX = "wikipathways"
 
+ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
 _PATHWAY_INFO = [
     ("Anopheles_gambiae", "7165"),
     ("Arabidopsis_thaliana", "3702"),
@@ -46,17 +48,27 @@ class WikiPathwaysGetter(Obo):
     """An ontology representation of WikiPathways' pathway database."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_participant]
+    typedefs = [from_species, has_participant, contributes_to_condition, located_in, has_depiction]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from iter_terms(version=self._version_or_raise)
 
 
-
+PW_PREFIX = "http://purl.obolibrary.org/obo/PW_"
+DOID_PREFIX = "http://purl.obolibrary.org/obo/DOID_"
+CL_PREFIX = "http://purl.obolibrary.org/obo/CL_"
+
+
+def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[Term]:
     """Get WikiPathways terms."""
-
+    archive_url = f"https://data.wikipathways.org/current/rdf/wikipathways-{version}-rdf-wp.zip"
+    archive = pystow.ensure(PREFIX, url=archive_url, version=version)
 
+    base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
+    pw_references = set()
     for species_code, taxonomy_id in tqdm(_PATHWAY_INFO, desc=f"[{PREFIX}]", unit="species"):
         url = f"{base_url}-{species_code}.gmt"
         try:
@@ -68,15 +80,88 @@ def iter_terms(version: str) -> Iterable[Term]:
         taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
 
         for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
-
+            graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
+            uri = f"https://identifiers.org/wikipathways/{identifier}"
+
+            definition: str | None = None
+            if include_descriptions:
+                # TODO deal with weird characters breaking OFN
+                description_results = list(
+                    graph.query(
+                        f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/dcterms:description ?p }} LIMIT 1"
+                    )
+                )
+                if description_results:
+                    definition = str(description_results[0][0])  # type:ignore[index]
+
+            term = Term(
+                reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+                definition=definition,
+            )
             term.set_species(taxonomy_id, taxonomy_name)
+            term.annotate_uri(
+                has_depiction,
+                f"https://www.wikipathways.org/wikipathways-assets/pathways/{identifier}/{identifier}.svg",
+            )
             for ncbigene_id in genes:
                 term.annotate_object(
                     has_participant,
                     Reference(prefix="ncbigene", identifier=ncbigene_id),
                 )
+            # TODO switch query over to including chemicals from RDF SPARQL query
+            # TODO get description from SPARQL
+            parents = [  # type:ignore[misc]
+                p
+                for (p,) in graph.query(
+                    f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:pathwayOntologyTag ?p }}"
+                )
+            ]
+            for parent in parents:
+                if parent.startswith(PW_PREFIX):
+                    ref = Reference(prefix="pw", identifier=parent.removeprefix(PW_PREFIX))
+                    pw_references.add(ref)
+                    term.append_parent(ref)
+            if not parents:
+                tqdm.write(f"[{term.curie}] could not find parent")
+                term.append_parent(ROOT)
+
+            diseases = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:diseaseOntologyTag ?p }}"
+            )
+            for (disease,) in diseases:  # type:ignore[misc]
+                if disease.startswith(DOID_PREFIX):
+                    term.annotate_object(
+                        contributes_to_condition,
+                        Reference(prefix="doid", identifier=disease.removeprefix(DOID_PREFIX)),
+                    )
+
+            cells = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:cellTypeOntologyTag ?p }}"
+            )
+            for (cell,) in cells:  # type:ignore[misc]
+                if cell.startswith(CL_PREFIX):
+                    term.annotate_object(
+                        located_in,
+                        Reference(prefix="cl", identifier=cell.removeprefix(CL_PREFIX)),
+                    )
+
             yield term
 
+    from ..api import get_ancestors
+    from ..getters import get_ontology
+
+    for pw_reference in list(pw_references):
+        pw_references.update(get_ancestors(pw_reference) or set())
+
+    for pw_term in get_ontology("pw"):
+        if pw_term.reference in pw_references:
+            yield Term(
+                reference=pw_term.reference,
+                definition=pw_term.definition,
+                # PW has issues in hierarchy - there are lots of leaves with no root
+                parents=pw_term.parents or [ROOT],
+            )
+
 
 if __name__ == "__main__":
     WikiPathwaysGetter.cli()
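Note: the new enrichment reads each pathway's Turtle file from the RDF archive and queries it with SPARQL property paths (`pav:hasVersion/wp:pathwayOntologyTag`, and similarly for disease and cell-type tags). Below is a standalone sketch of the same query pattern with rdflib; the explicit PREFIX declarations, the namespace IRIs, and the `WP554.ttl` file name are assumptions for illustration (the archived `.ttl` files normally bind these prefixes already, which is why the code above can omit them).

```python
from rdflib import Graph

# assumption: a single pathway's Turtle file extracted from the RDF archive,
# e.g. wp/WP554.ttl; the identifier is only an example
graph = Graph()
graph.parse("WP554.ttl", format="turtle")

query = """
PREFIX pav: <http://purl.org/pav/>
PREFIX wp: <http://vocabularies.wikipathways.org/wp#>

SELECT ?tag WHERE {
    <https://identifiers.org/wikipathways/WP554> pav:hasVersion/wp:pathwayOntologyTag ?tag
}
"""
for (tag,) in graph.query(query):
    # tags come back as Pathway Ontology IRIs,
    # e.g. http://purl.obolibrary.org/obo/PW_0000001
    print(tag)
```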
pyobo/struct/__init__.py
CHANGED
@@ -16,6 +16,7 @@ from .struct import (
     SynonymTypeDef,
     Term,
     TypeDef,
+    build_ontology,
     make_ad_hoc_ontology,
 )
 from .struct_utils import Annotation, Stanza, StanzaType
@@ -57,6 +58,7 @@ __all__ = [
     "Term",
     "TypeDef",
     "_parse_str_or_curie_or_uri",
+    "build_ontology",
     "default_reference",
     "derives_from",
     "enables",
pyobo/struct/functional/dsl.py
CHANGED
@@ -211,7 +211,16 @@ class LiteralBox(Box):
 
     def to_funowl(self) -> str:
         """Represent this literal for functional OWL."""
-
+        rv = self.literal.n3(self._namespace_manager)
+        # it appears that the OFN format doesn't use triple quotes
+        if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
+            # strip them off
+            rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
+            # escape quotes
+            rv = rv.replace('"', '\\"')
+            # stick back quotes and xsd tag
+            rv = '"' + rv + '"^^xsd:string'
+        return rv
 
     def to_funowl_args(self) -> str:  # pragma: no cover
         """Get the inside of the functional OWL tag representing the literal (unused)."""
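Note: the new `LiteralBox.to_funowl` body works around rdflib's long (triple-quoted) string serialization, which functional OWL does not use. A minimal standalone illustration of the same normalization, applied to an already-serialized N3 literal:

```python
# an N3 serialization that uses long-string (triple) quoting
rv = '"""a "quoted" label"""^^xsd:string'

if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
    # strip the long quotes, escape the inner quotes, and re-wrap
    rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
    rv = rv.replace('"', '\\"')
    rv = '"' + rv + '"^^xsd:string'

assert rv == '"a \\"quoted\\" label"^^xsd:string'
```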
pyobo/struct/functional/ontology.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
 from pathlib import Path
 
 from curies import Converter
+from pystow.utils import safe_open
 from rdflib import OWL, RDF, Graph, term
 
 from pyobo.struct.functional.dsl import Annotation, Annotations, Axiom, Box
@@ -16,7 +17,6 @@ from pyobo.struct.functional.utils import (
     FunctionalOWLSerializable,
     list_to_funowl,
 )
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "Document",
@@ -108,9 +108,9 @@ class Document:
         return graph
 
     def write_funowl(self, path: str | Path) -> None:
-        """Write functional OWL to a file
+        """Write functional OWL to a file."""
         path = Path(path).expanduser().resolve()
-        with safe_open(path,
+        with safe_open(path, operation="write") as file:
             file.write(self.to_funowl())
 
     def to_funowl(self) -> str:
pyobo/struct/obo/reader.py
CHANGED
@@ -18,6 +18,7 @@ from curies import ReferenceTuple
 from curies.preprocessing import BlocklistError
 from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 
 from .reader_utils import (
@@ -52,8 +53,7 @@ from ...identifier_utils import (
     get_rules,
 )
 from ...utils.cache import write_gzipped_graph
-from ...utils.
-from ...utils.misc import STATIC_VERSION_REWRITES, cleanup_version
+from ...utils.misc import _prioritize_version
 
 __all__ = [
     "from_obo_path",
@@ -90,7 +90,7 @@ def from_obo_path(
         )
     else:
         logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
-        with safe_open(path, read
+        with safe_open(path, operation="read") as file:
            graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
 
     if prefix:
@@ -157,7 +157,7 @@ def from_obonet(
     upgrade: bool = True,
     use_tqdm: bool = False,
 ) -> Obo:
-    """Get all
+    """Get all the terms from a OBO graph."""
     ontology_prefix_raw = graph.graph["ontology"]
     ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
     logger.info("[%s] extracting OBO using obonet", ontology_prefix)
@@ -168,8 +168,11 @@ def from_obonet(
 
     macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
 
-    data_version =
-        graph
+    data_version = _prioritize_version(
+        data_version=graph.graph.get("data-version") or None,
+        ontology_prefix=ontology_prefix,
+        version=version,
+        date=date,
     )
     if data_version and "/" in data_version:
         raise ValueError(
@@ -533,17 +536,22 @@ def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool
         stanza.append_subset(reference)
 
 
+# needed to parse OPMI
+_BOOLEAN_TRUE_VALUES = {"true", "1", 1}
+_BOOLEAN_FALSE_VALUES = {"false", "0", 0}
+
+
 def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
     value = data.get(tag)
     if value is None:
         return None
     if isinstance(value, list):
         value = value[0]
-    if value
+    if value in _BOOLEAN_FALSE_VALUES:
         return False
-    if value
+    if value in _BOOLEAN_TRUE_VALUES:
         return True
-    raise ValueError(value)
+    raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")
 
 
 def _get_reference(
@@ -703,50 +711,6 @@ def _clean_graph_ontology(graph, prefix: str) -> None:
     graph.graph["ontology"] = prefix
 
 
-def _clean_graph_version(
-    graph, ontology_prefix: str, version: str | None, date: datetime | None
-) -> str | None:
-    if ontology_prefix in STATIC_VERSION_REWRITES:
-        return STATIC_VERSION_REWRITES[ontology_prefix]
-
-    data_version: str | None = graph.graph.get("data-version") or None
-    if version:
-        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
-        if not data_version:
-            logger.debug(
-                "[%s] did not have a version, overriding with %s",
-                ontology_prefix,
-                clean_injected_version,
-            )
-            return clean_injected_version
-
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        if clean_data_version != clean_injected_version:
-            # in this case, we're going to trust the one that's passed
-            # through explicitly more than the graph's content
-            logger.debug(
-                "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
-            )
-        return clean_injected_version
-
-    if data_version:
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
-        return clean_data_version
-
-    if date is not None:
-        derived_date_version = date.strftime("%Y-%m-%d")
-        logger.debug(
-            "[%s] does not report a version. falling back to date: %s",
-            ontology_prefix,
-            derived_date_version,
-        )
-        return derived_date_version
-
-    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
-    return None
-
-
 def _iter_obo_graph(
     graph: nx.MultiDiGraph,
     *,
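Note: the version-resolution logic removed here (`_clean_graph_version`) is replaced by the `_prioritize_version` helper imported from `pyobo/utils/misc.py`, whose implementation is not shown in this diff. Separately, the boolean tag parser now accepts the `1`/`0` spellings (string or int) that appear in OPMI, in addition to `true`/`false`; a small standalone sketch copying the logic from `_get_boolean` above:

```python
_BOOLEAN_TRUE_VALUES = {"true", "1", 1}
_BOOLEAN_FALSE_VALUES = {"false", "0", 0}


def parse_obo_boolean(value) -> bool:
    """Parse a boolean tag value the way the updated reader does (sketch)."""
    if isinstance(value, list):
        value = value[0]  # obonet may wrap repeated tags in a list
    if value in _BOOLEAN_FALSE_VALUES:
        return False
    if value in _BOOLEAN_TRUE_VALUES:
        return True
    raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")


assert parse_obo_boolean("true") is True
assert parse_obo_boolean(["0"]) is False
assert parse_obo_boolean(1) is True
```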
pyobo/struct/obograph/export.py
CHANGED
@@ -8,11 +8,11 @@ import curies
 import obographs as og
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as v
+from pystow.utils import safe_open
 
 from pyobo.identifier_utils.api import get_converter
 from pyobo.struct import Obo, OBOLiteral, Stanza, Term, TypeDef
 from pyobo.struct import typedef as tdv
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "to_obograph",
@@ -25,7 +25,7 @@ def write_obograph(obo: Obo, path: str | Path, *, converter: Converter | None =
     """Write an ontology to a file as OBO Graph JSON."""
     path = Path(path).expanduser().resolve()
     raw_graph = to_obograph(obo, converter=converter)
-    with safe_open(path,
+    with safe_open(path, operation="write") as file:
         file.write(raw_graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
 
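Note: several modules in this release (the OBO reader, the OBO Graph exporter, and the functional-OWL writer) switch from `pyobo.utils.io.safe_open` to `pystow.utils.safe_open` with an explicit `operation` keyword. A minimal sketch of the pattern as it is used in this diff; the example path is hypothetical, and whether compression is inferred from the file suffix is an assumption about the pystow helper.

```python
from pathlib import Path

from pystow.utils import safe_open

path = Path("example.json")  # hypothetical output path

# write, as in write_obograph and Document.write_funowl above
with safe_open(path, operation="write") as file:
    file.write('{"graphs": []}')

# read, as in from_obo_path above
with safe_open(path, operation="read") as file:
    print(file.read())
```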