pyobo 0.12.3__py3-none-any.whl → 0.12.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/names.py +8 -1
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +85 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +115 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +135 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/METADATA +5 -5
- {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/RECORD +42 -36
- {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/WHEEL +0 -0
- {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.3.dist-info → pyobo-0.12.5.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/spdx.py
ADDED
@@ -0,0 +1,85 @@
+"""Convert SPDX to an ontology."""
+
+from collections.abc import Iterable
+from typing import Any
+
+from pydantic import ValidationError
+from tqdm import tqdm
+
+from pyobo.struct import Obo, Reference, Term, TypeDef
+from pyobo.struct.typedef import see_also
+from pyobo.struct.vocabulary import xsd_boolean
+from pyobo.utils.path import ensure_json
+
+__all__ = [
+    "SPDXLicenseGetter",
+]
+
+DATA_URL = "https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
+LICENSE_PREFIX = "spdx"
+TERM_PREFIX = "spdx.term"
+
+ROOT = Term.from_triple(TERM_PREFIX, "ListedLicense", "listed license")
+IS_OSI = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isOsiApproved", name="is OSI approved"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+IS_FSF = TypeDef(
+    reference=Reference(prefix=TERM_PREFIX, identifier="isFsfLibre", name="is FSF Libre"),
+    is_metadata_tag=True,
+    domain=ROOT.reference,
+    range=xsd_boolean,
+)
+
+
+def get_terms(version: str) -> Iterable[Term]:
+    """Iterate over terms."""
+    yield ROOT
+    data = ensure_json(
+        LICENSE_PREFIX,
+        url=DATA_URL,
+        version=version,
+    )
+    for record in data["licenses"]:
+        if term := _get_term(record):
+            yield term
+
+
+def _get_term(record: dict[str, Any]) -> Term | None:
+    try:
+        reference = Reference(
+            prefix=LICENSE_PREFIX, identifier=record["licenseId"], name=record["name"]
+        )
+    except ValidationError:
+        tqdm.write(f"invalid: {record['licenseId']}")
+        return None
+    term = Term(
+        reference=reference,
+        is_obsolete=True if record.get("isDeprecatedLicenseId") else None,
+        # type="Instance",
+    ).append_parent(ROOT)
+    if record.get("isOsiApproved"):
+        term.annotate_boolean(IS_OSI, True)
+    if record.get("isFsfLibre"):
+        term.annotate_boolean(IS_FSF, True)
+    for uri in record.get("seeAlso", []):
+        term.annotate_uri(see_also, uri)
+    return term
+
+
+class SPDXLicenseGetter(Obo):
+    """An ontology representation of the SPDX Licenses."""
+
+    bioversions_key = ontology = LICENSE_PREFIX
+    typedefs = [see_also, IS_FSF, IS_OSI]
+    root_terms = [ROOT.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(version=self._version_or_raise)
+
+
+if __name__ == "__main__":
+    SPDXLicenseGetter.cli(["--obo", "--owl", "--rewrite"])
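For orientation, a minimal usage sketch for the new getter (not part of the diff); it relies only on the entrypoints visible above and assumes the SPDX version can be resolved locally and that the getter can be constructed without arguments.

    from pyobo.sources.spdx import SPDXLicenseGetter

    # Same CLI entrypoint as the __main__ block above, writing only OBO output
    SPDXLicenseGetter.cli(["--obo"])

    # Or iterate terms programmatically via the iter_terms() defined above
    for term in SPDXLicenseGetter().iter_terms():
        print(term.curie)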
pyobo/sources/uniprot/uniprot.py
CHANGED
@@ -4,6 +4,7 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import cast
 
+from pystow.utils import safe_open_reader
 from tqdm.auto import tqdm
 
 from pyobo import Obo, Reference
@@ -22,7 +23,6 @@ from pyobo.struct import (
     participates_in,
 )
 from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
-from pyobo.utils.io import open_reader
 
 PREFIX = "uniprot"
 BASE_URL = "https://rest.uniprot.org/uniprotkb/stream"
@@ -78,7 +78,7 @@ class UniProtGetter(Obo):
 
 def iter_terms(version: str | None = None) -> Iterable[Term]:
     """Iterate over UniProt Terms."""
-    with
+    with safe_open_reader(ensure(version=version)) as reader:
         _ = next(reader)  # header
         for (
             uniprot_id,
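The change above swaps pyobo's own open_reader for pystow.utils.safe_open_reader. A hedged sketch of that helper, based only on the call pattern shown in the hunk (the file path below is hypothetical):

    from pystow.utils import safe_open_reader

    # Used above as a context manager over a (possibly gzipped) TSV dump,
    # yielding rows like csv.reader; the first row is the header.
    with safe_open_reader("uniprot_dump.tsv.gz") as reader:
        header = next(reader)
        for row in reader:
            uniprot_id = row[0]
            print(uniprot_id)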
pyobo/sources/wikipathways.py
CHANGED
@@ -3,13 +3,14 @@
 import logging
 from collections.abc import Iterable
 
-
+import pystow
+from pystow.utils import DownloadError, read_zipfile_rdf
 from tqdm import tqdm
 
 from .gmt_utils import parse_wikipathways_gmt
 from ..constants import SPECIES_REMAPPING
 from ..struct import Obo, Reference, Term, from_species
-from ..struct.typedef import has_participant
+from ..struct.typedef import contributes_to_condition, has_depiction, has_participant, located_in
 from ..utils.path import ensure_path
 
 __all__ = [
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 PREFIX = "wikipathways"
 
+ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
 _PATHWAY_INFO = [
     ("Anopheles_gambiae", "7165"),
     ("Arabidopsis_thaliana", "3702"),
@@ -46,17 +48,27 @@ class WikiPathwaysGetter(Obo):
     """An ontology representation of WikiPathways' pathway database."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_participant]
+    typedefs = [from_species, has_participant, contributes_to_condition, located_in, has_depiction]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from iter_terms(version=self._version_or_raise)
 
 
-
+PW_PREFIX = "http://purl.obolibrary.org/obo/PW_"
+DOID_PREFIX = "http://purl.obolibrary.org/obo/DOID_"
+CL_PREFIX = "http://purl.obolibrary.org/obo/CL_"
+
+
+def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[Term]:
     """Get WikiPathways terms."""
-
+    archive_url = f"https://data.wikipathways.org/current/rdf/wikipathways-{version}-rdf-wp.zip"
+    archive = pystow.ensure(PREFIX, url=archive_url, version=version)
 
+    base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
+    pw_references = set()
     for species_code, taxonomy_id in tqdm(_PATHWAY_INFO, desc=f"[{PREFIX}]", unit="species"):
         url = f"{base_url}-{species_code}.gmt"
         try:
@@ -68,15 +80,88 @@ def iter_terms(version: str) -> Iterable[Term]:
         taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
 
         for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
-
+            graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
+            uri = f"https://identifiers.org/wikipathways/{identifier}"
+
+            definition: str | None = None
+            if include_descriptions:
+                # TODO deal with weird characters breaking OFN
+                description_results = list(
+                    graph.query(
+                        f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/dcterms:description ?p }} LIMIT 1"
+                    )
+                )
+                if description_results:
+                    definition = str(description_results[0][0])  # type:ignore[index]
+
+            term = Term(
+                reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+                definition=definition,
+            )
             term.set_species(taxonomy_id, taxonomy_name)
+            term.annotate_uri(
+                has_depiction,
+                f"https://www.wikipathways.org/wikipathways-assets/pathways/{identifier}/{identifier}.svg",
+            )
             for ncbigene_id in genes:
                 term.annotate_object(
                     has_participant,
                     Reference(prefix="ncbigene", identifier=ncbigene_id),
                 )
+            # TODO switch query over to including chemicals from RDF SPARQL query
+            # TODO get description from SPARQL
+            parents = [  # type:ignore[misc]
+                p
+                for (p,) in graph.query(
+                    f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:pathwayOntologyTag ?p }}"
+                )
+            ]
+            for parent in parents:
+                if parent.startswith(PW_PREFIX):
+                    ref = Reference(prefix="pw", identifier=parent.removeprefix(PW_PREFIX))
+                    pw_references.add(ref)
+                    term.append_parent(ref)
+            if not parents:
+                tqdm.write(f"[{term.curie}] could not find parent")
+                term.append_parent(ROOT)
+
+            diseases = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:diseaseOntologyTag ?p }}"
+            )
+            for (disease,) in diseases:  # type:ignore[misc]
+                if disease.startswith(DOID_PREFIX):
+                    term.annotate_object(
+                        contributes_to_condition,
+                        Reference(prefix="doid", identifier=disease.removeprefix(DOID_PREFIX)),
+                    )
+
+            cells = graph.query(
+                f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:cellTypeOntologyTag ?p }}"
+            )
+            for (cell,) in cells:  # type:ignore[misc]
+                if cell.startswith(CL_PREFIX):
+                    term.annotate_object(
+                        located_in,
+                        Reference(prefix="cl", identifier=cell.removeprefix(CL_PREFIX)),
+                    )
+
             yield term
 
+    from ..api import get_ancestors
+    from ..getters import get_ontology
+
+    for pw_reference in list(pw_references):
+        pw_references.update(get_ancestors(pw_reference) or set())
+
+    for pw_term in get_ontology("pw"):
+        if pw_term.reference in pw_references:
+            yield Term(
+                reference=pw_term.reference,
+                definition=pw_term.definition,
+                # PW has issues in hierarchy - there are lots of leaves with no root
+                parents=pw_term.parents or [ROOT],
+            )
+
 
 if __name__ == "__main__":
     WikiPathwaysGetter.cli()
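The rewritten iter_terms pulls each pathway's RDF out of the release archive and queries it with SPARQL. A hedged, standalone sketch of that lookup for one pathway (the archive path and the identifier WP554 are illustrative; the pav: and wp: prefixes are assumed to be bound by the Turtle file itself, as the queries above rely on):

    from pystow.utils import read_zipfile_rdf

    # hypothetical local copy of the release archive constructed in iter_terms() above
    graph = read_zipfile_rdf("wikipathways-rdf-wp.zip", inner_path="wp/WP554.ttl")
    uri = "https://identifiers.org/wikipathways/WP554"
    for (tag,) in graph.query(
        f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:pathwayOntologyTag ?p }}"
    ):
        print(tag)  # Pathway Ontology URIs used as parents above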
pyobo/struct/__init__.py
CHANGED
@@ -16,6 +16,7 @@ from .struct import (
     SynonymTypeDef,
     Term,
     TypeDef,
+    build_ontology,
     make_ad_hoc_ontology,
 )
 from .struct_utils import Annotation, Stanza, StanzaType
@@ -57,6 +58,7 @@ __all__ = [
     "Term",
     "TypeDef",
     "_parse_str_or_curie_or_uri",
+    "build_ontology",
     "default_reference",
     "derives_from",
     "enables",
pyobo/struct/functional/dsl.py
CHANGED
@@ -211,7 +211,16 @@ class LiteralBox(Box):
 
     def to_funowl(self) -> str:
         """Represent this literal for functional OWL."""
-
+        rv = self.literal.n3(self._namespace_manager)
+        # it appears that the OFN format doesn't use triple quotes
+        if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
+            # strip them off
+            rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
+            # escape quotes
+            rv = rv.replace('"', '\\"')
+            # stick back quotes and xsd tag
+            rv = '"' + rv + '"^^xsd:string'
+        return rv
 
     def to_funowl_args(self) -> str:  # pragma: no cover
         """Get the inside of the functional OWL tag representing the literal (unused)."""
pyobo/struct/functional/ontology.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
 from pathlib import Path
 
 from curies import Converter
+from pystow.utils import safe_open
 from rdflib import OWL, RDF, Graph, term
 
 from pyobo.struct.functional.dsl import Annotation, Annotations, Axiom, Box
@@ -16,7 +17,6 @@ from pyobo.struct.functional.utils import (
     FunctionalOWLSerializable,
     list_to_funowl,
 )
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "Document",
@@ -108,9 +108,9 @@ class Document:
         return graph
 
     def write_funowl(self, path: str | Path) -> None:
-        """Write functional OWL to a file
+        """Write functional OWL to a file."""
         path = Path(path).expanduser().resolve()
-        with safe_open(path,
+        with safe_open(path, operation="write") as file:
             file.write(self.to_funowl())
 
     def to_funowl(self) -> str:
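A standalone sketch (hypothetical helper, not part of pyobo) of the quote normalization added to LiteralBox.to_funowl above: rdflib can render awkward strings with triple quotes, which the functional OWL output does not accept, so the triple quotes are stripped, inner quotes escaped, and the literal rewrapped with its xsd:string tag.

    def normalize_ofn_string(rv: str) -> str:
        # mirrors the branch added to to_funowl above
        if rv.startswith('"""') and rv.endswith('"""^^xsd:string'):
            rv = rv.removeprefix('"""').removesuffix('"""^^xsd:string')
            rv = rv.replace('"', '\\"')
            rv = '"' + rv + '"^^xsd:string'
        return rv

    # a triple-quoted rendering like rdflib produces for problematic content
    print(normalize_ofn_string('"""a "quoted" description"""^^xsd:string'))
    # -> "a \"quoted\" description"^^xsd:string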
pyobo/struct/obo/reader.py
CHANGED
@@ -18,6 +18,7 @@ from curies import ReferenceTuple
 from curies.preprocessing import BlocklistError
 from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 
 from .reader_utils import (
@@ -52,8 +53,7 @@ from ...identifier_utils import (
     get_rules,
 )
 from ...utils.cache import write_gzipped_graph
-from ...utils.
-from ...utils.misc import STATIC_VERSION_REWRITES, cleanup_version
+from ...utils.misc import _prioritize_version
 
 __all__ = [
     "from_obo_path",
@@ -90,7 +90,7 @@ def from_obo_path(
         )
     else:
         logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
-        with safe_open(path, read
+        with safe_open(path, operation="read") as file:
            graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
 
     if prefix:
@@ -157,7 +157,7 @@ def from_obonet(
     upgrade: bool = True,
     use_tqdm: bool = False,
 ) -> Obo:
-    """Get all
+    """Get all the terms from a OBO graph."""
     ontology_prefix_raw = graph.graph["ontology"]
     ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
     logger.info("[%s] extracting OBO using obonet", ontology_prefix)
@@ -168,8 +168,11 @@ def from_obonet(
 
     macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
 
-    data_version =
-        graph
+    data_version = _prioritize_version(
+        data_version=graph.graph.get("data-version") or None,
+        ontology_prefix=ontology_prefix,
+        version=version,
+        date=date,
     )
     if data_version and "/" in data_version:
         raise ValueError(
@@ -533,17 +536,22 @@ def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool
         stanza.append_subset(reference)
 
 
+# needed to parse OPMI
+_BOOLEAN_TRUE_VALUES = {"true", "1", 1}
+_BOOLEAN_FALSE_VALUES = {"false", "0", 0}
+
+
 def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
     value = data.get(tag)
     if value is None:
         return None
     if isinstance(value, list):
         value = value[0]
-    if value
+    if value in _BOOLEAN_FALSE_VALUES:
         return False
-    if value
+    if value in _BOOLEAN_TRUE_VALUES:
         return True
-    raise ValueError(value)
+    raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")
 
 
 def _get_reference(
@@ -703,50 +711,6 @@ def _clean_graph_ontology(graph, prefix: str) -> None:
     graph.graph["ontology"] = prefix
 
 
-def _clean_graph_version(
-    graph, ontology_prefix: str, version: str | None, date: datetime | None
-) -> str | None:
-    if ontology_prefix in STATIC_VERSION_REWRITES:
-        return STATIC_VERSION_REWRITES[ontology_prefix]
-
-    data_version: str | None = graph.graph.get("data-version") or None
-    if version:
-        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
-        if not data_version:
-            logger.debug(
-                "[%s] did not have a version, overriding with %s",
-                ontology_prefix,
-                clean_injected_version,
-            )
-            return clean_injected_version
-
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        if clean_data_version != clean_injected_version:
-            # in this case, we're going to trust the one that's passed
-            # through explicitly more than the graph's content
-            logger.debug(
-                "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
-            )
-        return clean_injected_version
-
-    if data_version:
-        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
-        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
-        return clean_data_version
-
-    if date is not None:
-        derived_date_version = date.strftime("%Y-%m-%d")
-        logger.debug(
-            "[%s] does not report a version. falling back to date: %s",
-            ontology_prefix,
-            derived_date_version,
-        )
-        return derived_date_version
-
-    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
-    return None
-
-
 def _iter_obo_graph(
     graph: nx.MultiDiGraph,
     *,
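A small standalone sketch (hypothetical helper) mirroring the updated _get_boolean above: OBO tag values can arrive as strings or integers (the comment cites OPMI), so membership in explicit true/false sets replaces the previous checks.

    _TRUE = {"true", "1", 1}
    _FALSE = {"false", "0", 0}

    def coerce_boolean(value):
        if isinstance(value, list):
            value = value[0]
        if value in _FALSE:
            return False
        if value in _TRUE:
            return True
        raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")

    assert coerce_boolean(["true"]) is True
    assert coerce_boolean("0") is False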
pyobo/struct/obograph/export.py
CHANGED
@@ -8,11 +8,11 @@ import curies
 import obographs as og
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as v
+from pystow.utils import safe_open
 
 from pyobo.identifier_utils.api import get_converter
 from pyobo.struct import Obo, OBOLiteral, Stanza, Term, TypeDef
 from pyobo.struct import typedef as tdv
-from pyobo.utils.io import safe_open
 
 __all__ = [
     "to_obograph",
@@ -25,7 +25,7 @@ def write_obograph(obo: Obo, path: str | Path, *, converter: Converter | None =
     """Write an ontology to a file as OBO Graph JSON."""
     path = Path(path).expanduser().resolve()
     raw_graph = to_obograph(obo, converter=converter)
-    with safe_open(path,
+    with safe_open(path, operation="write") as file:
         file.write(raw_graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
 
 
pyobo/struct/struct.py
CHANGED
@@ -25,6 +25,7 @@ import ssslm
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as _cv
 from more_click import force_option, verbose_option
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 from typing_extensions import Self
 
@@ -70,7 +71,7 @@ from ..constants import (
     TARGET_PREFIX,
 )
 from ..utils.cache import write_gzipped_graph
-from ..utils.io import multidict,
+from ..utils.io import multidict, write_iterable_tsv
 from ..utils.path import (
     CacheArtifact,
     get_cache_path,
@@ -87,6 +88,7 @@ __all__ = [
     "TypeDef",
     "abbreviation",
     "acronym",
+    "build_ontology",
     "make_ad_hoc_ontology",
 ]
 
@@ -746,13 +748,23 @@ class Obo:
            help="Re-process the data, but don't download it again.",
        )
        @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
+        @click.option("--obo", is_flag=True, help="Write OBO")
        @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
        @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
+        @click.option("--cache/--no-cache", is_flag=True, help="Write the cache", default=True)
        @click.option(
            "--version", help="Specify data version to get. Use this if bioversions is acting up."
        )
-        def _main(
-
+        def _main(
+            force: bool,
+            obo: bool,
+            owl: bool,
+            ofn: bool,
+            ttl: bool,
+            version: str | None,
+            rewrite: bool,
+            cache: bool,
+        ) -> None:
            try:
                inst = cls(force=force, data_version=version)
            except Exception as e:
@@ -760,13 +772,14 @@ class Obo:
                sys.exit(1)
            inst.write_default(
                write_obograph=False,
-                write_obo=
+                write_obo=obo,
                write_owl=owl,
                write_ofn=ofn,
                write_ttl=ttl,
                write_nodes=True,
                force=force or rewrite,
                use_tqdm=True,
+                write_cache=cache,
            )
 
        return _main
@@ -909,6 +922,8 @@ class Obo:
                    end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
                case Reference():
                    end = reference_escape(value, ontology_prefix=self.ontology)
+                case _:
+                    raise TypeError(f"Invalid property value: {value}")
            yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
 
    def _iterate_property_pairs(self) -> Iterable[Annotation]:
@@ -925,10 +940,21 @@ class Obo:
            license_literal = OBOLiteral.string(license_spdx_id)
            yield Annotation(v.has_license, license_literal)
 
-        # Description
        if description := bioregistry.get_description(self.ontology):
-            description = obo_escape_slim(description.strip())
            yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
+        if homepage := bioregistry.get_homepage(self.ontology):
+            yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
+        if repository := bioregistry.get_repository(self.ontology):
+            yield Annotation(v.has_repository, OBOLiteral.uri(repository))
+        if logo := bioregistry.get_logo(self.ontology):
+            yield Annotation(v.has_logo, OBOLiteral.uri(logo))
+        if mailing_list := bioregistry.get_mailing_list(self.ontology):
+            yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
+        if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
+            yield Annotation(
+                v.has_maintainer,
+                Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
+            )
 
        # Root terms
        for root_term in self.root_terms or []:
@@ -973,7 +999,7 @@ class Obo:
            unit="line",
        )
        if isinstance(file, str | Path | os.PathLike):
-            with safe_open(file,
+            with safe_open(file, operation="write") as fh:
                self._write_lines(it, fh)
        else:
            self._write_lines(it, file)
@@ -1149,7 +1175,7 @@ class Obo:
        metadata = self.get_metadata()
        for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
            logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
-            with safe_open(path,
+            with safe_open(path, operation="write") as file:
                json.dump(metadata, file, indent=2)
 
    def write_prefix_map(self) -> None:
@@ -2265,6 +2291,87 @@ class AdHocOntologyBase(Obo):
    """A base class for ad-hoc ontologies."""
 
 
+def build_ontology(
+    prefix: str,
+    *,
+    terms: list[Term] | None = None,
+    synonym_typedefs: list[SynonymTypeDef] | None = None,
+    typedefs: list[TypeDef] | None = None,
+    name: str | None = None,  # inferred
+    version: str | None = None,
+    idspaces: dict[str, str] | None = None,
+    root_terms: list[Reference] | None = None,
+    subsetdefs: list[tuple[Reference, str]] | None = None,
+    properties: list[Annotation] | None = None,
+    imports: list[str] | None = None,
+    description: str | None = None,
+    homepage: str | None = None,
+    mailing_list: str | None = None,
+    logo: str | None = None,
+    repository: str | None = None,
+) -> Obo:
+    """Build an ontology from parts."""
+    if name is None:
+        name = bioregistry.get_name(prefix)
+    # TODO auto-populate license and other properties
+
+    if properties is None:
+        properties = []
+    if typedefs is None:
+        typedefs = []
+
+    if description:
+        from .typedef import has_description
+
+        properties.append(Annotation.string(has_description.reference, description))
+        if has_description not in typedefs:
+            typedefs.append(has_description)  # TODO get proper typedef
+
+    if homepage:
+        from .typedef import has_homepage
+
+        properties.append(Annotation.uri(has_homepage.reference, homepage))
+        if has_homepage not in typedefs:
+            typedefs.append(has_homepage)
+
+    if logo:
+        from .typedef import has_depiction
+
+        properties.append(Annotation.uri(has_depiction.reference, logo))
+        if has_depiction not in typedefs:
+            typedefs.append(has_depiction)
+
+    if mailing_list:
+        from .typedef import has_mailing_list
+
+        properties.append(Annotation.string(has_mailing_list.reference, mailing_list))
+        if has_mailing_list not in typedefs:
+            typedefs.append(has_mailing_list)
+
+    if repository:
+        from .typedef import has_repository
+
+        properties.append(Annotation.uri(has_repository.reference, repository))
+        if has_repository not in typedefs:
+            typedefs.append(has_repository)
+
+    return make_ad_hoc_ontology(
+        _ontology=prefix,
+        _name=name,
+        # _auto_generated_by
+        _typedefs=typedefs,
+        _synonym_typedefs=synonym_typedefs,
+        # _date: datetime.datetime | None = None,
+        _data_version=version,
+        _idspaces=idspaces,
+        _root_terms=root_terms,
+        _subsetdefs=subsetdefs,
+        _property_values=properties,
+        _imports=imports,
+        terms=terms,
+    )
+
+
 def make_ad_hoc_ontology(
    _ontology: str,
    _name: str | None = None,
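A hedged usage sketch for the new build_ontology helper; the "example" prefix, the metadata values, and the choice of term are illustrative rather than taken from the diff, and term references are assumed to need Bioregistry-resolvable prefixes (hgnc:5 is the gene A1BG).

    from pyobo.struct import Reference, Term, build_ontology

    terms = [Term(reference=Reference(prefix="hgnc", identifier="5", name="A1BG"))]
    ontology = build_ontology(
        "example",  # hypothetical ontology prefix
        name="Example Ontology",
        version="1.0",
        terms=terms,
        description="A tiny ad hoc ontology assembled from parts.",
        homepage="https://example.org",
    )
    for term in ontology.iter_terms():
        print(term.curie)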
pyobo/struct/struct_utils.py
CHANGED
@@ -63,6 +63,16 @@ class Annotation(NamedTuple):
         """Return a literal property for a float."""
         return cls(predicate, OBOLiteral.float(value))
 
+    @classmethod
+    def uri(cls, predicate: Reference, uri: str) -> Self:
+        """Return a literal property for a URI."""
+        return cls(predicate, OBOLiteral.uri(uri))
+
+    @classmethod
+    def string(cls, predicate: Reference, value: str, *, language: str | None = None) -> Self:
+        """Return a literal property for a float."""
+        return cls(predicate, OBOLiteral.string(value, language=language))
+
     @staticmethod
     def _sort_key(x: Annotation):
         return x.predicate, _reference_or_literal_key(x.value)
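A short hedged sketch of the two new Annotation constructors, reusing typedefs referenced elsewhere in this diff; the literal values are illustrative.

    from pyobo.struct import Annotation
    from pyobo.struct.typedef import has_description, has_homepage

    home = Annotation.uri(has_homepage.reference, "https://example.org")
    desc = Annotation.string(has_description.reference, "An example description", language="en")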
|