pyobo 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- pyobo/__init__.py +12 -4
- pyobo/api/names.py +8 -1
- pyobo/getters.py +10 -3
- pyobo/identifier_utils/__init__.py +2 -0
- pyobo/identifier_utils/api.py +3 -2
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/drugbank/drugbank.py +1 -1
- pyobo/sources/gwascentral/gwascentral_study.py +1 -1
- pyobo/sources/intact.py +79 -0
- pyobo/struct/__init__.py +2 -1
- pyobo/struct/functional/ontology.py +2 -2
- pyobo/struct/obo/__init__.py +9 -0
- pyobo/{reader.py → struct/obo/reader.py} +21 -18
- pyobo/struct/obograph/__init__.py +16 -0
- pyobo/struct/obograph/export.py +315 -0
- pyobo/struct/obograph/reader.py +242 -0
- pyobo/struct/obograph/utils.py +47 -0
- pyobo/struct/struct.py +13 -23
- pyobo/struct/struct_utils.py +22 -14
- pyobo/struct/typedef.py +4 -0
- pyobo/struct/vocabulary.py +7 -0
- pyobo/version.py +1 -1
- {pyobo-0.12.2.dist-info → pyobo-0.12.4.dist-info}/METADATA +3 -2
- {pyobo-0.12.2.dist-info → pyobo-0.12.4.dist-info}/RECORD +28 -23
- pyobo/obographs.py +0 -152
- /pyobo/{reader_utils.py → struct/obo/reader_utils.py} +0 -0
- {pyobo-0.12.2.dist-info → pyobo-0.12.4.dist-info}/WHEEL +0 -0
- {pyobo-0.12.2.dist-info → pyobo-0.12.4.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.2.dist-info → pyobo-0.12.4.dist-info}/licenses/LICENSE +0 -0
pyobo/__init__.py
CHANGED

@@ -57,20 +57,29 @@ from .api import (
 )
 from .getters import get_ontology
 from .ner import get_grounder, ground
-from .obographs import parse_results_from_obo
 from .plugins import (
     has_nomenclature_plugin,
     iter_nomenclature_plugins,
     run_nomenclature_plugin,
 )
-from .
-
+from .struct import (
+    Obo,
+    Reference,
+    StanzaType,
+    Synonym,
+    SynonymTypeDef,
+    Term,
+    TypeDef,
+    default_reference,
+)
+from .struct.obo import from_obo_path, from_obonet
 from .utils.path import ensure_path
 from .version import get_version
 
 __all__ = [
     "Obo",
     "Reference",
+    "StanzaType",
     "Synonym",
     "SynonymTypeDef",
     "Term",
@@ -138,6 +147,5 @@ __all__ = [
     "has_nomenclature_plugin",
     "is_descendent",
     "iter_nomenclature_plugins",
-    "parse_results_from_obo",
     "run_nomenclature_plugin",
 ]
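The net effect at the top level: the OBO reader entry points now live in pyobo.struct.obo while staying re-exported from pyobo, the obographs-based parser is gone, and StanzaType becomes public. A minimal sketch of the reorganized import surface; the "Term" value is an assumption based on how stanza types are compared elsewhere in this diff:

    # both import paths now work; pyobo.struct.obo is the canonical home
    from pyobo import StanzaType, from_obo_path, from_obonet
    from pyobo.struct.obo import from_obo_path as canonical_from_obo_path

    # assumption: StanzaType is a literal-like type whose values include
    # "Term", matching the `term.type == "Term"` check in the new exporter
    stanza_type: StanzaType = "Term"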
pyobo/api/names.py
CHANGED

@@ -166,7 +166,14 @@ def get_references(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> set[Reference]:
         ontology = get_ontology(prefix, **kwargs)
         return sorted(ontology.iterate_references())
 
-
+    try:
+        return set(_get_references())
+    except NoBuildError:
+        logger.debug("[%s] no build", prefix)
+        return set()
+    except (Exception, subprocess.CalledProcessError) as e:
+        logger.exception("[%s v%s] could not load: %s", prefix, version, e)
+        return set()
 
 
 @lru_cache
pyobo/getters.py
CHANGED

@@ -33,8 +33,8 @@ from .constants import (
 )
 from .identifier_utils import ParseError, wrap_norm_prefix
 from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
-from .reader import from_obo_path, from_obonet
 from .struct import Obo
+from .struct.obo import from_obo_path, from_obonet
 from .utils.io import safe_open_writer
 from .utils.path import ensure_path, prefix_directory_join
 from .version import get_git_hash, get_version
@@ -159,13 +159,20 @@ def get_ontology(
     elif ontology_format == "obo":
         pass  # all gucci
     elif ontology_format == "owl":
-
+        import bioontologies.robot
 
         _converted_obo_path = path.with_suffix(".obo")
         if prefix in REQUIRES_NO_ROBOT_CHECK:
             robot_check = False
-        robot.convert(path, _converted_obo_path, check=robot_check)
+        bioontologies.robot.convert(path, _converted_obo_path, check=robot_check)
         path = _converted_obo_path
+    elif ontology_format == "json":
+        from .struct.obograph import read_obograph
+
+        obo = read_obograph(prefix=prefix, path=path)
+        if cache:
+            obo.write_default(force=force_process)
+        return obo
     else:
         raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
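With the new "json" branch, OBO Graph JSON artifacts become first-class inputs to get_ontology. A sketch of the same path used directly, with placeholder prefix and path; both keyword names come straight from the branch above:

    from pyobo.struct.obograph import read_obograph

    # parse a local OBO Graph JSON dump into an Obo object, exactly as the
    # new branch does before optionally caching with obo.write_default()
    obo = read_obograph(prefix="example", path="example.json")
    print(type(obo))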
pyobo/identifier_utils/__init__.py
CHANGED

@@ -12,6 +12,7 @@ from .api import (
     UnregisteredPrefixError,
     _is_valid_identifier,
     _parse_str_or_curie_or_uri_helper,
+    get_converter,
     standardize_ec,
     wrap_norm_prefix,
 )
@@ -27,6 +28,7 @@ __all__ = [
     "UnregisteredPrefixError",
     "_is_valid_identifier",
     "_parse_str_or_curie_or_uri_helper",
+    "get_converter",
     "get_rules",
     "ground_relation",
     "standardize_ec",
pyobo/identifier_utils/api.py
CHANGED

@@ -135,9 +135,10 @@ def _preclean_uri(s: str) -> str:
 
 
 @lru_cache(1)
-def 
+def get_converter() -> PreprocessingConverter:
+    """Get a converter."""
     return PreprocessingConverter(
-
+        records=bioregistry.manager.converter.records,
         rules=get_rules(),
         preclean=_preclean_uri,
     )
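get_converter() bundles the Bioregistry's converter records with PyOBO's preprocessing rules and URI cleanup, and the @lru_cache(1) decorator makes it a process-wide singleton. A sketch under the assumption that PreprocessingConverter supports the standard curies.Converter operations:

    from pyobo.identifier_utils import get_converter

    converter = get_converter()
    assert converter is get_converter()  # cached: constructed exactly once

    # assumption: the usual curies.Converter API (compress/expand) applies
    print(converter.compress("http://purl.obolibrary.org/obo/GO_0008150"))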
pyobo/sources/__init__.py
CHANGED

@@ -27,6 +27,7 @@ from .gtdb import GTDBGetter
 from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
 from .hgnc import HGNCGetter, HGNCGroupGetter
 from .icd import ICD10Getter, ICD11Getter
+from .intact import IntactGetter
 from .interpro import InterProGetter
 from .itis import ITISGetter
 from .kegg import KEGGGeneGetter, KEGGGenomeGetter, KEGGPathwayGetter
@@ -102,6 +103,7 @@ __all__ = [
     "ICD10Getter",
     "ICD11Getter",
     "ITISGetter",
+    "IntactGetter",
     "InterProGetter",
     "KEGGGeneGetter",
     "KEGGGenomeGetter",
pyobo/sources/drugbank/drugbank.py
CHANGED

@@ -149,7 +149,7 @@ def get_xml_root(version: str | None = None) -> ElementTree.Element:
         raise NoBuildError from e
 
     element = parse_drugbank(version=version, username=username, password=password)
-    return element.getroot()
+    return element.getroot()  # type:ignore
 
 
 ns = "{http://www.drugbank.ca}"
pyobo/sources/gwascentral/gwascentral_study.py
CHANGED

@@ -79,7 +79,7 @@ def iterate_terms(version: str, force: bool = False) -> Iterable[Term]:
         except ElementTree.ParseError:
             logger.warning("malformed XML in %s", tar_info.path)
             continue
-        yield _get_term_from_tree(tree)
+        yield _get_term_from_tree(tree)  # type:ignore
 
 
 if __name__ == "__main__":
pyobo/sources/intact.py
ADDED

@@ -0,0 +1,79 @@
+"""Converter for IntAct complexes."""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Iterable
+
+import pandas as pd
+from pydantic import ValidationError
+from tqdm import tqdm
+
+from pyobo.struct import Obo, Reference, Term
+from pyobo.utils.path import ensure_path
+
+__all__ = [
+    "IntactGetter",
+]
+
+PREFIX = "intact"
+COMPLEXPORTAL_MAPPINGS_UNVERSIONED = (
+    "https://ftp.ebi.ac.uk/pub/databases/intact/complex/current/various/cpx_ebi_ac_translation.txt"
+)
+REACTOME_MAPPINGS_UNVERSIONED = (
+    "https://ftp.ebi.ac.uk/pub/databases/intact/current/various/reactome.dat"
+)
+
+
+# TODO it looks like it's probably also the case that
+#  this semantic space contains IDs for proteins/
+#  interactors. These need to be added too
+
+
+class IntactGetter(Obo):
+    """An ontology representation of IntAct."""
+
+    bioversions_key = ontology = PREFIX
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(force=force, version=self._version_or_raise)
+
+
+def get_terms(version: str, force: bool = False) -> Iterable[Term]:
+    """Get terms for IntAct complexes."""
+    cplx = _get_mappings(
+        COMPLEXPORTAL_MAPPINGS_UNVERSIONED, "complexportal", version=version, force=force
+    )
+    reactome = _get_mappings(
+        REACTOME_MAPPINGS_UNVERSIONED, "reactome", version=version, force=force
+    )
+    for intact_id in sorted(set(cplx).union(reactome)):
+        term = Term.from_triple(PREFIX, intact_id)
+        for complexportal_xref in sorted(cplx.get(intact_id, [])):
+            term.append_exact_match(complexportal_xref)
+        for reactome_xref in sorted(reactome.get(intact_id, [])):
+            term.append_xref(reactome_xref)
+        yield term
+
+
+def _get_mappings(
+    url: str, target_prefix: str, version: str, *, force: bool = False
+) -> dict[str, set[Reference]]:
+    path = ensure_path(PREFIX, url=url, version=version, force=force)
+    df = pd.read_csv(path, sep="\t", header=None, usecols=[0, 1])
+
+    dd = defaultdict(set)
+    for intact_id, target_identifier in df.values:
+        try:
+            obj = Reference(prefix=target_prefix, identifier=target_identifier)
+        except ValidationError:
+            tqdm.write(f"[intact:{intact_id}] invalid xref: {target_prefix}:{target_identifier}")
+            continue
+        dd[intact_id].add(obj)
+
+    return dict(dd)
+
+
+if __name__ == "__main__":
+    IntactGetter.cli()
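The getter's design: both mapping files are keyed by IntAct complex accession, ComplexPortal cross-references are asserted as exact matches while Reactome links stay plain xrefs, and malformed identifiers are skipped with a warning rather than failing the build. A usage sketch; resolving the current version via bioversions requires network access:

    from pyobo.sources.intact import IntactGetter

    ontology = IntactGetter()
    for term in ontology.iter_terms():
        print(term.curie)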
pyobo/struct/__init__.py
CHANGED

@@ -18,7 +18,7 @@ from .struct import (
     TypeDef,
     make_ad_hoc_ontology,
 )
-from .struct_utils import Annotation, Stanza
+from .struct_utils import Annotation, Stanza, StanzaType
 from .typedef import (
     derives_from,
     enables,
@@ -51,6 +51,7 @@ __all__ = [
     "Reference",
     "Referenced",
     "Stanza",
+    "StanzaType",
     "Synonym",
     "SynonymTypeDef",
     "Term",
pyobo/struct/functional/ontology.py
CHANGED

@@ -250,7 +250,7 @@ class Import(Box):
 
 def get_rdf_graph_oracle(boxes: list[Box], *, prefix_map: dict[str, str]) -> Graph:
     """Serialize to turtle via OFN and conversion with ROBOT."""
-
+    import bioontologies.robot
 
     ontology = Ontology(
         iri=EXAMPLE_ONTOLOGY_IRI,
@@ -265,7 +265,7 @@ def get_rdf_graph_oracle(boxes: list[Box], *, prefix_map: dict[str, str]) -> Graph:
     ofn_path.write_text(text)
     ttl_path = stub.with_suffix(".ttl")
     try:
-        convert(ofn_path, ttl_path)
+        bioontologies.robot.convert(ofn_path, ttl_path)
     except subprocess.CalledProcessError:
         raise RuntimeError(f"failed to convert axioms from:\n\n{text}") from None
     graph.parse(ttl_path)
pyobo/{reader.py → struct/obo/reader.py}
RENAMED

@@ -20,15 +20,6 @@ from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
 from tqdm.auto import tqdm
 
-from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
-from .identifier_utils import (
-    NotCURIEError,
-    ParseError,
-    UnparsableIRIError,
-    _is_valid_identifier,
-    _parse_str_or_curie_or_uri_helper,
-    get_rules,
-)
 from .reader_utils import (
     _chomp_axioms,
     _chomp_references,
@@ -36,7 +27,9 @@ from .reader_utils import (
     _chomp_typedef,
     _parse_provenance_list,
 )
-from 
+from .. import vocabulary as v
+from ..reference import OBOLiteral, _obo_parse_identifier
+from ..struct import (
     Obo,
     Reference,
     Synonym,
@@ -46,18 +39,26 @@ from .struct import (
     default_reference,
     make_ad_hoc_ontology,
 )
-from 
-from 
-from 
-from 
-from 
-
-
-
+from ..struct_utils import Annotation, Stanza
+from ..typedef import comment as has_comment
+from ..typedef import default_typedefs, has_ontology_root_term
+from ...constants import DATE_FORMAT, PROVENANCE_PREFIXES
+from ...identifier_utils import (
+    NotCURIEError,
+    ParseError,
+    UnparsableIRIError,
+    _is_valid_identifier,
+    _parse_str_or_curie_or_uri_helper,
+    get_rules,
+)
+from ...utils.cache import write_gzipped_graph
+from ...utils.io import safe_open
+from ...utils.misc import STATIC_VERSION_REWRITES, cleanup_version
 
 __all__ = [
     "from_obo_path",
     "from_obonet",
+    "from_str",
 ]
 
 logger = logging.getLogger(__name__)
@@ -1353,6 +1354,8 @@ def _handle_prop(
         case None:
             return None
 
+    return None
+
 
 def _get_prop(
     property_id: str,
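Alongside the move, from_str joins the public API, so an OBO document can be parsed from memory without touching disk. A hedged sketch: the exact keyword arguments of from_str are not shown in this diff, and the header below assumes the parser wants a resolvable ontology prefix:

    from pyobo.struct.obo import from_str

    # a minimal OBO document; "go" is used on the assumption that the
    # "ontology:" header must name a registered prefix
    text = "\n".join([
        "ontology: go",
        "",
        "[Term]",
        "id: GO:0008150",
        "name: biological_process",
    ])
    obo = from_str(text)
    print(obo.ontology)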
pyobo/struct/obograph/__init__.py
ADDED

@@ -0,0 +1,16 @@
+"""I/O for OBO Graph JSON."""
+
+from .export import to_obograph, to_parsed_obograph, to_parsed_obograph_oracle, write_obograph
+from .reader import from_obograph, from_standardized_graph, read_obograph
+from .utils import assert_graph_equal
+
+__all__ = [
+    "assert_graph_equal",
+    "from_obograph",
+    "from_standardized_graph",
+    "read_obograph",
+    "to_obograph",
+    "to_parsed_obograph",
+    "to_parsed_obograph_oracle",
+    "write_obograph",
+]
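These exports form a symmetric pair: to_obograph/write_obograph serialize, from_obograph/read_obograph parse, assert_graph_equal backs round-trip tests, and to_parsed_obograph_oracle provides a ROBOT-derived reference document for the same purpose. A round-trip sketch using only signatures visible in this diff:

    from pyobo.struct.obograph import read_obograph, write_obograph

    def round_trip(obo, path: str):
        """Write an ontology as OBO Graph JSON and read it back."""
        write_obograph(obo, path)
        return read_obograph(prefix=obo.ontology, path=path)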
pyobo/struct/obograph/export.py
ADDED

@@ -0,0 +1,315 @@
+"""Exports to OBO Graph JSON."""
+
+import tempfile
+from pathlib import Path
+
+import bioregistry
+import curies
+import obographs as og
+from curies import Converter, ReferenceTuple
+from curies import vocabulary as v
+
+from pyobo.identifier_utils.api import get_converter
+from pyobo.struct import Obo, OBOLiteral, Stanza, Term, TypeDef
+from pyobo.struct import typedef as tdv
+from pyobo.utils.io import safe_open
+
+__all__ = [
+    "to_obograph",
+    "to_parsed_obograph",
+    "write_obograph",
+]
+
+
+def write_obograph(obo: Obo, path: str | Path, *, converter: Converter | None = None) -> None:
+    """Write an ontology to a file as OBO Graph JSON."""
+    path = Path(path).expanduser().resolve()
+    raw_graph = to_obograph(obo, converter=converter)
+    with safe_open(path, read=False) as file:
+        file.write(raw_graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
+
+
+def to_parsed_obograph_oracle(
+    obo: Obo, *, converter: Converter | None = None
+) -> og.StandardizedGraphDocument:
+    """Serialize to OBO, convert to OBO Graph JSON with ROBOT, load, then parse."""
+    import bioontologies.robot
+
+    if converter is None:
+        converter = get_converter()
+
+    with tempfile.TemporaryDirectory() as directory:
+        stub = Path(directory).joinpath("test")
+        obo_path = stub.with_suffix(".obo")
+        obograph_path = stub.with_suffix(".json")
+        obo.write_obo(obo_path)
+        bioontologies.robot.convert(input_path=obo_path, output_path=obograph_path)
+        raw = og.read(obograph_path, squeeze=False)
+    rv = raw.standardize(converter)
+    for graph in rv.graphs:
+        if graph.meta and graph.meta.properties:
+            graph.meta.properties = [
+                p
+                for p in graph.meta.properties
+                if p.predicate.pair
+                != ReferenceTuple(prefix="oboinowl", identifier="hasOBOFormatVersion")
+            ] or None
+    return rv
+
+
+def to_obograph(obo: Obo, *, converter: Converter | None = None) -> og.GraphDocument:
+    """Convert an ontology to an OBO Graph JSON document."""
+    if converter is None:
+        converter = get_converter()
+    return to_parsed_obograph(obo).to_raw(converter)
+
+
+def to_parsed_obograph(obo: Obo) -> og.StandardizedGraphDocument:
+    """Convert an ontology to a processed OBO Graph JSON document."""
+    return og.StandardizedGraphDocument(graphs=[_to_parsed_graph(obo)])
+
+
+def _to_parsed_graph(obo: Obo) -> og.StandardizedGraph:
+    return og.StandardizedGraph(
+        id=f"http://purl.obolibrary.org/obo/{obo.ontology}.owl",
+        meta=_get_meta(obo),
+        nodes=_get_nodes(obo),
+        edges=_get_edges(obo),
+        equivalent_node_sets=_get_equivalent_node_sets(obo),
+        property_chain_axioms=_get_property_chain_axioms(obo),
+        domain_range_axioms=_get_domain_ranges(obo),
+        logical_definition_axioms=_get_logical_definition_axioms(obo),
+    )
+
+
+def _get_logical_definition_axioms(obo: Obo) -> list[og.StandardizedLogicalDefinition]:
+    rv: list[og.StandardizedLogicalDefinition] = []
+    # TODO
+    return rv
+
+
+def _get_domain_ranges(obo: Obo) -> list[og.StandardizedDomainRangeAxiom]:
+    rv = []
+    for typedef in obo.typedefs or []:
+        if typedef.domain or typedef.range:
+            rv.append(
+                og.StandardizedDomainRangeAxiom(
+                    predicate=typedef.reference,
+                    domains=[typedef.domain] if typedef.domain else [],
+                    ranges=[typedef.range] if typedef.range else [],
+                )
+            )
+    return rv
+
+
+def _get_equivalent_node_sets(obo: Obo) -> list[og.StandardizedEquivalentNodeSet]:
+    rv = []
+    for node in obo:
+        for e in node.equivalent_to:
+            rv.append(og.StandardizedEquivalentNodeSet(node=node.reference, equivalents=[e]))
+    return rv
+
+
+def _get_property_chain_axioms(obo: Obo) -> list[og.StandardizedPropertyChainAxiom]:
+    rv = []
+    for typedef in obo.typedefs or []:
+        for chain in typedef.holds_over_chain:
+            rv.append(
+                og.StandardizedPropertyChainAxiom(
+                    predicate=typedef.reference,
+                    chain=chain,
+                )
+            )
+        # TODO typedef.equivalent_to_chain
+    return rv
+
+
+def _get_meta(obo: Obo) -> og.StandardizedMeta | None:
+    properties = []
+
+    if description := bioregistry.get_description(obo.ontology):
+        properties.append(
+            og.StandardizedProperty(
+                predicate=v.has_description,
+                value=description,
+            )
+        )
+
+    for root_term in obo.root_terms or []:
+        properties.append(
+            og.StandardizedProperty(
+                predicate=v.has_ontology_root_term,
+                value=root_term,
+            )
+        )
+
+    if license_spdx_id := bioregistry.get_license(obo.ontology):
+        properties.append(
+            og.StandardizedProperty(
+                predicate=v.has_license,
+                value=license_spdx_id,
+            )
+        )
+
+    if obo.name:
+        properties.append(
+            og.StandardizedProperty(
+                predicate=v.has_title,
+                value=obo.name,
+            )
+        )
+
+    for p in obo.property_values or []:
+        properties.append(
+            og.StandardizedProperty(
+                predicate=p.predicate,
+                value=p.value.value if isinstance(p.value, OBOLiteral) else p.value,
+            )
+        )
+
+    if obo.data_version:
+        version_iri = (
+            f"http://purl.obolibrary.org/obo/{obo.ontology}/{obo.data_version}/{obo.ontology}.owl"
+        )
+    else:
+        version_iri = None
+
+    # comments don't make the round trip
+    subsets = [r for r, _ in obo.subsetdefs or []] or None
+
+    if not properties and not version_iri and not subsets:
+        return None
+
+    return og.StandardizedMeta(
+        properties=properties or None,
+        version_iri=version_iri,
+        subsets=subsets,
+    )
+
+
+def _get_nodes(obo: Obo) -> list[og.StandardizedNode]:
+    rv = []
+    for term in obo:
+        rv.append(_get_class_node(term))
+    for typedef in _get_typedefs(obo):
+        rv.append(_get_typedef_node(typedef))
+    return rv
+
+
+def _get_typedefs(obo: Obo) -> set[TypeDef]:
+    rv = set(obo.typedefs or [])
+    if obo.auto_generated_by:
+        rv.add(tdv.obo_autogenerated_by)
+    return rv
+
+
+def _get_definition(stanza: Stanza) -> og.StandardizedDefinition | None:
+    if not stanza.definition:
+        return None
+    return og.StandardizedDefinition(
+        value=stanza.definition,
+        xrefs=[p for p in stanza.provenance if isinstance(p, curies.Reference)],
+    )
+
+
+def _get_synonyms(stanza: Stanza) -> list[og.StandardizedSynonym] | None:
+    return [
+        og.StandardizedSynonym(
+            text=synonym.name,
+            predicate=v.synonym_scopes[synonym.specificity]
+            if synonym.specificity is not None
+            else v.has_related_synonym,
+            type=synonym.type,
+            xrefs=[p for p in synonym.provenance if isinstance(p, curies.Reference)],
+        )
+        for synonym in stanza.synonyms
+    ] or None
+
+
+def _get_properties(term: Stanza) -> list[og.StandardizedProperty] | None:
+    properties = []
+    for predicate, obj in term.iterate_object_properties():
+        properties.append(
+            og.StandardizedProperty(
+                predicate=predicate,
+                value=obj,
+            )
+        )
+    for predicate, literal in term.iterate_literal_properties():
+        properties.append(
+            og.StandardizedProperty(
+                predicate=predicate,
+                value=literal.value,
+            )
+        )
+    return properties or None
+
+
+def _get_xrefs(stanza: Stanza) -> list[og.StandardizedXref] | None:
+    return [og.StandardizedXref(reference=xref) for xref in stanza.xrefs] or None
+
+
+def _meta_or_none(meta: og.StandardizedMeta) -> og.StandardizedMeta | None:
+    if all(
+        x is None
+        for x in (
+            meta.definition,
+            meta.subsets,
+            meta.xrefs,
+            meta.synonyms,
+            meta.comments,
+            meta.version_iri,
+            meta.properties,
+        )
+    ):
+        return None
+    return meta
+
+
+def _get_class_node(term: Term) -> og.StandardizedNode:
+    meta = og.StandardizedMeta(
+        definition=_get_definition(term),
+        subsets=term.subsets or None,
+        xrefs=_get_xrefs(term),
+        synonyms=_get_synonyms(term),
+        comments=term.get_comments() or None,
+        deprecated=term.is_obsolete or False,
+        properties=_get_properties(term),
+    )
+    return og.StandardizedNode(
+        reference=term.reference,
+        label=term.name,
+        meta=_meta_or_none(meta),
+        type="CLASS" if term.type == "Term" else "INDIVIDUAL",
+    )
+
+
+def _get_typedef_node(typedef: TypeDef) -> og.StandardizedNode:
+    meta = og.StandardizedMeta(
+        definition=_get_definition(typedef),
+        subsets=typedef.subsets or None,
+        xrefs=_get_xrefs(typedef),
+        synonyms=_get_synonyms(typedef),
+        comments=typedef.get_comments() or None,
+        deprecated=typedef.is_obsolete or False,
+        properties=_get_properties(typedef),
+    )
+    return og.StandardizedNode(
+        reference=typedef.reference,
+        label=typedef.name,
+        meta=_meta_or_none(meta),
+        type="PROPERTY",
+        property_type="ANNOTATION" if typedef.is_metadata_tag else "OBJECT",
+    )
+
+
+def _get_edges(obo: Obo) -> list[og.StandardizedEdge]:
+    rv = [
+        og.StandardizedEdge(
+            subject=stanza.reference,
+            predicate=typedef.reference,
+            object=target,
+        )
+        for stanza, typedef, target in obo.iterate_edges(include_xrefs=False)
+    ]
+    return rv