pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/reader.py
CHANGED
|
@@ -1,33 +1,59 @@
|
|
|
1
1
|
"""OBO Readers."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
6
|
+
import typing as t
|
|
7
|
+
from collections import Counter
|
|
4
8
|
from collections.abc import Iterable, Mapping
|
|
5
9
|
from datetime import datetime
|
|
10
|
+
from io import StringIO
|
|
6
11
|
from pathlib import Path
|
|
7
|
-
from
|
|
12
|
+
from textwrap import dedent
|
|
13
|
+
from typing import Any
|
|
8
14
|
|
|
9
15
|
import bioregistry
|
|
10
16
|
import networkx as nx
|
|
17
|
+
from curies import ReferenceTuple
|
|
18
|
+
from curies.vocabulary import SynonymScope
|
|
11
19
|
from more_itertools import pairwise
|
|
12
20
|
from tqdm.auto import tqdm
|
|
13
21
|
|
|
14
22
|
from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
|
|
15
|
-
from .identifier_utils import
|
|
16
|
-
|
|
23
|
+
from .identifier_utils import (
|
|
24
|
+
BlacklistedError,
|
|
25
|
+
NotCURIEError,
|
|
26
|
+
ParseError,
|
|
27
|
+
UnparsableIRIError,
|
|
28
|
+
_is_valid_identifier,
|
|
29
|
+
_parse_str_or_curie_or_uri_helper,
|
|
30
|
+
remap_prefix,
|
|
31
|
+
str_is_blacklisted,
|
|
32
|
+
)
|
|
33
|
+
from .reader_utils import (
|
|
34
|
+
_chomp_axioms,
|
|
35
|
+
_chomp_references,
|
|
36
|
+
_chomp_specificity,
|
|
37
|
+
_chomp_typedef,
|
|
38
|
+
_parse_provenance_list,
|
|
39
|
+
)
|
|
17
40
|
from .struct import (
|
|
18
41
|
Obo,
|
|
19
42
|
Reference,
|
|
20
43
|
Synonym,
|
|
21
|
-
SynonymSpecificities,
|
|
22
|
-
SynonymSpecificity,
|
|
23
44
|
SynonymTypeDef,
|
|
24
45
|
Term,
|
|
25
46
|
TypeDef,
|
|
47
|
+
default_reference,
|
|
26
48
|
make_ad_hoc_ontology,
|
|
27
49
|
)
|
|
28
|
-
from .struct
|
|
29
|
-
from .struct.
|
|
30
|
-
from .
|
|
50
|
+
from .struct import vocabulary as v
|
|
51
|
+
from .struct.reference import OBOLiteral, _obo_parse_identifier
|
|
52
|
+
from .struct.struct_utils import Annotation, Stanza
|
|
53
|
+
from .struct.typedef import comment as has_comment
|
|
54
|
+
from .struct.typedef import default_typedefs, has_ontology_root_term
|
|
55
|
+
from .utils.cache import write_gzipped_graph
|
|
56
|
+
from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version
|
|
31
57
|
|
|
32
58
|
__all__ = [
|
|
33
59
|
"from_obo_path",
|
|
@@ -36,369 +62,1032 @@ __all__ = [
|
|
|
36
62
|
|
|
37
63
|
logger = logging.getLogger(__name__)
|
|
38
64
|
|
|
39
|
-
# FIXME use bioontologies
|
|
40
|
-
# RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
|
|
41
|
-
RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
|
|
42
|
-
"part_of": part_of.pair,
|
|
43
|
-
"has_part": has_part.pair,
|
|
44
|
-
"develops_from": develops_from.pair,
|
|
45
|
-
"seeAlso": ("rdf", "seeAlso"),
|
|
46
|
-
"dc-contributor": ("dc", "contributor"),
|
|
47
|
-
"dc-creator": ("dc", "creator"),
|
|
48
|
-
}
|
|
49
|
-
|
|
50
65
|
|
|
51
66
|
def from_obo_path(
|
|
52
|
-
path:
|
|
67
|
+
path: str | Path,
|
|
68
|
+
prefix: str | None = None,
|
|
69
|
+
*,
|
|
70
|
+
strict: bool = False,
|
|
71
|
+
version: str | None,
|
|
72
|
+
upgrade: bool = True,
|
|
73
|
+
use_tqdm: bool = False,
|
|
74
|
+
ignore_obsolete: bool = False,
|
|
75
|
+
_cache_path: Path | None = None,
|
|
53
76
|
) -> Obo:
|
|
54
77
|
"""Get the OBO graph from a path."""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
)
|
|
78
|
+
path = Path(path).expanduser().resolve()
|
|
79
|
+
if path.suffix.endswith(".gz"):
|
|
80
|
+
import gzip
|
|
81
|
+
|
|
82
|
+
logger.info("[%s] parsing gzipped OBO with obonet from %s", prefix or "<unknown>", path)
|
|
83
|
+
with gzip.open(path, "rt") as file:
|
|
84
|
+
graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
|
|
85
|
+
elif path.suffix.endswith(".zip"):
|
|
86
|
+
import io
|
|
87
|
+
import zipfile
|
|
88
|
+
|
|
89
|
+
logger.info("[%s] parsing zipped OBO with obonet from %s", prefix or "<unknown>", path)
|
|
90
|
+
with zipfile.ZipFile(path) as zf:
|
|
91
|
+
with zf.open(path.name.removesuffix(".zip"), "r") as file:
|
|
92
|
+
content = file.read().decode("utf-8")
|
|
93
|
+
graph = _read_obo(
|
|
94
|
+
io.StringIO(content), prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm
|
|
95
|
+
)
|
|
96
|
+
else:
|
|
97
|
+
logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
|
|
98
|
+
with open(path) as file:
|
|
99
|
+
graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
|
|
68
100
|
|
|
69
101
|
if prefix:
|
|
70
102
|
# Make sure the graph is named properly
|
|
71
103
|
_clean_graph_ontology(graph, prefix)
|
|
72
104
|
|
|
105
|
+
if _cache_path:
|
|
106
|
+
logger.info("[%s] writing obonet cache to %s", prefix, _cache_path)
|
|
107
|
+
write_gzipped_graph(path=_cache_path, graph=graph)
|
|
108
|
+
|
|
73
109
|
# Convert to an Obo instance and return
|
|
74
|
-
return from_obonet(graph, strict=strict,
|
|
110
|
+
return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _read_obo(
|
|
114
|
+
filelike, prefix: str | None, ignore_obsolete: bool, use_tqdm: bool = True
|
|
115
|
+
) -> nx.MultiDiGraph:
|
|
116
|
+
import obonet
|
|
117
|
+
|
|
118
|
+
return obonet.read_obo(
|
|
119
|
+
tqdm(
|
|
120
|
+
filelike,
|
|
121
|
+
unit_scale=True,
|
|
122
|
+
desc=f"[{prefix or ''}] parsing OBO",
|
|
123
|
+
disable=not use_tqdm,
|
|
124
|
+
leave=True,
|
|
125
|
+
),
|
|
126
|
+
ignore_obsolete=ignore_obsolete,
|
|
127
|
+
)
|
|
75
128
|
|
|
76
129
|
|
|
77
|
-
def
|
|
130
|
+
def _normalize_prefix_strict(prefix: str) -> str:
|
|
131
|
+
n = bioregistry.normalize_prefix(prefix)
|
|
132
|
+
if n is None:
|
|
133
|
+
raise ValueError(f"unknown prefix: {prefix}")
|
|
134
|
+
return n
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def from_str(
|
|
138
|
+
text: str,
|
|
139
|
+
*,
|
|
140
|
+
strict: bool = False,
|
|
141
|
+
version: str | None = None,
|
|
142
|
+
upgrade: bool = True,
|
|
143
|
+
ignore_obsolete: bool = False,
|
|
144
|
+
use_tqdm: bool = False,
|
|
145
|
+
) -> Obo:
|
|
146
|
+
"""Read an ontology from a string representation."""
|
|
147
|
+
import obonet
|
|
148
|
+
|
|
149
|
+
text = dedent(text).strip()
|
|
150
|
+
io = StringIO()
|
|
151
|
+
io.write(text)
|
|
152
|
+
io.seek(0)
|
|
153
|
+
graph = obonet.read_obo(io, ignore_obsolete=ignore_obsolete)
|
|
154
|
+
return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def from_obonet(
|
|
158
|
+
graph: nx.MultiDiGraph,
|
|
159
|
+
*,
|
|
160
|
+
strict: bool = False,
|
|
161
|
+
version: str | None = None,
|
|
162
|
+
upgrade: bool = True,
|
|
163
|
+
use_tqdm: bool = False,
|
|
164
|
+
) -> Obo:
|
|
78
165
|
"""Get all of the terms from a OBO graph."""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
name = _get_name(graph=graph, ontology=ontology)
|
|
87
|
-
|
|
88
|
-
data_version = graph.graph.get("data-version")
|
|
89
|
-
if not data_version:
|
|
90
|
-
if date is not None:
|
|
91
|
-
data_version = date.strftime("%Y-%m-%d")
|
|
92
|
-
logger.info(
|
|
93
|
-
"[%s] does not report a version. falling back to date: %s",
|
|
94
|
-
ontology,
|
|
95
|
-
data_version,
|
|
96
|
-
)
|
|
97
|
-
else:
|
|
98
|
-
logger.warning("[%s] does not report a version nor a date", ontology)
|
|
99
|
-
else:
|
|
100
|
-
data_version = cleanup_version(data_version=data_version, prefix=ontology)
|
|
101
|
-
if data_version is not None:
|
|
102
|
-
logger.info("[%s] using version %s", ontology, data_version)
|
|
103
|
-
elif date is not None:
|
|
104
|
-
logger.info(
|
|
105
|
-
"[%s] unrecognized version format, falling back to date: %s",
|
|
106
|
-
ontology,
|
|
107
|
-
data_version,
|
|
108
|
-
)
|
|
109
|
-
data_version = date.strftime("%Y-%m-%d")
|
|
110
|
-
else:
|
|
111
|
-
logger.warning(
|
|
112
|
-
"[%s] UNRECOGNIZED VERSION FORMAT AND MISSING DATE: %s", ontology, data_version
|
|
113
|
-
)
|
|
166
|
+
ontology_prefix_raw = graph.graph["ontology"]
|
|
167
|
+
ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
|
|
168
|
+
logger.info("[%s] extracting OBO using obonet", ontology_prefix)
|
|
169
|
+
|
|
170
|
+
date = _get_date(graph=graph, ontology_prefix=ontology_prefix)
|
|
171
|
+
name = _get_name(graph=graph, ontology_prefix=ontology_prefix)
|
|
172
|
+
imports = graph.graph.get("import")
|
|
114
173
|
|
|
174
|
+
macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
|
|
175
|
+
|
|
176
|
+
data_version = _clean_graph_version(
|
|
177
|
+
graph, ontology_prefix=ontology_prefix, version=version, date=date
|
|
178
|
+
)
|
|
115
179
|
if data_version and "/" in data_version:
|
|
116
|
-
raise ValueError(
|
|
117
|
-
|
|
118
|
-
#: Parsed CURIEs to references (even external ones)
|
|
119
|
-
reference_it = (
|
|
120
|
-
Reference(
|
|
121
|
-
prefix=prefix,
|
|
122
|
-
identifier=bioregistry.standardize_identifier(prefix, identifier),
|
|
123
|
-
# if name isn't available, it means its external to this ontology
|
|
124
|
-
name=data.get("name"),
|
|
180
|
+
raise ValueError(
|
|
181
|
+
f"[{ontology_prefix}] slashes not allowed in data versions because of filesystem usage: {data_version}"
|
|
125
182
|
)
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
183
|
+
|
|
184
|
+
missing_typedefs: set[ReferenceTuple] = set()
|
|
185
|
+
|
|
186
|
+
subset_typedefs = _get_subsetdefs(graph.graph, ontology_prefix=ontology_prefix)
|
|
187
|
+
|
|
188
|
+
root_terms: list[Reference] = []
|
|
189
|
+
property_values: list[Annotation] = []
|
|
190
|
+
for ann in iterate_node_properties(
|
|
191
|
+
graph.graph,
|
|
192
|
+
ontology_prefix=ontology_prefix,
|
|
193
|
+
upgrade=upgrade,
|
|
194
|
+
node=Reference(prefix="obo", identifier=ontology_prefix),
|
|
195
|
+
strict=strict,
|
|
196
|
+
context="graph property",
|
|
197
|
+
):
|
|
198
|
+
if ann.predicate.pair == has_ontology_root_term.pair:
|
|
199
|
+
match ann.value:
|
|
200
|
+
case OBOLiteral():
|
|
201
|
+
logger.warning(
|
|
202
|
+
"[%s] tried to use a literal as an ontology root: %s",
|
|
203
|
+
ontology_prefix,
|
|
204
|
+
ann.value.value,
|
|
205
|
+
)
|
|
206
|
+
continue
|
|
207
|
+
case Reference():
|
|
208
|
+
root_terms.append(ann.value)
|
|
209
|
+
else:
|
|
210
|
+
property_values.append(ann)
|
|
211
|
+
|
|
212
|
+
for remark in graph.graph.get("remark", []):
|
|
213
|
+
property_values.append(Annotation(has_comment.reference, OBOLiteral.string(remark)))
|
|
214
|
+
|
|
215
|
+
idspaces: dict[str, str] = {}
|
|
216
|
+
for x in graph.graph.get("idspace", []):
|
|
217
|
+
prefix, uri_prefix, *_ = (y.strip() for y in x.split(" ", 2))
|
|
218
|
+
idspaces[prefix] = uri_prefix
|
|
131
219
|
|
|
132
220
|
#: CURIEs to typedefs
|
|
133
|
-
typedefs: Mapping[
|
|
134
|
-
typedef.pair: typedef
|
|
221
|
+
typedefs: Mapping[ReferenceTuple, TypeDef] = {
|
|
222
|
+
typedef.pair: typedef
|
|
223
|
+
for typedef in iterate_typedefs(
|
|
224
|
+
graph,
|
|
225
|
+
ontology_prefix=ontology_prefix,
|
|
226
|
+
strict=strict,
|
|
227
|
+
upgrade=upgrade,
|
|
228
|
+
macro_config=macro_config,
|
|
229
|
+
)
|
|
135
230
|
}
|
|
136
231
|
|
|
137
|
-
synonym_typedefs: Mapping[
|
|
138
|
-
synonym_typedef.
|
|
139
|
-
for synonym_typedef in iterate_graph_synonym_typedefs(
|
|
232
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {
|
|
233
|
+
synonym_typedef.pair: synonym_typedef
|
|
234
|
+
for synonym_typedef in iterate_graph_synonym_typedefs(
|
|
235
|
+
graph,
|
|
236
|
+
ontology_prefix=ontology_prefix,
|
|
237
|
+
strict=strict,
|
|
238
|
+
upgrade=upgrade,
|
|
239
|
+
)
|
|
140
240
|
}
|
|
141
241
|
|
|
142
|
-
|
|
242
|
+
terms = _get_terms(
|
|
243
|
+
graph,
|
|
244
|
+
strict=strict,
|
|
245
|
+
ontology_prefix=ontology_prefix,
|
|
246
|
+
upgrade=upgrade,
|
|
247
|
+
typedefs=typedefs,
|
|
248
|
+
missing_typedefs=missing_typedefs,
|
|
249
|
+
synonym_typedefs=synonym_typedefs,
|
|
250
|
+
subset_typedefs=subset_typedefs,
|
|
251
|
+
macro_config=macro_config,
|
|
252
|
+
use_tqdm=use_tqdm,
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
return make_ad_hoc_ontology(
|
|
256
|
+
_ontology=ontology_prefix,
|
|
257
|
+
_name=name,
|
|
258
|
+
_auto_generated_by=graph.graph.get("auto-generated-by"),
|
|
259
|
+
_typedefs=list(typedefs.values()),
|
|
260
|
+
_synonym_typedefs=list(synonym_typedefs.values()),
|
|
261
|
+
_date=date,
|
|
262
|
+
_data_version=data_version,
|
|
263
|
+
_root_terms=root_terms,
|
|
264
|
+
terms=terms,
|
|
265
|
+
_property_values=property_values,
|
|
266
|
+
_subsetdefs=subset_typedefs,
|
|
267
|
+
_imports=imports,
|
|
268
|
+
_idspaces=idspaces,
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _get_terms(
|
|
273
|
+
graph,
|
|
274
|
+
*,
|
|
275
|
+
strict: bool,
|
|
276
|
+
ontology_prefix: str,
|
|
277
|
+
upgrade: bool,
|
|
278
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
279
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
280
|
+
subset_typedefs,
|
|
281
|
+
missing_typedefs: set[ReferenceTuple],
|
|
282
|
+
macro_config: MacroConfig,
|
|
283
|
+
use_tqdm: bool = False,
|
|
284
|
+
) -> list[Term]:
|
|
143
285
|
terms = []
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
286
|
+
for reference, data in _iter_obo_graph(
|
|
287
|
+
graph=graph,
|
|
288
|
+
strict=strict,
|
|
289
|
+
ontology_prefix=ontology_prefix,
|
|
290
|
+
use_tqdm=use_tqdm,
|
|
291
|
+
upgrade=upgrade,
|
|
292
|
+
):
|
|
293
|
+
if reference.prefix != ontology_prefix:
|
|
294
|
+
continue
|
|
295
|
+
if not data:
|
|
296
|
+
# this allows us to skip anything that isn't really defined
|
|
297
|
+
# caveat: this misses terms that are just defined with an ID
|
|
147
298
|
continue
|
|
148
299
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
raise e
|
|
157
|
-
xrefs, provenance = [], []
|
|
158
|
-
for node_xref in node_xrefs:
|
|
159
|
-
if node_xref.prefix in PROVENANCE_PREFIXES:
|
|
160
|
-
provenance.append(node_xref)
|
|
161
|
-
else:
|
|
162
|
-
xrefs.append(node_xref)
|
|
163
|
-
n_xrefs += len(xrefs)
|
|
300
|
+
term = Term(
|
|
301
|
+
reference=reference,
|
|
302
|
+
builtin=_get_boolean(data, "builtin"),
|
|
303
|
+
is_anonymous=_get_boolean(data, "is_anonymous"),
|
|
304
|
+
is_obsolete=_get_boolean(data, "is_obsolete"),
|
|
305
|
+
namespace=data.get("namespace"),
|
|
306
|
+
)
|
|
164
307
|
|
|
165
|
-
|
|
166
|
-
|
|
308
|
+
_process_alts(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
309
|
+
_process_parents(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
310
|
+
_process_synonyms(
|
|
311
|
+
term,
|
|
312
|
+
data,
|
|
313
|
+
ontology_prefix=ontology_prefix,
|
|
314
|
+
strict=strict,
|
|
315
|
+
upgrade=upgrade,
|
|
316
|
+
synonym_typedefs=synonym_typedefs,
|
|
317
|
+
)
|
|
318
|
+
_process_xrefs(
|
|
319
|
+
term,
|
|
320
|
+
data,
|
|
321
|
+
ontology_prefix=ontology_prefix,
|
|
322
|
+
strict=strict,
|
|
323
|
+
macro_config=macro_config,
|
|
324
|
+
upgrade=upgrade,
|
|
325
|
+
)
|
|
326
|
+
_process_properties(
|
|
327
|
+
term,
|
|
328
|
+
data,
|
|
329
|
+
ontology_prefix=ontology_prefix,
|
|
330
|
+
strict=strict,
|
|
331
|
+
upgrade=upgrade,
|
|
332
|
+
typedefs=typedefs,
|
|
167
333
|
)
|
|
168
|
-
|
|
169
|
-
|
|
334
|
+
_process_relations(
|
|
335
|
+
term,
|
|
336
|
+
data,
|
|
337
|
+
ontology_prefix=ontology_prefix,
|
|
338
|
+
strict=strict,
|
|
339
|
+
upgrade=upgrade,
|
|
340
|
+
typedefs=typedefs,
|
|
341
|
+
missing_typedefs=missing_typedefs,
|
|
342
|
+
)
|
|
343
|
+
_process_replaced_by(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
344
|
+
_process_subsets(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
345
|
+
_process_intersection_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
346
|
+
_process_union_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
347
|
+
_process_equivalent_to(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
348
|
+
_process_disjoint_from(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
349
|
+
_process_consider(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
350
|
+
_process_comment(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
351
|
+
_process_description(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
352
|
+
_process_creation_date(term, data)
|
|
170
353
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
except MissingPrefixError as e:
|
|
174
|
-
e.reference = reference
|
|
175
|
-
raise e
|
|
176
|
-
n_alt_ids += len(alt_ids)
|
|
354
|
+
terms.append(term)
|
|
355
|
+
return terms
|
|
177
356
|
|
|
178
|
-
try:
|
|
179
|
-
parents = list(
|
|
180
|
-
iterate_node_parents(
|
|
181
|
-
data,
|
|
182
|
-
prefix=prefix,
|
|
183
|
-
identifier=identifier,
|
|
184
|
-
strict=strict,
|
|
185
|
-
)
|
|
186
|
-
)
|
|
187
|
-
except MissingPrefixError as e:
|
|
188
|
-
e.reference = reference
|
|
189
|
-
raise e
|
|
190
|
-
n_parents += len(parents)
|
|
191
357
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
358
|
+
def _process_description(term: Stanza, data, *, ontology_prefix: str, strict: bool):
|
|
359
|
+
definition, definition_references = get_definition(
|
|
360
|
+
data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
361
|
+
)
|
|
362
|
+
term.definition = definition
|
|
363
|
+
if term.definition:
|
|
364
|
+
for definition_reference in definition_references:
|
|
365
|
+
term._append_annotation(
|
|
366
|
+
v.has_description,
|
|
367
|
+
OBOLiteral.string(term.definition),
|
|
368
|
+
Annotation(v.has_dbxref, definition_reference),
|
|
199
369
|
)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _process_comment(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
373
|
+
if comment := data.get("comment"):
|
|
374
|
+
term.append_comment(comment)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _process_creation_date(term: Stanza, data) -> None:
|
|
378
|
+
date_str = data.get("creation_date")
|
|
379
|
+
if not date_str:
|
|
380
|
+
return
|
|
381
|
+
if isinstance(date_str, list):
|
|
382
|
+
date_str = date_str[0]
|
|
383
|
+
try:
|
|
384
|
+
term.append_creation_date(date_str)
|
|
385
|
+
except ValueError:
|
|
386
|
+
logger.warning("[%s] failed to parse creation_date: %s", term.reference.curie, date_str)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _process_union_of(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
390
|
+
for reference in iterate_node_reference_tag(
|
|
391
|
+
"union_of", data=data, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
|
|
392
|
+
):
|
|
393
|
+
term.append_union_of(reference)
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _process_equivalent_to(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
397
|
+
for reference in iterate_node_reference_tag(
|
|
398
|
+
"equivalent_to",
|
|
399
|
+
data=data,
|
|
400
|
+
ontology_prefix=ontology_prefix,
|
|
401
|
+
strict=strict,
|
|
402
|
+
node=term.reference,
|
|
403
|
+
):
|
|
404
|
+
term.append_equivalent_to(reference)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _process_disjoint_from(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
408
|
+
for reference in iterate_node_reference_tag(
|
|
409
|
+
"disjoint_from",
|
|
410
|
+
data=data,
|
|
411
|
+
ontology_prefix=ontology_prefix,
|
|
412
|
+
strict=strict,
|
|
413
|
+
node=term.reference,
|
|
414
|
+
):
|
|
415
|
+
term.append_disjoint_from(reference)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _process_alts(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
419
|
+
for alt_reference in iterate_node_reference_tag(
|
|
420
|
+
"alt_id", data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
421
|
+
):
|
|
422
|
+
term.append_alt(alt_reference)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _process_parents(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
426
|
+
for tag in ["is_a", "instance_of"]:
|
|
427
|
+
for parent in iterate_node_reference_tag(
|
|
428
|
+
tag, data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
429
|
+
):
|
|
430
|
+
term.append_parent(parent)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _process_synonyms(
|
|
434
|
+
term: Stanza,
|
|
435
|
+
data,
|
|
436
|
+
*,
|
|
437
|
+
ontology_prefix: str,
|
|
438
|
+
strict: bool,
|
|
439
|
+
upgrade: bool,
|
|
440
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
441
|
+
) -> None:
|
|
442
|
+
synonyms = list(
|
|
443
|
+
iterate_node_synonyms(
|
|
444
|
+
data,
|
|
445
|
+
synonym_typedefs,
|
|
446
|
+
node=term.reference,
|
|
447
|
+
strict=strict,
|
|
448
|
+
ontology_prefix=ontology_prefix,
|
|
449
|
+
upgrade=upgrade,
|
|
200
450
|
)
|
|
201
|
-
|
|
451
|
+
)
|
|
452
|
+
for synonym in synonyms:
|
|
453
|
+
term.append_synonym(synonym)
|
|
202
454
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
455
|
+
|
|
456
|
+
def _process_xrefs(
|
|
457
|
+
term: Stanza,
|
|
458
|
+
data,
|
|
459
|
+
*,
|
|
460
|
+
ontology_prefix: str,
|
|
461
|
+
strict: bool,
|
|
462
|
+
macro_config: MacroConfig,
|
|
463
|
+
upgrade: bool,
|
|
464
|
+
) -> None:
|
|
465
|
+
for reference, provenance in iterate_node_xrefs(
|
|
466
|
+
data=data,
|
|
467
|
+
strict=strict,
|
|
468
|
+
ontology_prefix=ontology_prefix,
|
|
469
|
+
node=term.reference,
|
|
470
|
+
upgrade=upgrade,
|
|
471
|
+
):
|
|
472
|
+
_handle_xref(term, reference, provenance=provenance, macro_config=macro_config)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _process_properties(
|
|
476
|
+
term: Stanza, data, *, ontology_prefix: str, strict: bool, upgrade: bool, typedefs
|
|
477
|
+
) -> None:
|
|
478
|
+
for ann in iterate_node_properties(
|
|
479
|
+
data,
|
|
480
|
+
node=term.reference,
|
|
481
|
+
strict=strict,
|
|
482
|
+
ontology_prefix=ontology_prefix,
|
|
483
|
+
upgrade=upgrade,
|
|
484
|
+
context="stanza property",
|
|
485
|
+
):
|
|
486
|
+
# TODO parse axioms
|
|
487
|
+
term.append_property(ann)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _process_relations(
    term: Stanza,
    data,
    *,
    ontology_prefix: str,
    strict: bool,
    upgrade: bool,
    typedefs: Mapping[ReferenceTuple, TypeDef],
    missing_typedefs: set[ReferenceTuple],
) -> None:
    """Append every relationship parsed from ``data`` to ``term``.

    A relation whose pair is in none of ``typedefs``, ``default_typedefs``, or
    ``missing_typedefs`` is added to ``missing_typedefs`` (mutated in place) and
    reported once; the relationship itself is appended either way.
    """
    pairs = [
        *iterate_node_relationships(
            data,
            node=term.reference,
            strict=strict,
            ontology_prefix=ontology_prefix,
            upgrade=upgrade,
        )
    ]
    for relation, target in pairs:
        already_known = (
            relation.pair in typedefs
            or relation.pair in default_typedefs
            or relation.pair in missing_typedefs
        )
        if not already_known:
            missing_typedefs.add(relation.pair)
            logger.warning("[%s] has no typedef for %s", ontology_prefix, relation.curie)
            logger.debug("[%s] available typedefs: %s", ontology_prefix, set(typedefs))
        # TODO parse axioms
        term.append_relationship(relation, target)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _process_replaced_by(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
    """Append every ``replaced_by`` reference parsed from ``data`` to ``stanza``."""
    replacements = iterate_node_reference_tag(
        "replaced_by",
        data,
        node=stanza.reference,
        strict=strict,
        ontology_prefix=ontology_prefix,
    )
    for replacement in replacements:
        stanza.append_replaced_by(replacement)
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
    """Append every ``subset`` reference parsed from ``data`` to ``stanza``."""
    subsets = iterate_node_reference_tag(
        "subset",
        data,
        node=stanza.reference,
        strict=strict,
        ontology_prefix=ontology_prefix,
        counter=SUBSET_ERROR_COUNTER,
    )
    for subset in subsets:
        stanza.append_subset(subset)
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
|
|
542
|
+
value = data.get(tag)
|
|
543
|
+
if value is None:
|
|
544
|
+
return None
|
|
545
|
+
if isinstance(value, list):
|
|
546
|
+
value = value[0]
|
|
547
|
+
if value == "false":
|
|
548
|
+
return False
|
|
549
|
+
if value == "true":
|
|
550
|
+
return True
|
|
551
|
+
raise ValueError(value)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def _get_reference(
|
|
555
|
+
data: Mapping[str, Any], tag: str, *, ontology_prefix: str, strict: bool, **kwargs
|
|
556
|
+
) -> Reference | None:
|
|
557
|
+
value = data.get(tag)
|
|
558
|
+
if value is None:
|
|
559
|
+
return None
|
|
560
|
+
if isinstance(value, list):
|
|
561
|
+
value = value[0]
|
|
562
|
+
return _obo_parse_identifier(
|
|
563
|
+
value, ontology_prefix=ontology_prefix, strict=strict, context=tag, **kwargs
|
|
564
|
+
)
|
|
212
565
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
566
|
+
|
|
567
|
+
class MacroConfig:
    """A configuration data class for reader macros."""

    def __init__(
        self, data: Mapping[str, list[str]] | None = None, *, strict: bool, ontology_prefix: str
    ):
        """Instantiate the configuration from obonet graph metadata.

        Four ``treat-xrefs-as-*`` keys are read from ``data``. Entries whose
        prefix can't be normalized, or whose identifiers can't be parsed, are
        skipped; lines with the wrong number of whitespace-separated parts
        are reported via ``tqdm.write`` and skipped.
        """
        header = {} if data is None else data

        self.treat_xrefs_as_equivalent: set[str] = {
            normalized
            for raw in header.get("treat-xrefs-as-equivalent", [])
            if (normalized := bioregistry.normalize_prefix(raw)) is not None
        }

        self.treat_xrefs_as_genus_differentia: dict[str, tuple[Reference, Reference]] = {}
        for entry in header.get("treat-xrefs-as-genus-differentia", []):
            parts = entry.split()
            if len(parts) != 3:
                # this happens in `plana`, where there's an incorrectly written
                # line `CARO part_of NCBITaxon:79327; CL part_of NCBITaxon:79327`
                tqdm.write(
                    f"[{ontology_prefix}] failed to parse treat-xrefs-as-genus-differentia: {entry}"
                )
                continue
            raw_prefix, raw_predicate, raw_target = parts

            normalized = bioregistry.normalize_prefix(raw_prefix)
            if normalized is None:
                continue
            predicate = _obo_parse_identifier(
                raw_predicate, ontology_prefix=ontology_prefix, strict=strict
            )
            if predicate is None:
                continue
            target = _obo_parse_identifier(
                raw_target, ontology_prefix=ontology_prefix, strict=strict
            )
            if target is None:
                continue
            self.treat_xrefs_as_genus_differentia[normalized] = (predicate, target)

        self.treat_xrefs_as_relationship: dict[str, Reference] = {}
        for entry in header.get("treat-xrefs-as-relationship", []):
            parts = entry.split()
            if len(parts) != 2:
                tqdm.write(
                    f"[{ontology_prefix}] failed to parse treat-xrefs-as-relationship: {entry}"
                )
                continue
            raw_prefix, raw_predicate = parts

            normalized = bioregistry.normalize_prefix(raw_prefix)
            if normalized is None:
                continue
            predicate = _obo_parse_identifier(
                raw_predicate, ontology_prefix=ontology_prefix, strict=strict
            )
            if predicate is None:
                continue
            self.treat_xrefs_as_relationship[normalized] = predicate

        self.treat_xrefs_as_is_a: set[str] = {
            normalized
            for raw in header.get("treat-xrefs-as-is_a", [])
            if (normalized := bioregistry.normalize_prefix(raw)) is not None
        }
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _handle_xref(
|
|
640
|
+
term: Stanza,
|
|
641
|
+
xref: Reference,
|
|
642
|
+
*,
|
|
643
|
+
provenance: list[Reference | OBOLiteral],
|
|
644
|
+
macro_config: MacroConfig | None = None,
|
|
645
|
+
) -> Stanza:
|
|
646
|
+
annotations = [Annotation(v.has_dbxref, p) for p in provenance]
|
|
647
|
+
|
|
648
|
+
if macro_config is not None:
|
|
649
|
+
if xref.prefix in macro_config.treat_xrefs_as_equivalent:
|
|
650
|
+
return term.append_equivalent(xref, annotations=annotations)
|
|
651
|
+
elif object_property := macro_config.treat_xrefs_as_genus_differentia.get(xref.prefix):
|
|
652
|
+
# TODO how to add annotations here?
|
|
653
|
+
if annotations:
|
|
654
|
+
logger.warning(
|
|
655
|
+
"[%s] unable to add provenance to xref upgraded to intersection_of: %s",
|
|
656
|
+
term.reference.curie,
|
|
657
|
+
xref,
|
|
658
|
+
)
|
|
659
|
+
return term.append_intersection_of(xref).append_intersection_of(object_property)
|
|
660
|
+
elif predicate := macro_config.treat_xrefs_as_relationship.get(xref.prefix):
|
|
661
|
+
return term.append_relationship(predicate, xref, annotations=annotations)
|
|
662
|
+
elif xref.prefix in macro_config.treat_xrefs_as_is_a:
|
|
663
|
+
return term.append_parent(xref, annotations=annotations)
|
|
664
|
+
|
|
665
|
+
# TODO this is not what spec calls for, maybe
|
|
666
|
+
# need a flag in macro config for this
|
|
667
|
+
if xref.prefix in PROVENANCE_PREFIXES:
|
|
668
|
+
return term.append_provenance(xref, annotations=annotations)
|
|
669
|
+
|
|
670
|
+
return term.append_xref(xref, annotations=annotations)
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
#: Counter handed as ``counter=`` to the identifier parsers when parsing subset
#: references and ``subsetdef`` header lines; presumably tallies parse failures
#: -- TODO confirm against the parser implementation
SUBSET_ERROR_COUNTER: Counter[tuple[str, str]] = Counter()
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def _get_subsetdefs(graph: nx.MultiDiGraph, ontology_prefix: str) -> list[tuple[Reference, str]]:
|
|
677
|
+
rv = []
|
|
678
|
+
for subsetdef in graph.get("subsetdef", []):
|
|
679
|
+
left, _, right = subsetdef.partition(" ")
|
|
680
|
+
if not right:
|
|
681
|
+
logger.warning("[%s] subsetdef did not have two parts", ontology_prefix, subsetdef)
|
|
682
|
+
continue
|
|
683
|
+
left_ref = _obo_parse_identifier(
|
|
684
|
+
left,
|
|
685
|
+
ontology_prefix=ontology_prefix,
|
|
686
|
+
name=right,
|
|
687
|
+
line=subsetdef,
|
|
688
|
+
counter=SUBSET_ERROR_COUNTER,
|
|
689
|
+
)
|
|
690
|
+
if left_ref is None:
|
|
691
|
+
continue
|
|
692
|
+
right = right.strip('"')
|
|
693
|
+
rv.append((left_ref, right))
|
|
694
|
+
return rv
|
|
260
695
|
|
|
261
696
|
|
|
262
697
|
def _clean_graph_ontology(graph, prefix: str) -> None:
|
|
263
698
|
"""Update the ontology entry in the graph's metadata, if necessary."""
|
|
264
699
|
if "ontology" not in graph.graph:
|
|
265
|
-
logger.
|
|
700
|
+
logger.debug('[%s] missing "ontology" key', prefix)
|
|
266
701
|
graph.graph["ontology"] = prefix
|
|
267
702
|
elif not graph.graph["ontology"].isalpha():
|
|
268
|
-
logger.
|
|
269
|
-
"[%s] ontology
|
|
703
|
+
logger.debug(
|
|
704
|
+
"[%s] ontology prefix `%s` has a strange format. replacing with prefix",
|
|
270
705
|
prefix,
|
|
271
706
|
graph.graph["ontology"],
|
|
272
707
|
)
|
|
273
708
|
graph.graph["ontology"] = prefix
|
|
274
709
|
|
|
275
710
|
|
|
711
|
+
def _clean_graph_version(
    graph, ontology_prefix: str, version: str | None, date: datetime | None
) -> str | None:
    """Decide which version string to use for the ontology.

    Precedence: a static rewrite for the prefix, then the explicitly passed
    ``version`` (cleaned), then the graph's ``data-version`` (cleaned), then
    ``date`` formatted as ``YYYY-MM-DD``, and finally ``None``.
    """
    if ontology_prefix in STATIC_VERSION_REWRITES:
        return STATIC_VERSION_REWRITES[ontology_prefix]

    data_version: str | None = graph.graph.get("data-version") or None

    if not version:
        if data_version:
            cleaned = cleanup_version(data_version, prefix=ontology_prefix)
            logger.debug("[%s] using version %s", ontology_prefix, cleaned)
            return cleaned
        if date is None:
            logger.debug("[%s] does not report a version nor a date", ontology_prefix)
            return None
        from_date = date.strftime("%Y-%m-%d")
        logger.debug(
            "[%s] does not report a version. falling back to date: %s",
            ontology_prefix,
            from_date,
        )
        return from_date

    injected = cleanup_version(version, prefix=ontology_prefix)
    if not data_version:
        logger.debug(
            "[%s] did not have a version, overriding with %s",
            ontology_prefix,
            injected,
        )
    elif cleanup_version(data_version, prefix=ontology_prefix) != injected:
        # in this case, we're going to trust the one that's passed
        # through explicitly more than the graph's content
        logger.debug(
            "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
        )
    return injected
|
|
753
|
+
|
|
754
|
+
|
|
276
755
|
def _iter_obo_graph(
    graph: nx.MultiDiGraph,
    *,
    strict: bool = False,
    ontology_prefix: str,
    use_tqdm: bool = False,
    upgrade: bool,
) -> Iterable[tuple[Reference, Mapping[str, Any]]]:
    """Iterate over the nodes in the graph with the prefix stripped (if it's there).

    Each node ID is parsed; nodes that yield a reference are emitted with their
    data. Parse failures are raised when ``strict`` is set and logged otherwise,
    except that a non-CURIE ID that is a valid identifier is emitted as a
    default reference in the ontology's own namespace.
    """
    nodes = tqdm(
        graph.nodes(data=True), disable=not use_tqdm, unit_scale=True, desc=f"[{ontology_prefix}]"
    )
    for node, data in nodes:
        name = data.get("name")
        parsed = _parse_str_or_curie_or_uri_helper(
            node,
            ontology_prefix=ontology_prefix,
            name=name,
            upgrade=upgrade,
            context="stanza ID",
        )
        if isinstance(parsed, Reference):
            yield parsed, data
        elif isinstance(parsed, NotCURIEError):
            if _is_valid_identifier(node):
                yield default_reference(ontology_prefix, node, name=name), data
            elif strict:
                raise parsed
            else:
                logger.warning(str(parsed))
        elif isinstance(parsed, ParseError):
            if strict:
                raise parsed
            logger.warning(str(parsed))
        # if blacklisted, just skip it with no warning
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def _get_date(graph, ontology_prefix: str) -> datetime | None:
    """Parse the ``date`` entry of the graph's metadata using ``DATE_FORMAT``.

    :returns: The parsed datetime, or ``None`` if the entry is missing or
        can't be parsed (both cases are logged at INFO level)
    """
    try:
        raw_date = graph.graph["date"]
    except KeyError:
        logger.info("[%s] does not report a date", ontology_prefix)
        return None

    try:
        return datetime.strptime(raw_date, DATE_FORMAT)
    except ValueError:
        logger.info("[%s] reports a date that can't be parsed: %s", ontology_prefix, raw_date)
        return None
|
|
300
805
|
|
|
301
806
|
|
|
302
|
-
def _get_name(graph,
|
|
807
|
+
def _get_name(graph, ontology_prefix: str) -> str:
|
|
303
808
|
try:
|
|
304
809
|
rv = graph.graph["name"]
|
|
305
810
|
except KeyError:
|
|
306
|
-
logger.info("[%s] does not report a name",
|
|
307
|
-
rv =
|
|
811
|
+
logger.info("[%s] does not report a name", ontology_prefix)
|
|
812
|
+
rv = ontology_prefix
|
|
308
813
|
return rv
|
|
309
814
|
|
|
310
815
|
|
|
311
816
|
def iterate_graph_synonym_typedefs(
    graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = False, upgrade: bool
) -> Iterable[SynonymTypeDef]:
    """Get synonym type definitions from an :mod:`obonet` graph.

    Each ``synonymtypedef`` header line is read as ``ID "name" [SCOPE]``.

    :param graph: The graph whose metadata holds the ``synonymtypedef`` lines
    :param ontology_prefix: The prefix of the ontology being parsed
    :param strict: If true, raise on an invalid scope or a line without a name
        instead of logging a warning
    :param upgrade: Passed through to the identifier parser
    :raises ValueError: in strict mode, for an invalid scope or a missing name
    """
    for raw_line in graph.graph.get("synonymtypedef", []):
        # TODO handle trailing comments
        # whatever follows the last double quote is the optional scope
        head, _, specificity = (part.strip() for part in raw_line.rpartition('"'))
        specificity = specificity.upper()
        if not specificity:
            specificity = None
        elif specificity not in t.get_args(SynonymScope):
            if strict:
                raise ValueError(f"invalid synonym specificity: {specificity}")
            logger.warning("[%s] invalid synonym specificity: %s", ontology_prefix, specificity)
            specificity = None

        curie, separator, name = head.partition(" ")
        if not separator:
            # no space means no name part; unpacking used to crash here even
            # when ``strict`` was false
            if strict:
                raise ValueError(
                    f"[{ontology_prefix}] synonym typedef is missing a name: {raw_line}"
                )
            logger.warning(
                "[%s] synonym typedef is missing a name: %s", ontology_prefix, raw_line
            )
            continue
        # the name should be in quotes, so strip them out
        name = name.strip().strip('"')
        # TODO unquote the string?
        reference = _obo_parse_identifier(
            curie,
            ontology_prefix=ontology_prefix,
            name=name,
            upgrade=upgrade,
            strict=strict,
        )
        if reference is None:
            logger.warning("[%s] unable to parse synonym typedef ID %s", ontology_prefix, curie)
            continue
        yield SynonymTypeDef(reference=reference, specificity=specificity)
|
|
332
847
|
|
|
333
848
|
|
|
334
|
-
def
|
|
335
|
-
graph: nx.MultiDiGraph,
|
|
849
|
+
def iterate_typedefs(
    graph: nx.MultiDiGraph,
    *,
    ontology_prefix: str,
    strict: bool = False,
    upgrade: bool,
    macro_config: MacroConfig | None = None,
) -> Iterable[TypeDef]:
    """Get type definitions from an :mod:`obonet` graph.

    Each entry under the ``typedefs`` key of the graph's metadata is turned
    into a :class:`TypeDef` and then enriched by the stanza processors.
    Entries whose ID can't be parsed are logged and skipped.

    :raises KeyError: if an entry has neither an ``id`` nor an ``identifier`` key
    """
    if macro_config is None:
        macro_config = MacroConfig(strict=strict, ontology_prefix=ontology_prefix)
    # can't really have a pre-defined set of synonym typedefs here!
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {}
    typedefs: Mapping[ReferenceTuple, TypeDef] = {}
    missing_typedefs: set[ReferenceTuple] = set()

    # keyword arguments shared by most of the helpers called below
    common = {"ontology_prefix": ontology_prefix, "strict": strict}
    # boolean tags whose TypeDef keyword is spelled exactly like the tag
    boolean_tags = (
        "is_metadata_tag",
        "is_class_level",
        "builtin",
        "is_obsolete",
        "is_anonymous",
        "is_anti_symmetric",
        "is_symmetric",
        "is_reflexive",
        "is_cyclic",
        "is_transitive",
        "is_functional",
        "is_inverse_functional",
    )

    for data in graph.graph.get("typedefs", []):
        for id_key in ("id", "identifier"):
            if id_key in data:
                typedef_id = data[id_key]
                break
        else:
            raise KeyError("typedef is missing an `id`")

        name = data.get("name")
        if name is None:
            logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id)

        reference = _obo_parse_identifier(
            typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name, upgrade=upgrade
        )
        if reference is None:
            logger.warning("[%s] unable to parse typedef ID %s", ontology_prefix, typedef_id)
            continue

        flags = {tag: _get_boolean(data, tag) for tag in boolean_tags}
        typedef = TypeDef(
            reference=reference,
            namespace=data.get("namespace"),
            **flags,
            domain=_get_reference(data, "domain", **common),
            range=_get_reference(data, "range", **common),
            inverse=_get_reference(data, "inverse_of", **common),
        )

        _process_alts(typedef, data, **common)
        _process_parents(typedef, data, **common)
        _process_synonyms(
            typedef, data, upgrade=upgrade, synonym_typedefs=synonym_typedefs, **common
        )
        _process_xrefs(typedef, data, macro_config=macro_config, upgrade=upgrade, **common)
        _process_properties(typedef, data, upgrade=upgrade, typedefs=typedefs, **common)
        _process_relations(
            typedef,
            data,
            upgrade=upgrade,
            typedefs=typedefs,
            missing_typedefs=missing_typedefs,
            **common,
        )
        _process_replaced_by(typedef, data, **common)
        _process_subsets(typedef, data, **common)
        _process_intersection_of(typedef, data, **common)
        _process_union_of(typedef, data, **common)
        _process_equivalent_to(typedef, data, **common)
        _process_disjoint_from(typedef, data, **common)
        _process_consider(typedef, data, **common)
        _process_comment(typedef, data, **common)
        _process_description(typedef, data, **common)
        _process_creation_date(typedef, data)

        # the next 4 are typedef-specific
        _process_equivalent_to_chain(typedef, data, **common)
        _process_holds_over_chain(typedef, data, **common)
        for tag in ("disjoint_over", "transitive_over"):
            # the tag and the TypeDef attribute it fills share the same name
            getattr(typedef, tag).extend(
                iterate_node_reference_tag(tag, data, node=typedef.reference, **common)
            )

        yield typedef
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
def _process_consider(stanza: Stanza, data, *, ontology_prefix: str, strict: bool = False):
    """Append every ``consider`` reference parsed from ``data`` to ``stanza`` as a see-also."""
    considered = iterate_node_reference_tag(
        "consider",
        data,
        node=stanza.reference,
        ontology_prefix=ontology_prefix,
        strict=strict,
    )
    for target in considered:
        stanza.append_see_also(target)
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
def _process_equivalent_to_chain(
    typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
) -> None:
    """Append every parsed ``equivalent_to_chain`` in ``data`` to the typedef."""
    chains = _iterate_chain(
        "equivalent_to_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
    )
    for predicate_chain in chains:
        typedef.equivalent_to_chain.append(predicate_chain)
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def _process_holds_over_chain(
    typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
) -> None:
    """Append every parsed ``holds_over_chain`` in ``data`` to the typedef."""
    chains = _iterate_chain(
        "holds_over_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
    )
    for predicate_chain in chains:
        typedef.holds_over_chain.append(predicate_chain)
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
def _iterate_chain(
|
|
1005
|
+
tag: str, typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
|
|
1006
|
+
) -> Iterable[list[Reference]]:
|
|
1007
|
+
for chain in data.get(tag, []):
|
|
1008
|
+
# chain is a list of CURIEs
|
|
1009
|
+
predicate_chain = _process_chain_helper(typedef, chain, ontology_prefix=ontology_prefix)
|
|
1010
|
+
if predicate_chain is None:
|
|
1011
|
+
logger.warning(
|
|
1012
|
+
"[%s - %s] could not parse line: %s: %s",
|
|
1013
|
+
ontology_prefix,
|
|
1014
|
+
typedef.curie,
|
|
1015
|
+
tag,
|
|
1016
|
+
chain,
|
|
1017
|
+
)
|
|
1018
|
+
else:
|
|
1019
|
+
yield predicate_chain
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
def _process_chain_helper(
|
|
1023
|
+
term: Stanza, chain: str, ontology_prefix: str, strict: bool = False
|
|
1024
|
+
) -> list[Reference] | None:
|
|
1025
|
+
rv = []
|
|
1026
|
+
for curie in chain.split():
|
|
1027
|
+
curie = curie.strip()
|
|
1028
|
+
r = _obo_parse_identifier(
|
|
1029
|
+
curie, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
|
|
1030
|
+
)
|
|
1031
|
+
if r is None:
|
|
1032
|
+
return None
|
|
1033
|
+
rv.append(r)
|
|
1034
|
+
return rv
|
|
364
1035
|
|
|
365
1036
|
|
|
366
1037
|
def get_definition(
    data, *, node: Reference, ontology_prefix: str, strict: bool = False
) -> tuple[None | str, list[Reference | OBOLiteral]]:
    """Extract the definition from the data.

    :returns: A pair of the definition text (or ``None``) and its provenance;
        ``(None, [])`` when the ``def`` tag is missing or empty
    """
    raw = data.get("def")  # it's allowed not to have a definition
    if raw:
        return _extract_definition(
            raw, node=node, strict=strict, ontology_prefix=ontology_prefix
        )
    return None, []
|
|
374
1047
|
|
|
375
1048
|
|
|
376
1049
|
def _extract_definition(
    s: str,
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
) -> tuple[None | str, list[Reference | OBOLiteral]]:
    """Extract the definitions.

    :param s: The raw value of a ``def`` tag: a quoted text, optionally
        followed by a bracketed provenance list
    :param node: The reference of the stanza being parsed, used in log messages
    :param strict: Passed through to the provenance parser
    :param ontology_prefix: Passed through to the provenance parser
    :returns: The definition text (``None`` if empty or unparseable) and the
        parsed provenance list
    """
    if not s.startswith('"'):
        logger.warning("[%s] definition does not start with a quote", node.curie)
        return None, []

    try:
        definition, rest = _quote_split(s)
    except ValueError as e:
        logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
        return None, []

    if not rest.startswith("["):
        logger.debug("[%s] no square brackets for provenance on line: %s", node.curie, s)
        provenance = []
    else:
        # Anything after the last closing bracket (e.g. trailing annotations)
        # is not provenance, so cut it off before stripping the brackets.
        inside, closing, trailing = rest.rpartition("]")
        if closing:
            if trailing.strip():
                logger.debug(
                    "[%s] ignoring text after definition provenance: %s", node.curie, trailing
                )
            rest = inside + closing
        rest = rest.lstrip("[").rstrip("]")
        provenance = _parse_provenance_list(
            rest,
            node=node,
            ontology_prefix=ontology_prefix,
            counter=DEFINITION_PROVENANCE_COUNTER,
            scope_text="definition provenance",
            line=s,
            strict=strict,
        )
    return definition or None, provenance
|
|
399
1082
|
|
|
400
1083
|
|
|
401
|
-
def
|
|
1084
|
+
def get_first_nonescaped_quote(s: str) -> int | None:
|
|
1085
|
+
"""Get the first non-escaped quote."""
|
|
1086
|
+
if not s:
|
|
1087
|
+
return None
|
|
1088
|
+
if s[0] == '"':
|
|
1089
|
+
# special case first position
|
|
1090
|
+
return 0
|
|
402
1091
|
for i, (a, b) in enumerate(pairwise(s), start=1):
|
|
403
1092
|
if b == '"' and a != "\\":
|
|
404
1093
|
return i
|
|
@@ -406,10 +1095,12 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
|
|
|
406
1095
|
|
|
407
1096
|
|
|
408
1097
|
def _quote_split(s: str) -> tuple[str, str]:
|
|
409
|
-
|
|
410
|
-
|
|
1098
|
+
if not s.startswith('"'):
|
|
1099
|
+
raise ValueError(f"'{s}' does not start with a quote")
|
|
1100
|
+
s = s.removeprefix('"')
|
|
1101
|
+
i = get_first_nonescaped_quote(s)
|
|
411
1102
|
if i is None:
|
|
412
|
-
raise ValueError
|
|
1103
|
+
raise ValueError(f"no closing quote found in `{s}`")
|
|
413
1104
|
return _clean_definition(s[:i].strip()), s[i + 1 :].strip()
|
|
414
1105
|
|
|
415
1106
|
|
|
@@ -421,78 +1112,64 @@ def _clean_definition(s: str) -> str:
|
|
|
421
1112
|
|
|
422
1113
|
def _extract_synonym(
    s: str,
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
    upgrade: bool,
) -> Synonym | None:
    """Parse one raw ``synonym`` value into a :class:`Synonym`.

    The quoted text is split off first; the remainder is then consumed piece
    by piece: specificity, synonym type, provenance list, trailing axioms.

    :returns: The synonym, or ``None`` if the quoted text can't be split off
    """
    # TODO check if the synonym is written like a CURIE... it shouldn't but I've seen it happen
    try:
        text, remainder = _quote_split(s)
    except ValueError:
        logger.warning("[%s] invalid synonym: %s", node.curie, s)
        return None

    specificity, remainder = _chomp_specificity(remainder)
    synonym_typedef, remainder = _chomp_typedef(
        remainder,
        synonym_typedefs=synonym_typedefs,
        strict=strict,
        node=node,
        ontology_prefix=ontology_prefix,
        upgrade=upgrade,
    )
    provenance, remainder = _chomp_references(
        remainder,
        strict=strict,
        node=node,
        ontology_prefix=ontology_prefix,
        line=s,
    )
    annotations = _chomp_axioms(remainder, node=node, strict=strict)

    if synonym_typedef:
        synonym_type = synonym_typedef.reference
    else:
        synonym_type = None
    return Synonym(
        name=text,
        specificity=specificity,
        type=synonym_type,
        provenance=list(provenance) if provenance else [],
        annotations=annotations,
    )
|
|
474
1154
|
|
|
475
1155
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
return [
|
|
479
|
-
Reference.from_curie(curie.strip(), strict=strict)
|
|
480
|
-
for curie in rest.split(",")
|
|
481
|
-
if curie.strip()
|
|
482
|
-
]
|
|
1156
|
+
#: A counter for errors in parsing provenance
|
|
1157
|
+
DEFINITION_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
|
|
483
1158
|
|
|
484
1159
|
|
|
485
1160
|
def iterate_node_synonyms(
    data: Mapping[str, Any],
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
    upgrade: bool,
) -> Iterable[Synonym]:
    """Extract synonyms from a :mod:`obonet` node's data.

    Example strings

    - "LTEC I" EXACT [Orphanet:93938,DOI:xxxx]
    - "LTEC I" EXACT [Orphanet:93938]
    - "LTEC I" [Orphanet:93938]

    Values that can't be parsed into a synonym are left out of the output.
    """
    for raw in data.get("synonym", []):
        synonym = _extract_synonym(
            raw,
            synonym_typedefs,
            node=node,
            strict=strict,
            ontology_prefix=ontology_prefix,
            upgrade=upgrade,
        )
        if synonym is None:
            continue
        yield synonym
|
|
507
1189
|
|
|
508
1190
|
|
|
509
|
-
HANDLED_PROPERTY_TYPES = {
|
|
510
|
-
"xsd:string": str,
|
|
511
|
-
"xsd:dateTime": datetime,
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
|
|
515
1191
|
def iterate_node_properties(
|
|
516
|
-
data: Mapping[str, Any],
|
|
517
|
-
|
|
1192
|
+
data: Mapping[str, Any],
|
|
1193
|
+
*,
|
|
1194
|
+
node: Reference,
|
|
1195
|
+
strict: bool = False,
|
|
1196
|
+
ontology_prefix: str,
|
|
1197
|
+
upgrade: bool,
|
|
1198
|
+
context: str,
|
|
1199
|
+
) -> Iterable[Annotation]:
|
|
518
1200
|
"""Extract properties from a :mod:`obonet` node's data."""
|
|
519
1201
|
for prop_value_type in data.get("property_value", []):
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
1202
|
+
if yv := _handle_prop(
|
|
1203
|
+
prop_value_type,
|
|
1204
|
+
node=node,
|
|
1205
|
+
strict=strict,
|
|
1206
|
+
ontology_prefix=ontology_prefix,
|
|
1207
|
+
upgrade=upgrade,
|
|
1208
|
+
context=context,
|
|
1209
|
+
):
|
|
1210
|
+
yield yv
|
|
1211
|
+
|
|
1212
|
+
|
|
1213
|
+
#: Keep track of property-value pairs for which the value couldn't be parsed,
|
|
1214
|
+
#: such as `dc:conformsTo autoimmune:inflammation.yaml` in MONDO
|
|
1215
|
+
UNHANDLED_PROP_OBJECTS: Counter[tuple[str, str]] = Counter()
|
|
1216
|
+
|
|
1217
|
+
UNHANDLED_PROPS: Counter[tuple[str, str]] = Counter()
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
def _handle_prop(
|
|
1221
|
+
prop_value_type: str,
|
|
1222
|
+
*,
|
|
1223
|
+
node: Reference,
|
|
1224
|
+
strict: bool = False,
|
|
1225
|
+
ontology_prefix: str,
|
|
1226
|
+
upgrade: bool,
|
|
1227
|
+
context: str | None,
|
|
1228
|
+
) -> Annotation | None:
|
|
1229
|
+
try:
|
|
1230
|
+
prop, value_type = prop_value_type.split(" ", 1)
|
|
1231
|
+
except ValueError:
|
|
1232
|
+
logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type)
|
|
1233
|
+
return None
|
|
1234
|
+
|
|
1235
|
+
prop_reference = _get_prop(
|
|
1236
|
+
prop,
|
|
1237
|
+
node=node,
|
|
1238
|
+
strict=strict,
|
|
1239
|
+
ontology_prefix=ontology_prefix,
|
|
1240
|
+
upgrade=upgrade,
|
|
1241
|
+
line=prop_value_type,
|
|
1242
|
+
counter=UNHANDLED_PROPS,
|
|
1243
|
+
context=context,
|
|
1244
|
+
)
|
|
1245
|
+
if prop_reference is None:
|
|
1246
|
+
return None
|
|
1247
|
+
|
|
1248
|
+
value_type = value_type.strip()
|
|
1249
|
+
datatype: Reference | None
|
|
1250
|
+
if " " not in value_type:
|
|
1251
|
+
value, datatype = value_type, None
|
|
1252
|
+
else:
|
|
1253
|
+
value, datatype_raw = (s.strip() for s in value_type.rsplit(" ", 1))
|
|
1254
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1255
|
+
datatype_raw,
|
|
1256
|
+
ontology_prefix=ontology_prefix,
|
|
1257
|
+
node=node,
|
|
1258
|
+
predicate=prop_reference,
|
|
1259
|
+
line=prop_value_type,
|
|
1260
|
+
upgrade=upgrade,
|
|
1261
|
+
context="property datatype",
|
|
1262
|
+
):
|
|
1263
|
+
case Reference() as datatype_:
|
|
1264
|
+
datatype = datatype_
|
|
1265
|
+
case BlacklistedError():
|
|
1266
|
+
return None
|
|
1267
|
+
case ParseError() as exc:
|
|
1268
|
+
if strict:
|
|
1269
|
+
raise exc
|
|
1270
|
+
else:
|
|
1271
|
+
logger.warning(str(exc))
|
|
1272
|
+
return None
|
|
1273
|
+
|
|
1274
|
+
# if it's an empty string, like the ones removed in https://github.com/oborel/obo-relations/pull/830,
|
|
1275
|
+
# just quit
|
|
1276
|
+
if value == '""':
|
|
1277
|
+
return None
|
|
1278
|
+
|
|
1279
|
+
quoted = value.startswith('"') and value.endswith('"')
|
|
1280
|
+
value = value.strip('"').strip()
|
|
527
1281
|
|
|
1282
|
+
# first, special case datetimes. Whether it's quoted or not,
|
|
1283
|
+
# we always deal with this first
|
|
1284
|
+
if datatype and datatype.curie == "xsd:dateTime":
|
|
528
1285
|
try:
|
|
529
|
-
|
|
1286
|
+
obo_literal = OBOLiteral.datetime(value)
|
|
530
1287
|
except ValueError:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
1288
|
+
logger.warning(
|
|
1289
|
+
"[%s - %s] could not parse date: %s", node.curie, prop_reference.curie, value
|
|
1290
|
+
)
|
|
1291
|
+
return None
|
|
1292
|
+
else:
|
|
1293
|
+
return Annotation(prop_reference, obo_literal)
|
|
1294
|
+
|
|
1295
|
+
if datatype and datatype.curie == "xsd:anyURI":
|
|
1296
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1297
|
+
value,
|
|
1298
|
+
node=node,
|
|
1299
|
+
predicate=prop_reference,
|
|
1300
|
+
ontology_prefix=ontology_prefix,
|
|
1301
|
+
line=prop_value_type,
|
|
1302
|
+
upgrade=upgrade,
|
|
1303
|
+
context="property object",
|
|
1304
|
+
):
|
|
1305
|
+
case Reference() as obj_reference:
|
|
1306
|
+
return Annotation(prop_reference, obj_reference)
|
|
1307
|
+
case BlacklistedError():
|
|
1308
|
+
return None
|
|
1309
|
+
case UnparsableIRIError():
|
|
1310
|
+
return Annotation(prop_reference, OBOLiteral.uri(value))
|
|
1311
|
+
case ParseError() as exc:
|
|
1312
|
+
if strict:
|
|
1313
|
+
raise exc
|
|
1314
|
+
else:
|
|
1315
|
+
logger.warning(str(exc))
|
|
1316
|
+
return None
|
|
1317
|
+
|
|
1318
|
+
# if it's quoted and there's a data try parsing as a CURIE/URI anyway (this is a bit
|
|
1319
|
+
# aggressive, but more useful than spec).
|
|
1320
|
+
if quoted:
|
|
1321
|
+
# give a try parsing it anyway, just in case ;)
|
|
1322
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1323
|
+
value,
|
|
1324
|
+
ontology_prefix=ontology_prefix,
|
|
1325
|
+
node=node,
|
|
1326
|
+
line=prop_value_type,
|
|
1327
|
+
upgrade=upgrade,
|
|
1328
|
+
predicate=prop_reference,
|
|
1329
|
+
context="property object",
|
|
1330
|
+
):
|
|
1331
|
+
case Reference() as obj_reference:
|
|
1332
|
+
return Annotation(prop_reference, obj_reference)
|
|
1333
|
+
case BlacklistedError():
|
|
1334
|
+
return None
|
|
1335
|
+
case ParseError():
|
|
1336
|
+
if datatype:
|
|
1337
|
+
return Annotation(prop_reference, OBOLiteral(value, datatype, None))
|
|
1338
|
+
else:
|
|
1339
|
+
return Annotation(prop_reference, OBOLiteral.string(value))
|
|
1340
|
+
else:
|
|
1341
|
+
if datatype:
|
|
1342
|
+
logger.debug(
|
|
1343
|
+
"[%s] throwing away datatype since no quotes were used: %s", node.curie, value_type
|
|
1344
|
+
)
|
|
1345
|
+
|
|
1346
|
+
# if it wasn't quoted and there was no datatype, go for parsing as an object
|
|
1347
|
+
match _obo_parse_identifier(
|
|
1348
|
+
value,
|
|
1349
|
+
strict=strict,
|
|
1350
|
+
ontology_prefix=ontology_prefix,
|
|
1351
|
+
node=node,
|
|
1352
|
+
predicate=prop_reference,
|
|
1353
|
+
line=prop_value_type,
|
|
1354
|
+
context="property object",
|
|
1355
|
+
counter=UNHANDLED_PROP_OBJECTS,
|
|
1356
|
+
):
|
|
1357
|
+
case Reference() as obj_reference:
|
|
1358
|
+
return Annotation(prop_reference, obj_reference)
|
|
1359
|
+
case None:
|
|
1360
|
+
return None
|
|
1361
|
+
|
|
1362
|
+
|
|
1363
|
+
def _get_prop(
|
|
1364
|
+
property_id: str,
|
|
1365
|
+
*,
|
|
1366
|
+
node: Reference,
|
|
1367
|
+
strict: bool,
|
|
1368
|
+
ontology_prefix: str,
|
|
1369
|
+
upgrade: bool,
|
|
1370
|
+
line: str,
|
|
1371
|
+
counter: Counter[tuple[str, str]] | None = None,
|
|
1372
|
+
context: str | None = None,
|
|
1373
|
+
) -> Reference | None:
|
|
1374
|
+
if rv := _parse_default_prop(property_id, ontology_prefix):
|
|
1375
|
+
return rv
|
|
1376
|
+
return _obo_parse_identifier(
|
|
1377
|
+
property_id,
|
|
1378
|
+
strict=strict,
|
|
1379
|
+
node=node,
|
|
1380
|
+
ontology_prefix=ontology_prefix,
|
|
1381
|
+
upgrade=upgrade,
|
|
1382
|
+
counter=counter,
|
|
1383
|
+
context=context,
|
|
1384
|
+
line=line,
|
|
1385
|
+
)
|
|
1386
|
+
|
|
1387
|
+
|
|
1388
|
+
def _parse_default_prop(property_id, ontology_prefix) -> Reference | None:
|
|
1389
|
+
for delim in "#/":
|
|
1390
|
+
sw = f"http://purl.obolibrary.org/obo/{ontology_prefix}{delim}"
|
|
1391
|
+
if property_id.startswith(sw):
|
|
1392
|
+
identifier = property_id.removeprefix(sw)
|
|
1393
|
+
return default_reference(ontology_prefix, identifier)
|
|
1394
|
+
return None
|
|
535
1395
|
|
|
536
1396
|
|
|
537
|
-
def
|
|
1397
|
+
def iterate_node_reference_tag(
|
|
1398
|
+
tag: str,
|
|
538
1399
|
data: Mapping[str, Any],
|
|
539
1400
|
*,
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
1401
|
+
node: Reference,
|
|
1402
|
+
strict: bool = False,
|
|
1403
|
+
ontology_prefix: str,
|
|
1404
|
+
upgrade: bool = True,
|
|
1405
|
+
counter: Counter[tuple[str, str]] | None = None,
|
|
543
1406
|
) -> Iterable[Reference]:
|
|
544
|
-
"""Extract
|
|
545
|
-
for
|
|
546
|
-
reference =
|
|
1407
|
+
"""Extract a list of CURIEs from the data."""
|
|
1408
|
+
for identifier in data.get(tag, []):
|
|
1409
|
+
reference = _obo_parse_identifier(
|
|
1410
|
+
identifier,
|
|
1411
|
+
strict=strict,
|
|
1412
|
+
node=node,
|
|
1413
|
+
ontology_prefix=ontology_prefix,
|
|
1414
|
+
upgrade=upgrade,
|
|
1415
|
+
counter=counter,
|
|
1416
|
+
)
|
|
547
1417
|
if reference is None:
|
|
548
1418
|
logger.warning(
|
|
549
|
-
"[%s
|
|
1419
|
+
"[%s] %s - could not parse identifier: %s", ontology_prefix, tag, identifier
|
|
550
1420
|
)
|
|
551
|
-
|
|
552
|
-
|
|
1421
|
+
else:
|
|
1422
|
+
yield reference
|
|
553
1423
|
|
|
554
1424
|
|
|
555
|
-
def
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
1425
|
+
def _process_intersection_of(
|
|
1426
|
+
term: Stanza,
|
|
1427
|
+
data: Mapping[str, Any],
|
|
1428
|
+
*,
|
|
1429
|
+
strict: bool = False,
|
|
1430
|
+
ontology_prefix: str,
|
|
1431
|
+
upgrade: bool = True,
|
|
1432
|
+
) -> None:
|
|
1433
|
+
"""Extract a list of CURIEs from the data."""
|
|
1434
|
+
for line in data.get("intersection_of", []):
|
|
1435
|
+
predicate_id, _, target_id = line.partition(" ")
|
|
1436
|
+
predicate = _obo_parse_identifier(
|
|
1437
|
+
predicate_id,
|
|
1438
|
+
strict=strict,
|
|
1439
|
+
node=term.reference,
|
|
1440
|
+
ontology_prefix=ontology_prefix,
|
|
1441
|
+
upgrade=upgrade,
|
|
1442
|
+
)
|
|
1443
|
+
if predicate is None:
|
|
1444
|
+
logger.warning("[%s] - could not parse intersection_of: %s", ontology_prefix, line)
|
|
1445
|
+
continue
|
|
1446
|
+
|
|
1447
|
+
if target_id:
|
|
1448
|
+
# this means that there's a second part, so let's try parsing it
|
|
1449
|
+
target = _obo_parse_identifier(
|
|
1450
|
+
target_id,
|
|
1451
|
+
strict=strict,
|
|
1452
|
+
node=term.reference,
|
|
1453
|
+
predicate=predicate,
|
|
1454
|
+
ontology_prefix=ontology_prefix,
|
|
1455
|
+
upgrade=upgrade,
|
|
1456
|
+
)
|
|
1457
|
+
if target is None:
|
|
1458
|
+
logger.warning(
|
|
1459
|
+
"[%s] could not parse intersection_of target: %s", ontology_prefix, line
|
|
1460
|
+
)
|
|
1461
|
+
continue
|
|
1462
|
+
term.append_intersection_of(predicate, target)
|
|
1463
|
+
else:
|
|
1464
|
+
term.append_intersection_of(predicate)
|
|
561
1465
|
|
|
562
1466
|
|
|
563
1467
|
def iterate_node_relationships(
|
|
564
1468
|
data: Mapping[str, Any],
|
|
565
1469
|
*,
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
1470
|
+
node: Reference,
|
|
1471
|
+
strict: bool = False,
|
|
1472
|
+
ontology_prefix: str,
|
|
1473
|
+
upgrade: bool,
|
|
569
1474
|
) -> Iterable[tuple[Reference, Reference]]:
|
|
570
1475
|
"""Extract relationships from a :mod:`obonet` node's data."""
|
|
571
|
-
for
|
|
572
|
-
relation_curie, target_curie =
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
target = Reference.from_curie(target_curie, strict=strict)
|
|
589
|
-
if target is None:
|
|
590
|
-
logger.warning(
|
|
591
|
-
"[%s:%s] %s could not parse target %s", prefix, identifier, relation, target_curie
|
|
592
|
-
)
|
|
593
|
-
continue
|
|
1476
|
+
for line in data.get("relationship", []):
|
|
1477
|
+
relation_curie, target_curie = line.split(" ")
|
|
1478
|
+
|
|
1479
|
+
predicate = _obo_parse_identifier(
|
|
1480
|
+
relation_curie,
|
|
1481
|
+
strict=strict,
|
|
1482
|
+
ontology_prefix=ontology_prefix,
|
|
1483
|
+
node=node,
|
|
1484
|
+
upgrade=upgrade,
|
|
1485
|
+
line=line,
|
|
1486
|
+
context="relationship predicate",
|
|
1487
|
+
)
|
|
1488
|
+
match predicate:
|
|
1489
|
+
# TODO extend with other exception handling
|
|
1490
|
+
case None:
|
|
1491
|
+
logger.warning("[%s] could not parse relation %s", node.curie, relation_curie)
|
|
1492
|
+
continue
|
|
594
1493
|
|
|
595
|
-
|
|
1494
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1495
|
+
target_curie,
|
|
1496
|
+
ontology_prefix=ontology_prefix,
|
|
1497
|
+
node=node,
|
|
1498
|
+
predicate=predicate,
|
|
1499
|
+
line=line,
|
|
1500
|
+
context="relationship target",
|
|
1501
|
+
upgrade=upgrade,
|
|
1502
|
+
):
|
|
1503
|
+
case Reference() as target:
|
|
1504
|
+
yield predicate, target
|
|
1505
|
+
case ParseError() as exc:
|
|
1506
|
+
if strict:
|
|
1507
|
+
raise exc
|
|
1508
|
+
else:
|
|
1509
|
+
logger.warning(str(exc))
|
|
596
1510
|
|
|
597
1511
|
|
|
598
1512
|
def iterate_node_xrefs(
|
|
599
|
-
*,
|
|
600
|
-
|
|
1513
|
+
*,
|
|
1514
|
+
data: Mapping[str, Any],
|
|
1515
|
+
strict: bool = False,
|
|
1516
|
+
ontology_prefix: str,
|
|
1517
|
+
node: Reference,
|
|
1518
|
+
upgrade: bool,
|
|
1519
|
+
) -> Iterable[tuple[Reference, list[Reference | OBOLiteral]]]:
|
|
601
1520
|
"""Extract xrefs from a :mod:`obonet` node's data."""
|
|
602
|
-
for
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
1521
|
+
for line in data.get("xref", []):
|
|
1522
|
+
line = line.strip()
|
|
1523
|
+
if pair := _parse_xref_line(
|
|
1524
|
+
line.strip(),
|
|
1525
|
+
strict=strict,
|
|
1526
|
+
node=node,
|
|
1527
|
+
ontology_prefix=ontology_prefix,
|
|
1528
|
+
upgrade=upgrade,
|
|
1529
|
+
):
|
|
1530
|
+
yield pair
|
|
1531
|
+
|
|
1532
|
+
|
|
1533
|
+
def _parse_xref_line(
|
|
1534
|
+
line: str, *, strict: bool = False, ontology_prefix: str, node: Reference, upgrade: bool
|
|
1535
|
+
) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
|
|
1536
|
+
xref, _, rest = line.partition(" [")
|
|
1537
|
+
|
|
1538
|
+
if str_is_blacklisted(xref, ontology_prefix=ontology_prefix) or ":" not in xref:
|
|
1539
|
+
return None # sometimes xref to self... weird
|
|
1540
|
+
|
|
1541
|
+
xref = remap_prefix(xref, ontology_prefix=ontology_prefix)
|
|
1542
|
+
|
|
1543
|
+
split_space = " " in xref
|
|
1544
|
+
if split_space:
|
|
1545
|
+
_xref_split = xref.split(" ", 1)
|
|
1546
|
+
if _xref_split[1][0] not in {'"', "("}:
|
|
1547
|
+
logger.debug("[%s] Problem with space in xref %s", node.curie, xref)
|
|
1548
|
+
return None
|
|
1549
|
+
xref = _xref_split[0]
|
|
1550
|
+
|
|
1551
|
+
xref_ref = _parse_str_or_curie_or_uri_helper(
|
|
1552
|
+
xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
|
|
1553
|
+
)
|
|
1554
|
+
match xref_ref:
|
|
1555
|
+
case BlacklistedError():
|
|
1556
|
+
return None
|
|
1557
|
+
case ParseError() as exc:
|
|
1558
|
+
if strict:
|
|
1559
|
+
raise exc
|
|
1560
|
+
else:
|
|
1561
|
+
if not XREF_PROVENANCE_COUNTER[ontology_prefix, xref]:
|
|
1562
|
+
logger.warning(str(exc))
|
|
1563
|
+
XREF_PROVENANCE_COUNTER[ontology_prefix, xref] += 1
|
|
1564
|
+
return None
|
|
1565
|
+
|
|
1566
|
+
if rest:
|
|
1567
|
+
rest_front, _, _rest_rest = rest.partition("]")
|
|
1568
|
+
provenance = _parse_provenance_list(
|
|
1569
|
+
rest_front,
|
|
1570
|
+
node=node,
|
|
1571
|
+
ontology_prefix=ontology_prefix,
|
|
1572
|
+
counter=XREF_PROVENANCE_COUNTER,
|
|
1573
|
+
scope_text="xref provenance",
|
|
1574
|
+
line=line,
|
|
1575
|
+
strict=strict,
|
|
1576
|
+
)
|
|
1577
|
+
else:
|
|
1578
|
+
provenance = []
|
|
607
1579
|
|
|
608
|
-
|
|
1580
|
+
return xref_ref, provenance
|
|
609
1581
|
|
|
610
|
-
split_space = " " in xref
|
|
611
|
-
if split_space:
|
|
612
|
-
_xref_split = xref.split(" ", 1)
|
|
613
|
-
if _xref_split[1][0] not in {'"', "("}:
|
|
614
|
-
logger.debug("[%s] Problem with space in xref %s", prefix, xref)
|
|
615
|
-
continue
|
|
616
|
-
xref = _xref_split[0]
|
|
617
1582
|
|
|
618
|
-
|
|
619
|
-
if yv is not None:
|
|
620
|
-
yield yv
|
|
1583
|
+
XREF_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
|