pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/reader.py
CHANGED
|
@@ -1,33 +1,59 @@
|
|
|
1
1
|
"""OBO Readers."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
6
|
+
import typing as t
|
|
7
|
+
from collections import Counter
|
|
4
8
|
from collections.abc import Iterable, Mapping
|
|
5
9
|
from datetime import datetime
|
|
10
|
+
from io import StringIO
|
|
6
11
|
from pathlib import Path
|
|
7
|
-
from
|
|
12
|
+
from textwrap import dedent
|
|
13
|
+
from typing import Any
|
|
8
14
|
|
|
9
15
|
import bioregistry
|
|
10
16
|
import networkx as nx
|
|
17
|
+
from curies import ReferenceTuple
|
|
18
|
+
from curies.preprocessing import BlocklistError
|
|
19
|
+
from curies.vocabulary import SynonymScope
|
|
11
20
|
from more_itertools import pairwise
|
|
12
21
|
from tqdm.auto import tqdm
|
|
13
22
|
|
|
14
23
|
from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
|
|
15
|
-
from .identifier_utils import
|
|
16
|
-
|
|
24
|
+
from .identifier_utils import (
|
|
25
|
+
NotCURIEError,
|
|
26
|
+
ParseError,
|
|
27
|
+
UnparsableIRIError,
|
|
28
|
+
_is_valid_identifier,
|
|
29
|
+
_parse_str_or_curie_or_uri_helper,
|
|
30
|
+
get_rules,
|
|
31
|
+
)
|
|
32
|
+
from .reader_utils import (
|
|
33
|
+
_chomp_axioms,
|
|
34
|
+
_chomp_references,
|
|
35
|
+
_chomp_specificity,
|
|
36
|
+
_chomp_typedef,
|
|
37
|
+
_parse_provenance_list,
|
|
38
|
+
)
|
|
17
39
|
from .struct import (
|
|
18
40
|
Obo,
|
|
19
41
|
Reference,
|
|
20
42
|
Synonym,
|
|
21
|
-
SynonymSpecificities,
|
|
22
|
-
SynonymSpecificity,
|
|
23
43
|
SynonymTypeDef,
|
|
24
44
|
Term,
|
|
25
45
|
TypeDef,
|
|
46
|
+
default_reference,
|
|
26
47
|
make_ad_hoc_ontology,
|
|
27
48
|
)
|
|
28
|
-
from .struct
|
|
29
|
-
from .struct.
|
|
30
|
-
from .
|
|
49
|
+
from .struct import vocabulary as v
|
|
50
|
+
from .struct.reference import OBOLiteral, _obo_parse_identifier
|
|
51
|
+
from .struct.struct_utils import Annotation, Stanza
|
|
52
|
+
from .struct.typedef import comment as has_comment
|
|
53
|
+
from .struct.typedef import default_typedefs, has_ontology_root_term
|
|
54
|
+
from .utils.cache import write_gzipped_graph
|
|
55
|
+
from .utils.io import safe_open
|
|
56
|
+
from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version
|
|
31
57
|
|
|
32
58
|
__all__ = [
|
|
33
59
|
"from_obo_path",
|
|
@@ -36,369 +62,1026 @@ __all__ = [
|
|
|
36
62
|
|
|
37
63
|
logger = logging.getLogger(__name__)
|
|
38
64
|
|
|
39
|
-
# FIXME use bioontologies
|
|
40
|
-
# RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
|
|
41
|
-
RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
|
|
42
|
-
"part_of": part_of.pair,
|
|
43
|
-
"has_part": has_part.pair,
|
|
44
|
-
"develops_from": develops_from.pair,
|
|
45
|
-
"seeAlso": ("rdf", "seeAlso"),
|
|
46
|
-
"dc-contributor": ("dc", "contributor"),
|
|
47
|
-
"dc-creator": ("dc", "creator"),
|
|
48
|
-
}
|
|
49
|
-
|
|
50
65
|
|
|
51
66
|
def from_obo_path(
|
|
52
|
-
path:
|
|
67
|
+
path: str | Path,
|
|
68
|
+
prefix: str | None = None,
|
|
69
|
+
*,
|
|
70
|
+
strict: bool = False,
|
|
71
|
+
version: str | None,
|
|
72
|
+
upgrade: bool = True,
|
|
73
|
+
use_tqdm: bool = False,
|
|
74
|
+
ignore_obsolete: bool = False,
|
|
75
|
+
_cache_path: Path | None = None,
|
|
53
76
|
) -> Obo:
|
|
54
77
|
"""Get the OBO graph from a path."""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
78
|
+
path = Path(path).expanduser().resolve()
|
|
79
|
+
if path.suffix.endswith(".zip"):
|
|
80
|
+
import io
|
|
81
|
+
import zipfile
|
|
82
|
+
|
|
83
|
+
logger.info("[%s] parsing zipped OBO with obonet from %s", prefix or "<unknown>", path)
|
|
84
|
+
with zipfile.ZipFile(path) as zf:
|
|
85
|
+
with zf.open(path.name.removesuffix(".zip"), "r") as file:
|
|
86
|
+
content = file.read().decode("utf-8")
|
|
87
|
+
graph = _read_obo(
|
|
88
|
+
io.StringIO(content), prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm
|
|
89
|
+
)
|
|
90
|
+
else:
|
|
91
|
+
logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
|
|
92
|
+
with safe_open(path, read=True) as file:
|
|
93
|
+
graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
|
|
68
94
|
|
|
69
95
|
if prefix:
|
|
70
96
|
# Make sure the graph is named properly
|
|
71
97
|
_clean_graph_ontology(graph, prefix)
|
|
72
98
|
|
|
99
|
+
if _cache_path:
|
|
100
|
+
logger.info("[%s] writing obonet cache to %s", prefix, _cache_path)
|
|
101
|
+
write_gzipped_graph(path=_cache_path, graph=graph)
|
|
102
|
+
|
|
73
103
|
# Convert to an Obo instance and return
|
|
74
|
-
return from_obonet(graph, strict=strict,
|
|
104
|
+
return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _read_obo(
|
|
108
|
+
filelike, prefix: str | None, ignore_obsolete: bool, use_tqdm: bool = True
|
|
109
|
+
) -> nx.MultiDiGraph:
|
|
110
|
+
import obonet
|
|
111
|
+
|
|
112
|
+
return obonet.read_obo(
|
|
113
|
+
tqdm(
|
|
114
|
+
filelike,
|
|
115
|
+
unit_scale=True,
|
|
116
|
+
desc=f"[{prefix or ''}] parsing OBO",
|
|
117
|
+
disable=not use_tqdm,
|
|
118
|
+
leave=True,
|
|
119
|
+
),
|
|
120
|
+
ignore_obsolete=ignore_obsolete,
|
|
121
|
+
)
|
|
75
122
|
|
|
76
123
|
|
|
77
|
-
def
|
|
124
|
+
def _normalize_prefix_strict(prefix: str) -> str:
|
|
125
|
+
n = bioregistry.normalize_prefix(prefix)
|
|
126
|
+
if n is None:
|
|
127
|
+
raise ValueError(f"unknown prefix: {prefix}")
|
|
128
|
+
return n
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def from_str(
|
|
132
|
+
text: str,
|
|
133
|
+
*,
|
|
134
|
+
strict: bool = False,
|
|
135
|
+
version: str | None = None,
|
|
136
|
+
upgrade: bool = True,
|
|
137
|
+
ignore_obsolete: bool = False,
|
|
138
|
+
use_tqdm: bool = False,
|
|
139
|
+
) -> Obo:
|
|
140
|
+
"""Read an ontology from a string representation."""
|
|
141
|
+
import obonet
|
|
142
|
+
|
|
143
|
+
text = dedent(text).strip()
|
|
144
|
+
io = StringIO()
|
|
145
|
+
io.write(text)
|
|
146
|
+
io.seek(0)
|
|
147
|
+
graph = obonet.read_obo(io, ignore_obsolete=ignore_obsolete)
|
|
148
|
+
return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def from_obonet(
|
|
152
|
+
graph: nx.MultiDiGraph,
|
|
153
|
+
*,
|
|
154
|
+
strict: bool = False,
|
|
155
|
+
version: str | None = None,
|
|
156
|
+
upgrade: bool = True,
|
|
157
|
+
use_tqdm: bool = False,
|
|
158
|
+
) -> Obo:
|
|
78
159
|
"""Get all of the terms from a OBO graph."""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
name = _get_name(graph=graph, ontology=ontology)
|
|
87
|
-
|
|
88
|
-
data_version = graph.graph.get("data-version")
|
|
89
|
-
if not data_version:
|
|
90
|
-
if date is not None:
|
|
91
|
-
data_version = date.strftime("%Y-%m-%d")
|
|
92
|
-
logger.info(
|
|
93
|
-
"[%s] does not report a version. falling back to date: %s",
|
|
94
|
-
ontology,
|
|
95
|
-
data_version,
|
|
96
|
-
)
|
|
97
|
-
else:
|
|
98
|
-
logger.warning("[%s] does not report a version nor a date", ontology)
|
|
99
|
-
else:
|
|
100
|
-
data_version = cleanup_version(data_version=data_version, prefix=ontology)
|
|
101
|
-
if data_version is not None:
|
|
102
|
-
logger.info("[%s] using version %s", ontology, data_version)
|
|
103
|
-
elif date is not None:
|
|
104
|
-
logger.info(
|
|
105
|
-
"[%s] unrecognized version format, falling back to date: %s",
|
|
106
|
-
ontology,
|
|
107
|
-
data_version,
|
|
108
|
-
)
|
|
109
|
-
data_version = date.strftime("%Y-%m-%d")
|
|
110
|
-
else:
|
|
111
|
-
logger.warning(
|
|
112
|
-
"[%s] UNRECOGNIZED VERSION FORMAT AND MISSING DATE: %s", ontology, data_version
|
|
113
|
-
)
|
|
160
|
+
ontology_prefix_raw = graph.graph["ontology"]
|
|
161
|
+
ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
|
|
162
|
+
logger.info("[%s] extracting OBO using obonet", ontology_prefix)
|
|
163
|
+
|
|
164
|
+
date = _get_date(graph=graph, ontology_prefix=ontology_prefix)
|
|
165
|
+
name = _get_name(graph=graph, ontology_prefix=ontology_prefix)
|
|
166
|
+
imports = graph.graph.get("import")
|
|
114
167
|
|
|
168
|
+
macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
|
|
169
|
+
|
|
170
|
+
data_version = _clean_graph_version(
|
|
171
|
+
graph, ontology_prefix=ontology_prefix, version=version, date=date
|
|
172
|
+
)
|
|
115
173
|
if data_version and "/" in data_version:
|
|
116
|
-
raise ValueError(
|
|
117
|
-
|
|
118
|
-
#: Parsed CURIEs to references (even external ones)
|
|
119
|
-
reference_it = (
|
|
120
|
-
Reference(
|
|
121
|
-
prefix=prefix,
|
|
122
|
-
identifier=bioregistry.standardize_identifier(prefix, identifier),
|
|
123
|
-
# if name isn't available, it means its external to this ontology
|
|
124
|
-
name=data.get("name"),
|
|
174
|
+
raise ValueError(
|
|
175
|
+
f"[{ontology_prefix}] slashes not allowed in data versions because of filesystem usage: {data_version}"
|
|
125
176
|
)
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
177
|
+
|
|
178
|
+
missing_typedefs: set[ReferenceTuple] = set()
|
|
179
|
+
|
|
180
|
+
subset_typedefs = _get_subsetdefs(graph.graph, ontology_prefix=ontology_prefix)
|
|
181
|
+
|
|
182
|
+
root_terms: list[Reference] = []
|
|
183
|
+
property_values: list[Annotation] = []
|
|
184
|
+
for ann in iterate_node_properties(
|
|
185
|
+
graph.graph,
|
|
186
|
+
ontology_prefix=ontology_prefix,
|
|
187
|
+
upgrade=upgrade,
|
|
188
|
+
node=Reference(prefix="obo", identifier=ontology_prefix),
|
|
189
|
+
strict=strict,
|
|
190
|
+
context="graph property",
|
|
191
|
+
):
|
|
192
|
+
if ann.predicate.pair == has_ontology_root_term.pair:
|
|
193
|
+
match ann.value:
|
|
194
|
+
case OBOLiteral():
|
|
195
|
+
logger.warning(
|
|
196
|
+
"[%s] tried to use a literal as an ontology root: %s",
|
|
197
|
+
ontology_prefix,
|
|
198
|
+
ann.value.value,
|
|
199
|
+
)
|
|
200
|
+
continue
|
|
201
|
+
case Reference():
|
|
202
|
+
root_terms.append(ann.value)
|
|
203
|
+
else:
|
|
204
|
+
property_values.append(ann)
|
|
205
|
+
|
|
206
|
+
for remark in graph.graph.get("remark", []):
|
|
207
|
+
property_values.append(Annotation(has_comment.reference, OBOLiteral.string(remark)))
|
|
208
|
+
|
|
209
|
+
idspaces: dict[str, str] = {}
|
|
210
|
+
for x in graph.graph.get("idspace", []):
|
|
211
|
+
prefix, uri_prefix, *_ = (y.strip() for y in x.split(" ", 2))
|
|
212
|
+
idspaces[prefix] = uri_prefix
|
|
131
213
|
|
|
132
214
|
#: CURIEs to typedefs
|
|
133
|
-
typedefs: Mapping[
|
|
134
|
-
typedef.pair: typedef
|
|
215
|
+
typedefs: Mapping[ReferenceTuple, TypeDef] = {
|
|
216
|
+
typedef.pair: typedef
|
|
217
|
+
for typedef in iterate_typedefs(
|
|
218
|
+
graph,
|
|
219
|
+
ontology_prefix=ontology_prefix,
|
|
220
|
+
strict=strict,
|
|
221
|
+
upgrade=upgrade,
|
|
222
|
+
macro_config=macro_config,
|
|
223
|
+
)
|
|
135
224
|
}
|
|
136
225
|
|
|
137
|
-
synonym_typedefs: Mapping[
|
|
138
|
-
synonym_typedef.
|
|
139
|
-
for synonym_typedef in iterate_graph_synonym_typedefs(
|
|
226
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {
|
|
227
|
+
synonym_typedef.pair: synonym_typedef
|
|
228
|
+
for synonym_typedef in iterate_graph_synonym_typedefs(
|
|
229
|
+
graph,
|
|
230
|
+
ontology_prefix=ontology_prefix,
|
|
231
|
+
strict=strict,
|
|
232
|
+
upgrade=upgrade,
|
|
233
|
+
)
|
|
140
234
|
}
|
|
141
235
|
|
|
142
|
-
|
|
236
|
+
terms = _get_terms(
|
|
237
|
+
graph,
|
|
238
|
+
strict=strict,
|
|
239
|
+
ontology_prefix=ontology_prefix,
|
|
240
|
+
upgrade=upgrade,
|
|
241
|
+
typedefs=typedefs,
|
|
242
|
+
missing_typedefs=missing_typedefs,
|
|
243
|
+
synonym_typedefs=synonym_typedefs,
|
|
244
|
+
subset_typedefs=subset_typedefs,
|
|
245
|
+
macro_config=macro_config,
|
|
246
|
+
use_tqdm=use_tqdm,
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
return make_ad_hoc_ontology(
|
|
250
|
+
_ontology=ontology_prefix,
|
|
251
|
+
_name=name,
|
|
252
|
+
_auto_generated_by=graph.graph.get("auto-generated-by"),
|
|
253
|
+
_typedefs=list(typedefs.values()),
|
|
254
|
+
_synonym_typedefs=list(synonym_typedefs.values()),
|
|
255
|
+
_date=date,
|
|
256
|
+
_data_version=data_version,
|
|
257
|
+
_root_terms=root_terms,
|
|
258
|
+
terms=terms,
|
|
259
|
+
_property_values=property_values,
|
|
260
|
+
_subsetdefs=subset_typedefs,
|
|
261
|
+
_imports=imports,
|
|
262
|
+
_idspaces=idspaces,
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _get_terms(
|
|
267
|
+
graph,
|
|
268
|
+
*,
|
|
269
|
+
strict: bool,
|
|
270
|
+
ontology_prefix: str,
|
|
271
|
+
upgrade: bool,
|
|
272
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
273
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
274
|
+
subset_typedefs,
|
|
275
|
+
missing_typedefs: set[ReferenceTuple],
|
|
276
|
+
macro_config: MacroConfig,
|
|
277
|
+
use_tqdm: bool = False,
|
|
278
|
+
) -> list[Term]:
|
|
143
279
|
terms = []
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
280
|
+
for reference, data in _iter_obo_graph(
|
|
281
|
+
graph=graph,
|
|
282
|
+
strict=strict,
|
|
283
|
+
ontology_prefix=ontology_prefix,
|
|
284
|
+
use_tqdm=use_tqdm,
|
|
285
|
+
upgrade=upgrade,
|
|
286
|
+
):
|
|
287
|
+
if reference.prefix != ontology_prefix:
|
|
288
|
+
continue
|
|
289
|
+
if not data:
|
|
290
|
+
# this allows us to skip anything that isn't really defined
|
|
291
|
+
# caveat: this misses terms that are just defined with an ID
|
|
147
292
|
continue
|
|
148
293
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
raise e
|
|
157
|
-
xrefs, provenance = [], []
|
|
158
|
-
for node_xref in node_xrefs:
|
|
159
|
-
if node_xref.prefix in PROVENANCE_PREFIXES:
|
|
160
|
-
provenance.append(node_xref)
|
|
161
|
-
else:
|
|
162
|
-
xrefs.append(node_xref)
|
|
163
|
-
n_xrefs += len(xrefs)
|
|
294
|
+
term = Term(
|
|
295
|
+
reference=reference,
|
|
296
|
+
builtin=_get_boolean(data, "builtin"),
|
|
297
|
+
is_anonymous=_get_boolean(data, "is_anonymous"),
|
|
298
|
+
is_obsolete=_get_boolean(data, "is_obsolete"),
|
|
299
|
+
namespace=data.get("namespace"),
|
|
300
|
+
)
|
|
164
301
|
|
|
165
|
-
|
|
166
|
-
|
|
302
|
+
_process_alts(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
303
|
+
_process_parents(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
304
|
+
_process_synonyms(
|
|
305
|
+
term,
|
|
306
|
+
data,
|
|
307
|
+
ontology_prefix=ontology_prefix,
|
|
308
|
+
strict=strict,
|
|
309
|
+
upgrade=upgrade,
|
|
310
|
+
synonym_typedefs=synonym_typedefs,
|
|
311
|
+
)
|
|
312
|
+
_process_xrefs(
|
|
313
|
+
term,
|
|
314
|
+
data,
|
|
315
|
+
ontology_prefix=ontology_prefix,
|
|
316
|
+
strict=strict,
|
|
317
|
+
macro_config=macro_config,
|
|
318
|
+
upgrade=upgrade,
|
|
319
|
+
)
|
|
320
|
+
_process_properties(
|
|
321
|
+
term,
|
|
322
|
+
data,
|
|
323
|
+
ontology_prefix=ontology_prefix,
|
|
324
|
+
strict=strict,
|
|
325
|
+
upgrade=upgrade,
|
|
326
|
+
typedefs=typedefs,
|
|
167
327
|
)
|
|
168
|
-
|
|
169
|
-
|
|
328
|
+
_process_relations(
|
|
329
|
+
term,
|
|
330
|
+
data,
|
|
331
|
+
ontology_prefix=ontology_prefix,
|
|
332
|
+
strict=strict,
|
|
333
|
+
upgrade=upgrade,
|
|
334
|
+
typedefs=typedefs,
|
|
335
|
+
missing_typedefs=missing_typedefs,
|
|
336
|
+
)
|
|
337
|
+
_process_replaced_by(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
338
|
+
_process_subsets(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
339
|
+
_process_intersection_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
340
|
+
_process_union_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
341
|
+
_process_equivalent_to(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
342
|
+
_process_disjoint_from(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
343
|
+
_process_consider(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
344
|
+
_process_comment(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
345
|
+
_process_description(term, data, ontology_prefix=ontology_prefix, strict=strict)
|
|
346
|
+
_process_creation_date(term, data)
|
|
170
347
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
except MissingPrefixError as e:
|
|
174
|
-
e.reference = reference
|
|
175
|
-
raise e
|
|
176
|
-
n_alt_ids += len(alt_ids)
|
|
348
|
+
terms.append(term)
|
|
349
|
+
return terms
|
|
177
350
|
|
|
178
|
-
try:
|
|
179
|
-
parents = list(
|
|
180
|
-
iterate_node_parents(
|
|
181
|
-
data,
|
|
182
|
-
prefix=prefix,
|
|
183
|
-
identifier=identifier,
|
|
184
|
-
strict=strict,
|
|
185
|
-
)
|
|
186
|
-
)
|
|
187
|
-
except MissingPrefixError as e:
|
|
188
|
-
e.reference = reference
|
|
189
|
-
raise e
|
|
190
|
-
n_parents += len(parents)
|
|
191
351
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
352
|
+
def _process_description(term: Stanza, data, *, ontology_prefix: str, strict: bool):
|
|
353
|
+
definition, definition_references = get_definition(
|
|
354
|
+
data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
355
|
+
)
|
|
356
|
+
term.definition = definition
|
|
357
|
+
if term.definition:
|
|
358
|
+
for definition_reference in definition_references:
|
|
359
|
+
term._append_annotation(
|
|
360
|
+
v.has_description,
|
|
361
|
+
OBOLiteral.string(term.definition),
|
|
362
|
+
Annotation(v.has_dbxref, definition_reference),
|
|
199
363
|
)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _process_comment(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
367
|
+
if comment := data.get("comment"):
|
|
368
|
+
term.append_comment(comment)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _process_creation_date(term: Stanza, data) -> None:
|
|
372
|
+
date_str = data.get("creation_date")
|
|
373
|
+
if not date_str:
|
|
374
|
+
return
|
|
375
|
+
if isinstance(date_str, list):
|
|
376
|
+
date_str = date_str[0]
|
|
377
|
+
try:
|
|
378
|
+
term.append_creation_date(date_str)
|
|
379
|
+
except ValueError:
|
|
380
|
+
logger.warning("[%s] failed to parse creation_date: %s", term.reference.curie, date_str)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _process_union_of(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
384
|
+
for reference in iterate_node_reference_tag(
|
|
385
|
+
"union_of", data=data, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
|
|
386
|
+
):
|
|
387
|
+
term.append_union_of(reference)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _process_equivalent_to(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
391
|
+
for reference in iterate_node_reference_tag(
|
|
392
|
+
"equivalent_to",
|
|
393
|
+
data=data,
|
|
394
|
+
ontology_prefix=ontology_prefix,
|
|
395
|
+
strict=strict,
|
|
396
|
+
node=term.reference,
|
|
397
|
+
):
|
|
398
|
+
term.append_equivalent_to(reference)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _process_disjoint_from(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
402
|
+
for reference in iterate_node_reference_tag(
|
|
403
|
+
"disjoint_from",
|
|
404
|
+
data=data,
|
|
405
|
+
ontology_prefix=ontology_prefix,
|
|
406
|
+
strict=strict,
|
|
407
|
+
node=term.reference,
|
|
408
|
+
):
|
|
409
|
+
term.append_disjoint_from(reference)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _process_alts(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
413
|
+
for alt_reference in iterate_node_reference_tag(
|
|
414
|
+
"alt_id", data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
415
|
+
):
|
|
416
|
+
term.append_alt(alt_reference)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _process_parents(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
|
|
420
|
+
for tag in ["is_a", "instance_of"]:
|
|
421
|
+
for parent in iterate_node_reference_tag(
|
|
422
|
+
tag, data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
|
|
423
|
+
):
|
|
424
|
+
term.append_parent(parent)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _process_synonyms(
|
|
428
|
+
term: Stanza,
|
|
429
|
+
data,
|
|
430
|
+
*,
|
|
431
|
+
ontology_prefix: str,
|
|
432
|
+
strict: bool,
|
|
433
|
+
upgrade: bool,
|
|
434
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
435
|
+
) -> None:
|
|
436
|
+
synonyms = list(
|
|
437
|
+
iterate_node_synonyms(
|
|
438
|
+
data,
|
|
439
|
+
synonym_typedefs,
|
|
440
|
+
node=term.reference,
|
|
441
|
+
strict=strict,
|
|
442
|
+
ontology_prefix=ontology_prefix,
|
|
443
|
+
upgrade=upgrade,
|
|
200
444
|
)
|
|
201
|
-
|
|
445
|
+
)
|
|
446
|
+
for synonym in synonyms:
|
|
447
|
+
term.append_synonym(synonym)
|
|
202
448
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
449
|
+
|
|
450
|
+
def _process_xrefs(
|
|
451
|
+
term: Stanza,
|
|
452
|
+
data,
|
|
453
|
+
*,
|
|
454
|
+
ontology_prefix: str,
|
|
455
|
+
strict: bool,
|
|
456
|
+
macro_config: MacroConfig,
|
|
457
|
+
upgrade: bool,
|
|
458
|
+
) -> None:
|
|
459
|
+
for reference, provenance in iterate_node_xrefs(
|
|
460
|
+
data=data,
|
|
461
|
+
strict=strict,
|
|
462
|
+
ontology_prefix=ontology_prefix,
|
|
463
|
+
node=term.reference,
|
|
464
|
+
upgrade=upgrade,
|
|
465
|
+
):
|
|
466
|
+
_handle_xref(term, reference, provenance=provenance, macro_config=macro_config)
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _process_properties(
|
|
470
|
+
term: Stanza, data, *, ontology_prefix: str, strict: bool, upgrade: bool, typedefs
|
|
471
|
+
) -> None:
|
|
472
|
+
for ann in iterate_node_properties(
|
|
473
|
+
data,
|
|
474
|
+
node=term.reference,
|
|
475
|
+
strict=strict,
|
|
476
|
+
ontology_prefix=ontology_prefix,
|
|
477
|
+
upgrade=upgrade,
|
|
478
|
+
context="stanza property",
|
|
479
|
+
):
|
|
480
|
+
# TODO parse axioms
|
|
481
|
+
term.append_property(ann)
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _process_relations(
|
|
485
|
+
term: Stanza,
|
|
486
|
+
data,
|
|
487
|
+
*,
|
|
488
|
+
ontology_prefix: str,
|
|
489
|
+
strict: bool,
|
|
490
|
+
upgrade: bool,
|
|
491
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
492
|
+
missing_typedefs: set[ReferenceTuple],
|
|
493
|
+
) -> None:
|
|
494
|
+
relations_references = list(
|
|
495
|
+
iterate_node_relationships(
|
|
496
|
+
data,
|
|
497
|
+
node=term.reference,
|
|
498
|
+
strict=strict,
|
|
499
|
+
ontology_prefix=ontology_prefix,
|
|
500
|
+
upgrade=upgrade,
|
|
211
501
|
)
|
|
502
|
+
)
|
|
503
|
+
for relation, reference in relations_references:
|
|
504
|
+
if (
|
|
505
|
+
relation.pair not in typedefs
|
|
506
|
+
and relation.pair not in default_typedefs
|
|
507
|
+
and relation.pair not in missing_typedefs
|
|
508
|
+
):
|
|
509
|
+
missing_typedefs.add(relation.pair)
|
|
510
|
+
logger.warning("[%s] has no typedef for %s", ontology_prefix, relation.curie)
|
|
511
|
+
logger.debug("[%s] available typedefs: %s", ontology_prefix, set(typedefs))
|
|
512
|
+
# TODO parse axioms
|
|
513
|
+
term.append_relationship(relation, reference)
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _process_replaced_by(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
    """Append each reference parsed from the ``replaced_by`` tag in ``data`` to ``stanza``."""
    replacements = iterate_node_reference_tag(
        "replaced_by",
        data,
        ontology_prefix=ontology_prefix,
        strict=strict,
        node=stanza.reference,
    )
    for replacement in replacements:
        stanza.append_replaced_by(replacement)
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
    """Append each reference parsed from the ``subset`` tag in ``data`` to ``stanza``.

    ``SUBSET_ERROR_COUNTER`` is handed to the tag iterator as its ``counter``.
    """
    subsets = iterate_node_reference_tag(
        "subset",
        data,
        counter=SUBSET_ERROR_COUNTER,
        ontology_prefix=ontology_prefix,
        strict=strict,
        node=stanza.reference,
    )
    for subset in subsets:
        stanza.append_subset(subset)
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
|
|
536
|
+
value = data.get(tag)
|
|
537
|
+
if value is None:
|
|
538
|
+
return None
|
|
539
|
+
if isinstance(value, list):
|
|
540
|
+
value = value[0]
|
|
541
|
+
if value == "false":
|
|
542
|
+
return False
|
|
543
|
+
if value == "true":
|
|
544
|
+
return True
|
|
545
|
+
raise ValueError(value)
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _get_reference(
|
|
549
|
+
data: Mapping[str, Any], tag: str, *, ontology_prefix: str, strict: bool, **kwargs
|
|
550
|
+
) -> Reference | None:
|
|
551
|
+
value = data.get(tag)
|
|
552
|
+
if value is None:
|
|
553
|
+
return None
|
|
554
|
+
if isinstance(value, list):
|
|
555
|
+
value = value[0]
|
|
556
|
+
return _obo_parse_identifier(
|
|
557
|
+
value, ontology_prefix=ontology_prefix, strict=strict, context=tag, **kwargs
|
|
558
|
+
)
|
|
212
559
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
560
|
+
|
|
561
|
+
class MacroConfig:
    """Lookup tables built from the ``treat-xrefs-as-*`` entries of graph metadata."""

    def __init__(
        self, data: Mapping[str, list[str]] | None = None, *, strict: bool, ontology_prefix: str
    ):
        """Build the macro lookups from obonet graph metadata.

        :param data: The graph metadata; ``None`` is treated as an empty mapping.
        :param strict: Forwarded to ``_obo_parse_identifier`` when parsing predicates/targets.
        :param ontology_prefix: The prefix of the ontology being read, used for
            parsing and for the messages written on malformed lines.

        Entries whose prefix cannot be normalized, or whose predicate/target
        cannot be parsed, are skipped.
        """
        metadata: Mapping[str, list[str]] = {} if data is None else data

        self.treat_xrefs_as_equivalent: set[str] = set()
        for raw_prefix in metadata.get("treat-xrefs-as-equivalent", []):
            norm_prefix = bioregistry.normalize_prefix(raw_prefix)
            if norm_prefix is not None:
                self.treat_xrefs_as_equivalent.add(norm_prefix)

        self.treat_xrefs_as_genus_differentia: dict[str, tuple[Reference, Reference]] = {}
        for line in metadata.get("treat-xrefs-as-genus-differentia", []):
            parts = line.split()
            if len(parts) != 3:
                # this happens in `plana`, where there's an incorrectly written
                # line `CARO part_of NCBITaxon:79327; CL part_of NCBITaxon:79327`
                tqdm.write(
                    f"[{ontology_prefix}] failed to parse treat-xrefs-as-genus-differentia: {line}"
                )
                continue
            raw_prefix, raw_predicate, raw_target = parts

            norm_prefix = bioregistry.normalize_prefix(raw_prefix)
            if norm_prefix is None:
                continue
            predicate = _obo_parse_identifier(
                raw_predicate, ontology_prefix=ontology_prefix, strict=strict
            )
            if predicate is None:
                continue
            target = _obo_parse_identifier(
                raw_target, ontology_prefix=ontology_prefix, strict=strict
            )
            if target is None:
                continue
            self.treat_xrefs_as_genus_differentia[norm_prefix] = (predicate, target)

        self.treat_xrefs_as_relationship: dict[str, Reference] = {}
        for line in metadata.get("treat-xrefs-as-relationship", []):
            parts = line.split()
            if len(parts) != 2:
                tqdm.write(
                    f"[{ontology_prefix}] failed to parse treat-xrefs-as-relationship: {line}"
                )
                continue
            raw_prefix, raw_predicate = parts

            norm_prefix = bioregistry.normalize_prefix(raw_prefix)
            if norm_prefix is None:
                continue
            predicate = _obo_parse_identifier(
                raw_predicate, ontology_prefix=ontology_prefix, strict=strict
            )
            if predicate is None:
                continue
            self.treat_xrefs_as_relationship[norm_prefix] = predicate

        self.treat_xrefs_as_is_a: set[str] = set()
        for raw_prefix in metadata.get("treat-xrefs-as-is_a", []):
            norm_prefix = bioregistry.normalize_prefix(raw_prefix)
            if norm_prefix is not None:
                self.treat_xrefs_as_is_a.add(norm_prefix)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _handle_xref(
    term: Stanza,
    xref: Reference,
    *,
    provenance: list[Reference | OBOLiteral],
    macro_config: MacroConfig | None = None,
) -> Stanza:
    """Add ``xref`` to ``term``, applying the first matching macro from ``macro_config``.

    The macro tables are checked in this order, keyed by ``xref.prefix``:
    equivalent, genus-differentia, relationship, is_a. When none matches (or no
    ``macro_config`` is given), the xref is appended as provenance if its prefix
    is in ``PROVENANCE_PREFIXES``, otherwise as a plain xref.

    :param term: The stanza to extend.
    :param xref: The parsed cross-reference.
    :param provenance: Each entry is wrapped in a ``has_dbxref`` annotation.
    :param macro_config: The macro lookups for the ontology, if any.
    :returns: The result of the ``append_*`` call that was made on ``term``.
    """
    annotations = [Annotation(v.has_dbxref, source) for source in provenance]
    prefix = xref.prefix

    if macro_config is not None:
        if prefix in macro_config.treat_xrefs_as_equivalent:
            return term.append_equivalent(xref, annotations=annotations)

        genus_differentia = macro_config.treat_xrefs_as_genus_differentia.get(prefix)
        if genus_differentia:
            # TODO how to add annotations here?
            if annotations:
                logger.warning(
                    "[%s] unable to add provenance to xref upgraded to intersection_of: %s",
                    term.reference.curie,
                    xref,
                )
            return term.append_intersection_of(xref).append_intersection_of(genus_differentia)

        predicate = macro_config.treat_xrefs_as_relationship.get(prefix)
        if predicate:
            return term.append_relationship(predicate, xref, annotations=annotations)

        if prefix in macro_config.treat_xrefs_as_is_a:
            return term.append_parent(xref, annotations=annotations)

    # TODO this is not what spec calls for, maybe
    #  need a flag in macro config for this
    if prefix in PROVENANCE_PREFIXES:
        return term.append_provenance(xref, annotations=annotations)

    return term.append_xref(xref, annotations=annotations)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
#: Handed to the identifier parsers as their ``counter`` argument when reading
#: ``subset`` tags and ``subsetdef`` header lines
SUBSET_ERROR_COUNTER: Counter[tuple[str, str]] = Counter()
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
def _get_subsetdefs(graph: nx.MultiDiGraph, ontology_prefix: str) -> list[tuple[Reference, str]]:
|
|
671
|
+
rv = []
|
|
672
|
+
for subsetdef in graph.get("subsetdef", []):
|
|
673
|
+
left, _, right = subsetdef.partition(" ")
|
|
674
|
+
if not right:
|
|
675
|
+
logger.warning("[%s] subsetdef did not have two parts", ontology_prefix, subsetdef)
|
|
676
|
+
continue
|
|
677
|
+
left_ref = _obo_parse_identifier(
|
|
678
|
+
left,
|
|
679
|
+
ontology_prefix=ontology_prefix,
|
|
680
|
+
name=right,
|
|
681
|
+
line=subsetdef,
|
|
682
|
+
counter=SUBSET_ERROR_COUNTER,
|
|
683
|
+
)
|
|
684
|
+
if left_ref is None:
|
|
685
|
+
continue
|
|
686
|
+
right = right.strip('"')
|
|
687
|
+
rv.append((left_ref, right))
|
|
688
|
+
return rv
|
|
260
689
|
|
|
261
690
|
|
|
262
691
|
def _clean_graph_ontology(graph, prefix: str) -> None:
|
|
263
692
|
"""Update the ontology entry in the graph's metadata, if necessary."""
|
|
264
693
|
if "ontology" not in graph.graph:
|
|
265
|
-
logger.
|
|
694
|
+
logger.debug('[%s] missing "ontology" key', prefix)
|
|
266
695
|
graph.graph["ontology"] = prefix
|
|
267
696
|
elif not graph.graph["ontology"].isalpha():
|
|
268
|
-
logger.
|
|
269
|
-
"[%s] ontology
|
|
697
|
+
logger.debug(
|
|
698
|
+
"[%s] ontology prefix `%s` has a strange format. replacing with prefix",
|
|
270
699
|
prefix,
|
|
271
700
|
graph.graph["ontology"],
|
|
272
701
|
)
|
|
273
702
|
graph.graph["ontology"] = prefix
|
|
274
703
|
|
|
275
704
|
|
|
705
|
+
def _clean_graph_version(
    graph, ontology_prefix: str, version: str | None, date: datetime | None
) -> str | None:
    """Decide which version string to report for the ontology.

    Precedence: an entry in ``STATIC_VERSION_REWRITES`` for the prefix; then the
    explicitly passed ``version`` (cleaned) when the graph has no ``data-version``
    or its cleaned form disagrees; then the graph's cleaned ``data-version``;
    then ``date`` formatted as ``%Y-%m-%d``; otherwise ``None``.

    :param graph: The graph whose ``data-version`` metadata is read.
    :param ontology_prefix: The prefix of the ontology being read.
    :param version: A version passed in explicitly, if any.
    :param date: The ontology's date, used only as a last resort.
    """
    if ontology_prefix in STATIC_VERSION_REWRITES:
        return STATIC_VERSION_REWRITES[ontology_prefix]

    # an empty string is treated the same as a missing value
    data_version: str | None = graph.graph.get("data-version") or None

    if version:
        injected = cleanup_version(version, prefix=ontology_prefix)
        if not data_version:
            logger.debug(
                "[%s] did not have a version, overriding with %s",
                ontology_prefix,
                injected,
            )
            return injected

        reported = cleanup_version(data_version, prefix=ontology_prefix)
        if reported != injected:
            # in this case, we're going to trust the one that's passed
            # through explicitly more than the graph's content
            logger.debug(
                "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
            )
            return injected

    if data_version:
        reported = cleanup_version(data_version, prefix=ontology_prefix)
        logger.debug("[%s] using version %s", ontology_prefix, reported)
        return reported

    if date is None:
        logger.debug("[%s] does not report a version nor a date", ontology_prefix)
        return None

    from_date = date.strftime("%Y-%m-%d")
    logger.debug(
        "[%s] does not report a version. falling back to date: %s",
        ontology_prefix,
        from_date,
    )
    return from_date
|
|
747
|
+
|
|
748
|
+
|
|
276
749
|
def _iter_obo_graph(
    graph: nx.MultiDiGraph,
    *,
    strict: bool = False,
    ontology_prefix: str,
    use_tqdm: bool = False,
    upgrade: bool,
) -> Iterable[tuple[Reference, Mapping[str, Any]]]:
    """Yield a parsed reference and the raw data for each node in the graph.

    A node ID that is not a CURIE but passes ``_is_valid_identifier`` is yielded
    under ``ontology_prefix`` via ``default_reference``. Other parse errors are
    raised when ``strict`` is set and logged otherwise. Any other parse result
    is skipped silently.
    """
    nodes = tqdm(
        graph.nodes(data=True), disable=not use_tqdm, unit_scale=True, desc=f"[{ontology_prefix}]"
    )
    for node, data in nodes:
        name = data.get("name")
        parsed = _parse_str_or_curie_or_uri_helper(
            node,
            ontology_prefix=ontology_prefix,
            name=name,
            upgrade=upgrade,
            context="stanza ID",
        )
        if isinstance(parsed, Reference):
            yield parsed, data
        elif isinstance(parsed, NotCURIEError):
            if _is_valid_identifier(node):
                yield default_reference(ontology_prefix, node, name=name), data
            elif strict:
                raise parsed
            else:
                logger.warning(str(parsed))
        elif isinstance(parsed, ParseError):
            if strict:
                raise parsed
            logger.warning(str(parsed))
        # if blacklisted, just skip it with no warning
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _get_date(graph, ontology_prefix: str) -> datetime | None:
    """Parse the graph metadata's ``date`` entry using ``DATE_FORMAT``.

    :returns: The parsed datetime, or ``None`` (after logging at info level)
        when the entry is missing or does not match the format.
    """
    try:
        parsed = datetime.strptime(graph.graph["date"], DATE_FORMAT)
    except KeyError:
        logger.info("[%s] does not report a date", ontology_prefix)
    except ValueError:
        logger.info(
            "[%s] reports a date that can't be parsed: %s", ontology_prefix, graph.graph["date"]
        )
    else:
        return parsed
    return None
|
|
300
799
|
|
|
301
800
|
|
|
302
|
-
def _get_name(graph,
|
|
801
|
+
def _get_name(graph, ontology_prefix: str) -> str:
|
|
303
802
|
try:
|
|
304
803
|
rv = graph.graph["name"]
|
|
305
804
|
except KeyError:
|
|
306
|
-
logger.info("[%s] does not report a name",
|
|
307
|
-
rv =
|
|
805
|
+
logger.info("[%s] does not report a name", ontology_prefix)
|
|
806
|
+
rv = ontology_prefix
|
|
308
807
|
return rv
|
|
309
808
|
|
|
310
809
|
|
|
311
810
|
def iterate_graph_synonym_typedefs(
    graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = False, upgrade: bool
) -> Iterable[SynonymTypeDef]:
    """Get synonym type definitions from an :mod:`obonet` graph.

    Each ``synonymtypedef`` header line is expected to look like
    ``<id> "<name>" [<specificity>]``.

    :param graph: The graph whose ``synonymtypedef`` metadata is read.
    :param ontology_prefix: The prefix of the ontology being read.
    :param strict: If true, raise on an invalid specificity or a malformed line
        instead of logging and carrying on.
    :param upgrade: Forwarded to ``_obo_parse_identifier``.
    :raises ValueError: In strict mode, for an invalid specificity or a line
        that has no space separating the ID from the name.
    """
    for line in graph.graph.get("synonymtypedef", []):
        # TODO handle trailing comments
        # everything after the last quote is the (optional) specificity
        line, _, specificity = (x.strip() for x in line.rpartition('"'))
        specificity = specificity.upper()
        if not specificity:
            specificity = None
        elif specificity not in t.get_args(SynonymScope):
            if strict:
                raise ValueError(f"invalid synonym specificity: {specificity}")
            logger.warning("[%s] invalid synonym specificity: %s", ontology_prefix, specificity)
            specificity = None

        curie, sep, name = line.partition(" ")
        if not sep:
            # no ID/name separator: previously this crashed on tuple unpacking
            # even when strict was off
            if strict:
                raise ValueError(f"malformed synonymtypedef line: {line}")
            logger.warning("[%s] malformed synonymtypedef line: %s", ontology_prefix, line)
            continue
        # the name should be in quotes, so strip them out
        name = name.strip().strip('"')
        # TODO unquote the string?
        reference = _obo_parse_identifier(
            curie,
            ontology_prefix=ontology_prefix,
            name=name,
            upgrade=upgrade,
            strict=strict,
        )
        if reference is None:
            logger.warning("[%s] unable to parse synonym typedef ID %s", ontology_prefix, curie)
            continue
        yield SynonymTypeDef(reference=reference, specificity=specificity)
|
|
332
841
|
|
|
333
842
|
|
|
334
|
-
def
|
|
335
|
-
graph: nx.MultiDiGraph,
|
|
843
|
+
def iterate_typedefs(
    graph: nx.MultiDiGraph,
    *,
    ontology_prefix: str,
    strict: bool = False,
    upgrade: bool,
    macro_config: MacroConfig | None = None,
) -> Iterable[TypeDef]:
    """Get type definitions from an :mod:`obonet` graph.

    :param graph: The graph whose ``typedefs`` metadata is read.
    :param ontology_prefix: The prefix of the ontology being read.
    :param strict: Forwarded to the identifier parser and the stanza processors.
    :param upgrade: Forwarded to the identifier parser and the stanza processors.
    :param macro_config: The macro lookups used when processing xrefs; an empty
        configuration is built when none is given.
    :raises KeyError: If a typedef entry has neither an ``id`` nor an ``identifier`` key.

    Entries whose ID cannot be parsed are logged and skipped.
    """
    if macro_config is None:
        macro_config = MacroConfig(strict=strict, ontology_prefix=ontology_prefix)
    # can't really have a pre-defined set of synonym typedefs here!
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {}
    # never populated in this function, so relation checks see no ontology-specific typedefs
    typedefs: Mapping[ReferenceTuple, TypeDef] = {}
    # shared across all entries so each missing relation is only reported once
    missing_typedefs: set[ReferenceTuple] = set()
    for data in graph.graph.get("typedefs", []):
        if "id" in data:
            typedef_id = data["id"]
        elif "identifier" in data:
            typedef_id = data["identifier"]
        else:
            raise KeyError("typedef is missing an `id`")

        name = data.get("name")
        if name is None:
            logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id)

        reference = _obo_parse_identifier(
            typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name, upgrade=upgrade
        )
        if reference is None:
            logger.warning("[%s] unable to parse typedef ID %s", ontology_prefix, typedef_id)
            continue

        typedef = TypeDef(
            reference=reference,
            namespace=data.get("namespace"),
            is_metadata_tag=_get_boolean(data, "is_metadata_tag"),
            is_class_level=_get_boolean(data, "is_class_level"),
            builtin=_get_boolean(data, "builtin"),
            is_obsolete=_get_boolean(data, "is_obsolete"),
            is_anonymous=_get_boolean(data, "is_anonymous"),
            is_anti_symmetric=_get_boolean(data, "is_anti_symmetric"),
            is_symmetric=_get_boolean(data, "is_symmetric"),
            is_reflexive=_get_boolean(data, "is_reflexive"),
            is_cyclic=_get_boolean(data, "is_cyclic"),
            is_transitive=_get_boolean(data, "is_transitive"),
            is_functional=_get_boolean(data, "is_functional"),
            is_inverse_functional=_get_boolean(data, "is_inverse_functional"),
            domain=_get_reference(data, "domain", ontology_prefix=ontology_prefix, strict=strict),
            range=_get_reference(data, "range", ontology_prefix=ontology_prefix, strict=strict),
            inverse=_get_reference(
                data, "inverse_of", ontology_prefix=ontology_prefix, strict=strict
            ),
        )
        # the remaining tags are filled in by mutating the typedef in place
        _process_alts(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_parents(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_synonyms(
            typedef,
            data,
            ontology_prefix=ontology_prefix,
            strict=strict,
            upgrade=upgrade,
            synonym_typedefs=synonym_typedefs,
        )
        _process_xrefs(
            typedef,
            data,
            ontology_prefix=ontology_prefix,
            strict=strict,
            macro_config=macro_config,
            upgrade=upgrade,
        )
        _process_properties(
            typedef,
            data,
            ontology_prefix=ontology_prefix,
            strict=strict,
            upgrade=upgrade,
            typedefs=typedefs,
        )
        _process_relations(
            typedef,
            data,
            ontology_prefix=ontology_prefix,
            strict=strict,
            upgrade=upgrade,
            typedefs=typedefs,
            missing_typedefs=missing_typedefs,
        )
        _process_replaced_by(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_subsets(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_intersection_of(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_union_of(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_equivalent_to(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_disjoint_from(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_consider(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_comment(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_description(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_creation_date(typedef, data)

        # the next 4 are typedef-specific
        _process_equivalent_to_chain(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        _process_holds_over_chain(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
        typedef.disjoint_over.extend(
            iterate_node_reference_tag(
                "disjoint_over",
                data,
                node=typedef.reference,
                ontology_prefix=ontology_prefix,
                strict=strict,
            )
        )
        typedef.transitive_over.extend(
            iterate_node_reference_tag(
                "transitive_over",
                data,
                node=typedef.reference,
                ontology_prefix=ontology_prefix,
                strict=strict,
            )
        )

        yield typedef
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
def _process_consider(stanza: Stanza, data, *, ontology_prefix: str, strict: bool = False):
    """Record each reference parsed from the ``consider`` tag in ``data`` via ``append_see_also``."""
    considered = iterate_node_reference_tag(
        "consider",
        data,
        strict=strict,
        ontology_prefix=ontology_prefix,
        node=stanza.reference,
    )
    for reference in considered:
        stanza.append_see_also(reference)
|
|
978
|
+
|
|
979
|
+
|
|
980
|
+
def _process_equivalent_to_chain(
    typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
) -> None:
    """Add every chain parsed from the ``equivalent_to_chain`` tag to ``typedef.equivalent_to_chain``."""
    parsed_chains = _iterate_chain(
        "equivalent_to_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
    )
    typedef.equivalent_to_chain.extend(parsed_chains)
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def _process_holds_over_chain(
    typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
) -> None:
    """Add every chain parsed from the ``holds_over_chain`` tag to ``typedef.holds_over_chain``."""
    parsed_chains = _iterate_chain(
        "holds_over_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
    )
    typedef.holds_over_chain.extend(parsed_chains)
|
|
996
|
+
|
|
997
|
+
|
|
998
|
+
def _iterate_chain(
|
|
999
|
+
tag: str, typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
|
|
1000
|
+
) -> Iterable[list[Reference]]:
|
|
1001
|
+
for chain in data.get(tag, []):
|
|
1002
|
+
# chain is a list of CURIEs
|
|
1003
|
+
predicate_chain = _process_chain_helper(typedef, chain, ontology_prefix=ontology_prefix)
|
|
1004
|
+
if predicate_chain is None:
|
|
1005
|
+
logger.warning(
|
|
1006
|
+
"[%s - %s] could not parse line: %s: %s",
|
|
1007
|
+
ontology_prefix,
|
|
1008
|
+
typedef.curie,
|
|
1009
|
+
tag,
|
|
1010
|
+
chain,
|
|
1011
|
+
)
|
|
1012
|
+
else:
|
|
1013
|
+
yield predicate_chain
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def _process_chain_helper(
|
|
1017
|
+
term: Stanza, chain: str, ontology_prefix: str, strict: bool = False
|
|
1018
|
+
) -> list[Reference] | None:
|
|
1019
|
+
rv = []
|
|
1020
|
+
for curie in chain.split():
|
|
1021
|
+
curie = curie.strip()
|
|
1022
|
+
r = _obo_parse_identifier(
|
|
1023
|
+
curie, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
|
|
1024
|
+
)
|
|
1025
|
+
if r is None:
|
|
1026
|
+
return None
|
|
1027
|
+
rv.append(r)
|
|
1028
|
+
return rv
|
|
364
1029
|
|
|
365
1030
|
|
|
366
1031
|
def get_definition(
    data, *, node: Reference, ontology_prefix: str, strict: bool = False
) -> tuple[None | str, list[Reference | OBOLiteral]]:
    """Pull the definition text and its provenance out of a node's data.

    :returns: ``(None, [])`` when the ``def`` tag is missing or empty, otherwise
        the result of parsing it.
    """
    raw_definition = data.get("def")
    if raw_definition:
        return _extract_definition(
            raw_definition, node=node, strict=strict, ontology_prefix=ontology_prefix
        )
    # it's allowed not to have a definition
    return None, []
|
|
374
1041
|
|
|
375
1042
|
|
|
376
1043
|
def _extract_definition(
    s: str,
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
) -> tuple[None | str, list[Reference | OBOLiteral]]:
    """Split a raw ``def`` value into its quoted text and bracketed provenance.

    :returns: ``(None, [])`` when the value does not start with a quote or its
        quotes cannot be split; otherwise the definition (``None`` if empty)
        and the parsed provenance (empty when no ``[`` follows the text).
    """
    if not s.startswith('"'):
        logger.warning(f"[{node.curie}] definition does not start with a quote")
        return None, []

    try:
        text, remainder = _quote_split(s)
    except ValueError as e:
        logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
        return None, []

    if remainder.startswith("["):
        # FIXME this doesn't account for trailing annotations
        remainder = remainder.lstrip("[").rstrip("]")
        provenance = _parse_provenance_list(
            remainder,
            node=node,
            ontology_prefix=ontology_prefix,
            counter=DEFINITION_PROVENANCE_COUNTER,
            scope_text="definition provenance",
            line=s,
            strict=strict,
        )
    else:
        logger.debug("[%s] no square brackets for provenance on line: %s", node.curie, s)
        provenance = []
    return text or None, provenance
|
|
399
1076
|
|
|
400
1077
|
|
|
401
|
-
def
|
|
1078
|
+
def get_first_nonescaped_quote(s: str) -> int | None:
|
|
1079
|
+
"""Get the first non-escaped quote."""
|
|
1080
|
+
if not s:
|
|
1081
|
+
return None
|
|
1082
|
+
if s[0] == '"':
|
|
1083
|
+
# special case first position
|
|
1084
|
+
return 0
|
|
402
1085
|
for i, (a, b) in enumerate(pairwise(s), start=1):
|
|
403
1086
|
if b == '"' and a != "\\":
|
|
404
1087
|
return i
|
|
@@ -406,10 +1089,12 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
|
|
|
406
1089
|
|
|
407
1090
|
|
|
408
1091
|
def _quote_split(s: str) -> tuple[str, str]:
|
|
409
|
-
|
|
410
|
-
|
|
1092
|
+
if not s.startswith('"'):
|
|
1093
|
+
raise ValueError(f"'{s}' does not start with a quote")
|
|
1094
|
+
s = s.removeprefix('"')
|
|
1095
|
+
i = get_first_nonescaped_quote(s)
|
|
411
1096
|
if i is None:
|
|
412
|
-
raise ValueError
|
|
1097
|
+
raise ValueError(f"no closing quote found in `{s}`")
|
|
413
1098
|
return _clean_definition(s[:i].strip()), s[i + 1 :].strip()
|
|
414
1099
|
|
|
415
1100
|
|
|
@@ -421,78 +1106,64 @@ def _clean_definition(s: str) -> str:
|
|
|
421
1106
|
|
|
422
1107
|
def _extract_synonym(
    s: str,
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
    upgrade: bool,
) -> Synonym | None:
    """Parse one raw ``synonym`` value into a :class:`Synonym`.

    After the quoted name, the remainder is consumed left to right: specificity,
    synonym type, bracketed references, then axioms.

    :returns: The synonym, or ``None`` (after a warning) when the quoted name
        cannot be split off.
    """
    # TODO check if the synonym is written like a CURIE... it shouldn't but I've seen it happen
    try:
        name, remainder = _quote_split(s)
    except ValueError:
        logger.warning("[%s] invalid synonym: %s", node.curie, s)
        return None

    specificity, remainder = _chomp_specificity(remainder)
    synonym_typedef, remainder = _chomp_typedef(
        remainder,
        synonym_typedefs=synonym_typedefs,
        strict=strict,
        node=node,
        ontology_prefix=ontology_prefix,
        upgrade=upgrade,
    )
    provenance, remainder = _chomp_references(
        remainder,
        strict=strict,
        node=node,
        ontology_prefix=ontology_prefix,
        line=s,
    )
    annotations = _chomp_axioms(remainder, node=node, strict=strict)

    type_reference = synonym_typedef.reference if synonym_typedef else None
    return Synonym(
        name=name,
        specificity=specificity,
        type=type_reference,
        provenance=list(provenance or []),
        annotations=annotations,
    )
|
|
474
1148
|
|
|
475
1149
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
return [
|
|
479
|
-
Reference.from_curie(curie.strip(), strict=strict)
|
|
480
|
-
for curie in rest.split(",")
|
|
481
|
-
if curie.strip()
|
|
482
|
-
]
|
|
1150
|
+
#: A counter for errors in parsing provenance
|
|
1151
|
+
DEFINITION_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
|
|
483
1152
|
|
|
484
1153
|
|
|
485
1154
|
def iterate_node_synonyms(
|
|
486
1155
|
data: Mapping[str, Any],
|
|
487
|
-
synonym_typedefs: Mapping[
|
|
1156
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
488
1157
|
*,
|
|
489
|
-
|
|
490
|
-
identifier: str,
|
|
1158
|
+
node: Reference,
|
|
491
1159
|
strict: bool = False,
|
|
1160
|
+
ontology_prefix: str,
|
|
1161
|
+
upgrade: bool,
|
|
492
1162
|
) -> Iterable[Synonym]:
|
|
493
1163
|
"""Extract synonyms from a :mod:`obonet` node's data.
|
|
494
1164
|
|
|
495
|
-
Example strings
|
|
1165
|
+
Example strings
|
|
1166
|
+
|
|
496
1167
|
- "LTEC I" EXACT [Orphanet:93938,DOI:xxxx]
|
|
497
1168
|
- "LTEC I" EXACT [Orphanet:93938]
|
|
498
1169
|
- "LTEC I" [Orphanet:93938]
|
|
@@ -500,121 +1171,409 @@ def iterate_node_synonyms(
|
|
|
500
1171
|
"""
|
|
501
1172
|
for s in data.get("synonym", []):
|
|
502
1173
|
s = _extract_synonym(
|
|
503
|
-
s,
|
|
1174
|
+
s,
|
|
1175
|
+
synonym_typedefs,
|
|
1176
|
+
node=node,
|
|
1177
|
+
strict=strict,
|
|
1178
|
+
ontology_prefix=ontology_prefix,
|
|
1179
|
+
upgrade=upgrade,
|
|
504
1180
|
)
|
|
505
1181
|
if s is not None:
|
|
506
1182
|
yield s
|
|
507
1183
|
|
|
508
1184
|
|
|
509
|
-
HANDLED_PROPERTY_TYPES = {
|
|
510
|
-
"xsd:string": str,
|
|
511
|
-
"xsd:dateTime": datetime,
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
|
|
515
1185
|
def iterate_node_properties(
    data: Mapping[str, Any],
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
    upgrade: bool,
    context: str,
) -> Iterable[Annotation]:
    """Yield an annotation for each ``property_value`` line in a :mod:`obonet` node's data.

    Lines for which ``_handle_prop`` returns a falsy result are skipped.
    """
    for raw_line in data.get("property_value", []):
        annotation = _handle_prop(
            raw_line,
            node=node,
            strict=strict,
            ontology_prefix=ontology_prefix,
            upgrade=upgrade,
            context=context,
        )
        if not annotation:
            continue
        yield annotation
|
|
1205
|
+
|
|
1206
|
+
|
|
1207
|
+
#: Keep track of property-value pairs for which the value couldn't be parsed,
|
|
1208
|
+
#: such as `dc:conformsTo autoimmune:inflammation.yaml` in MONDO
|
|
1209
|
+
UNHANDLED_PROP_OBJECTS: Counter[tuple[str, str]] = Counter()
|
|
1210
|
+
|
|
1211
|
+
#: Handed to ``_get_prop`` as its ``counter`` argument when parsing the
#: property half of a ``property_value`` line
UNHANDLED_PROPS: Counter[tuple[str, str]] = Counter()
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
def _handle_prop(
|
|
1215
|
+
prop_value_type: str,
|
|
1216
|
+
*,
|
|
1217
|
+
node: Reference,
|
|
1218
|
+
strict: bool = False,
|
|
1219
|
+
ontology_prefix: str,
|
|
1220
|
+
upgrade: bool,
|
|
1221
|
+
context: str | None,
|
|
1222
|
+
) -> Annotation | None:
|
|
1223
|
+
try:
|
|
1224
|
+
prop, value_type = prop_value_type.split(" ", 1)
|
|
1225
|
+
except ValueError:
|
|
1226
|
+
logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type)
|
|
1227
|
+
return None
|
|
1228
|
+
|
|
1229
|
+
prop_reference = _get_prop(
|
|
1230
|
+
prop,
|
|
1231
|
+
node=node,
|
|
1232
|
+
strict=strict,
|
|
1233
|
+
ontology_prefix=ontology_prefix,
|
|
1234
|
+
upgrade=upgrade,
|
|
1235
|
+
line=prop_value_type,
|
|
1236
|
+
counter=UNHANDLED_PROPS,
|
|
1237
|
+
context=context,
|
|
1238
|
+
)
|
|
1239
|
+
if prop_reference is None:
|
|
1240
|
+
return None
|
|
1241
|
+
|
|
1242
|
+
value_type = value_type.strip()
|
|
1243
|
+
datatype: Reference | None
|
|
1244
|
+
if " " not in value_type:
|
|
1245
|
+
value, datatype = value_type, None
|
|
1246
|
+
else:
|
|
1247
|
+
value, datatype_raw = (s.strip() for s in value_type.rsplit(" ", 1))
|
|
1248
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1249
|
+
datatype_raw,
|
|
1250
|
+
ontology_prefix=ontology_prefix,
|
|
1251
|
+
node=node,
|
|
1252
|
+
predicate=prop_reference,
|
|
1253
|
+
line=prop_value_type,
|
|
1254
|
+
upgrade=upgrade,
|
|
1255
|
+
context="property datatype",
|
|
1256
|
+
):
|
|
1257
|
+
case Reference() as datatype_:
|
|
1258
|
+
datatype = datatype_
|
|
1259
|
+
case BlocklistError():
|
|
1260
|
+
return None
|
|
1261
|
+
case ParseError() as exc:
|
|
1262
|
+
if strict:
|
|
1263
|
+
raise exc
|
|
1264
|
+
else:
|
|
1265
|
+
logger.warning(str(exc))
|
|
1266
|
+
return None
|
|
1267
|
+
|
|
1268
|
+
# if it's an empty string, like the ones removed in https://github.com/oborel/obo-relations/pull/830,
|
|
1269
|
+
# just quit
|
|
1270
|
+
if value == '""':
|
|
1271
|
+
return None
|
|
1272
|
+
|
|
1273
|
+
quoted = value.startswith('"') and value.endswith('"')
|
|
1274
|
+
value = value.strip('"').strip()
|
|
527
1275
|
|
|
1276
|
+
# first, special case datetimes. Whether it's quoted or not,
|
|
1277
|
+
# we always deal with this first
|
|
1278
|
+
if datatype and datatype.curie == "xsd:dateTime":
|
|
528
1279
|
try:
|
|
529
|
-
|
|
1280
|
+
obo_literal = OBOLiteral.datetime(value)
|
|
530
1281
|
except ValueError:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
1282
|
+
logger.warning(
|
|
1283
|
+
"[%s - %s] could not parse date: %s", node.curie, prop_reference.curie, value
|
|
1284
|
+
)
|
|
1285
|
+
return None
|
|
1286
|
+
else:
|
|
1287
|
+
return Annotation(prop_reference, obo_literal)
|
|
1288
|
+
|
|
1289
|
+
if datatype and datatype.curie == "xsd:anyURI":
|
|
1290
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1291
|
+
value,
|
|
1292
|
+
node=node,
|
|
1293
|
+
predicate=prop_reference,
|
|
1294
|
+
ontology_prefix=ontology_prefix,
|
|
1295
|
+
line=prop_value_type,
|
|
1296
|
+
upgrade=upgrade,
|
|
1297
|
+
context="property object",
|
|
1298
|
+
):
|
|
1299
|
+
case Reference() as obj_reference:
|
|
1300
|
+
return Annotation(prop_reference, obj_reference)
|
|
1301
|
+
case BlocklistError():
|
|
1302
|
+
return None
|
|
1303
|
+
case UnparsableIRIError():
|
|
1304
|
+
return Annotation(prop_reference, OBOLiteral.uri(value))
|
|
1305
|
+
case ParseError() as exc:
|
|
1306
|
+
if strict:
|
|
1307
|
+
raise exc
|
|
1308
|
+
else:
|
|
1309
|
+
logger.warning(str(exc))
|
|
1310
|
+
return None
|
|
1311
|
+
|
|
1312
|
+
# if it's quoted and there's a data try parsing as a CURIE/URI anyway (this is a bit
|
|
1313
|
+
# aggressive, but more useful than spec).
|
|
1314
|
+
if quoted:
|
|
1315
|
+
# give a try parsing it anyway, just in case ;)
|
|
1316
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1317
|
+
value,
|
|
1318
|
+
ontology_prefix=ontology_prefix,
|
|
1319
|
+
node=node,
|
|
1320
|
+
line=prop_value_type,
|
|
1321
|
+
upgrade=upgrade,
|
|
1322
|
+
predicate=prop_reference,
|
|
1323
|
+
context="property object",
|
|
1324
|
+
):
|
|
1325
|
+
case Reference() as obj_reference:
|
|
1326
|
+
return Annotation(prop_reference, obj_reference)
|
|
1327
|
+
case BlocklistError():
|
|
1328
|
+
return None
|
|
1329
|
+
case ParseError():
|
|
1330
|
+
if datatype:
|
|
1331
|
+
return Annotation(prop_reference, OBOLiteral(value, datatype, None))
|
|
1332
|
+
else:
|
|
1333
|
+
return Annotation(prop_reference, OBOLiteral.string(value))
|
|
1334
|
+
else:
|
|
1335
|
+
if datatype:
|
|
1336
|
+
logger.debug(
|
|
1337
|
+
"[%s] throwing away datatype since no quotes were used: %s", node.curie, value_type
|
|
1338
|
+
)
|
|
1339
|
+
|
|
1340
|
+
# if it wasn't quoted and there was no datatype, go for parsing as an object
|
|
1341
|
+
match _obo_parse_identifier(
|
|
1342
|
+
value,
|
|
1343
|
+
strict=strict,
|
|
1344
|
+
ontology_prefix=ontology_prefix,
|
|
1345
|
+
node=node,
|
|
1346
|
+
predicate=prop_reference,
|
|
1347
|
+
line=prop_value_type,
|
|
1348
|
+
context="property object",
|
|
1349
|
+
counter=UNHANDLED_PROP_OBJECTS,
|
|
1350
|
+
):
|
|
1351
|
+
case Reference() as obj_reference:
|
|
1352
|
+
return Annotation(prop_reference, obj_reference)
|
|
1353
|
+
case None:
|
|
1354
|
+
return None
|
|
1355
|
+
|
|
1356
|
+
|
|
1357
|
+
def _get_prop(
    property_id: str,
    *,
    node: Reference,
    strict: bool,
    ontology_prefix: str,
    upgrade: bool,
    line: str,
    counter: Counter[tuple[str, str]] | None = None,
    context: str | None = None,
) -> Reference | None:
    """Resolve a property identifier to a reference.

    The identifier is first checked against the ontology's own default namespace
    via :func:`_parse_default_prop`; only if that gives nothing is it handed to
    the general identifier parser.
    """
    return _parse_default_prop(property_id, ontology_prefix) or _obo_parse_identifier(
        property_id,
        strict=strict,
        node=node,
        ontology_prefix=ontology_prefix,
        upgrade=upgrade,
        counter=counter,
        context=context,
        line=line,
    )
|
|
1380
|
+
|
|
1381
|
+
|
|
1382
|
+
def _parse_default_prop(property_id, ontology_prefix) -> Reference | None:
|
|
1383
|
+
for delim in "#/":
|
|
1384
|
+
sw = f"http://purl.obolibrary.org/obo/{ontology_prefix}{delim}"
|
|
1385
|
+
if property_id.startswith(sw):
|
|
1386
|
+
identifier = property_id.removeprefix(sw)
|
|
1387
|
+
return default_reference(ontology_prefix, identifier)
|
|
1388
|
+
return None
|
|
535
1389
|
|
|
536
1390
|
|
|
537
|
-
def
|
|
1391
|
+
def iterate_node_reference_tag(
    tag: str,
    data: Mapping[str, Any],
    *,
    node: Reference,
    strict: bool = False,
    ontology_prefix: str,
    upgrade: bool = True,
    counter: Counter[tuple[str, str]] | None = None,
) -> Iterable[Reference]:
    """Yield a reference for each identifier listed under ``tag`` in the node's data.

    Identifiers for which the parser gives back ``None`` are reported with a
    warning and skipped.
    """
    raw_identifiers = data.get(tag, [])
    for raw_identifier in raw_identifiers:
        parsed = _obo_parse_identifier(
            raw_identifier,
            strict=strict,
            node=node,
            ontology_prefix=ontology_prefix,
            upgrade=upgrade,
            counter=counter,
        )
        if parsed is not None:
            yield parsed
            continue
        logger.warning(
            "[%s] %s - could not parse identifier: %s", ontology_prefix, tag, raw_identifier
        )
|
|
553
1417
|
|
|
554
1418
|
|
|
555
|
-
def
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
1419
|
+
def _process_intersection_of(
|
|
1420
|
+
term: Stanza,
|
|
1421
|
+
data: Mapping[str, Any],
|
|
1422
|
+
*,
|
|
1423
|
+
strict: bool = False,
|
|
1424
|
+
ontology_prefix: str,
|
|
1425
|
+
upgrade: bool = True,
|
|
1426
|
+
) -> None:
|
|
1427
|
+
"""Extract a list of CURIEs from the data."""
|
|
1428
|
+
for line in data.get("intersection_of", []):
|
|
1429
|
+
predicate_id, _, target_id = line.partition(" ")
|
|
1430
|
+
predicate = _obo_parse_identifier(
|
|
1431
|
+
predicate_id,
|
|
1432
|
+
strict=strict,
|
|
1433
|
+
node=term.reference,
|
|
1434
|
+
ontology_prefix=ontology_prefix,
|
|
1435
|
+
upgrade=upgrade,
|
|
1436
|
+
)
|
|
1437
|
+
if predicate is None:
|
|
1438
|
+
logger.warning("[%s] - could not parse intersection_of: %s", ontology_prefix, line)
|
|
1439
|
+
continue
|
|
1440
|
+
|
|
1441
|
+
if target_id:
|
|
1442
|
+
# this means that there's a second part, so let's try parsing it
|
|
1443
|
+
target = _obo_parse_identifier(
|
|
1444
|
+
target_id,
|
|
1445
|
+
strict=strict,
|
|
1446
|
+
node=term.reference,
|
|
1447
|
+
predicate=predicate,
|
|
1448
|
+
ontology_prefix=ontology_prefix,
|
|
1449
|
+
upgrade=upgrade,
|
|
1450
|
+
)
|
|
1451
|
+
if target is None:
|
|
1452
|
+
logger.warning(
|
|
1453
|
+
"[%s] could not parse intersection_of target: %s", ontology_prefix, line
|
|
1454
|
+
)
|
|
1455
|
+
continue
|
|
1456
|
+
term.append_intersection_of(predicate, target)
|
|
1457
|
+
else:
|
|
1458
|
+
term.append_intersection_of(predicate)
|
|
561
1459
|
|
|
562
1460
|
|
|
563
1461
|
def iterate_node_relationships(
|
|
564
1462
|
data: Mapping[str, Any],
|
|
565
1463
|
*,
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
1464
|
+
node: Reference,
|
|
1465
|
+
strict: bool = False,
|
|
1466
|
+
ontology_prefix: str,
|
|
1467
|
+
upgrade: bool,
|
|
569
1468
|
) -> Iterable[tuple[Reference, Reference]]:
|
|
570
1469
|
"""Extract relationships from a :mod:`obonet` node's data."""
|
|
571
|
-
for
|
|
572
|
-
relation_curie, target_curie =
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
target = Reference.from_curie(target_curie, strict=strict)
|
|
589
|
-
if target is None:
|
|
590
|
-
logger.warning(
|
|
591
|
-
"[%s:%s] %s could not parse target %s", prefix, identifier, relation, target_curie
|
|
592
|
-
)
|
|
593
|
-
continue
|
|
1470
|
+
for line in data.get("relationship", []):
|
|
1471
|
+
relation_curie, target_curie = line.split(" ")
|
|
1472
|
+
|
|
1473
|
+
predicate = _obo_parse_identifier(
|
|
1474
|
+
relation_curie,
|
|
1475
|
+
strict=strict,
|
|
1476
|
+
ontology_prefix=ontology_prefix,
|
|
1477
|
+
node=node,
|
|
1478
|
+
upgrade=upgrade,
|
|
1479
|
+
line=line,
|
|
1480
|
+
context="relationship predicate",
|
|
1481
|
+
)
|
|
1482
|
+
match predicate:
|
|
1483
|
+
# TODO extend with other exception handling
|
|
1484
|
+
case None:
|
|
1485
|
+
logger.warning("[%s] could not parse relation %s", node.curie, relation_curie)
|
|
1486
|
+
continue
|
|
594
1487
|
|
|
595
|
-
|
|
1488
|
+
match _parse_str_or_curie_or_uri_helper(
|
|
1489
|
+
target_curie,
|
|
1490
|
+
ontology_prefix=ontology_prefix,
|
|
1491
|
+
node=node,
|
|
1492
|
+
predicate=predicate,
|
|
1493
|
+
line=line,
|
|
1494
|
+
context="relationship target",
|
|
1495
|
+
upgrade=upgrade,
|
|
1496
|
+
):
|
|
1497
|
+
case Reference() as target:
|
|
1498
|
+
yield predicate, target
|
|
1499
|
+
case ParseError() as exc:
|
|
1500
|
+
if strict:
|
|
1501
|
+
raise exc
|
|
1502
|
+
else:
|
|
1503
|
+
logger.warning(str(exc))
|
|
596
1504
|
|
|
597
1505
|
|
|
598
1506
|
def iterate_node_xrefs(
    *,
    data: Mapping[str, Any],
    strict: bool = False,
    ontology_prefix: str,
    node: Reference,
    upgrade: bool,
) -> Iterable[tuple[Reference, list[Reference | OBOLiteral]]]:
    """Yield each usable ``xref`` line of an :mod:`obonet` node with its provenance.

    Every line is stripped of surrounding whitespace and delegated to
    :func:`_parse_xref_line`; lines for which it gives back nothing are skipped.
    """
    for raw_line in data.get("xref", []):
        parsed = _parse_xref_line(
            raw_line.strip(),
            strict=strict,
            node=node,
            ontology_prefix=ontology_prefix,
            upgrade=upgrade,
        )
        if not parsed:
            continue
        yield parsed
|
|
1525
|
+
|
|
1526
|
+
|
|
1527
|
+
def _parse_xref_line(
|
|
1528
|
+
line: str, *, strict: bool = False, ontology_prefix: str, node: Reference, upgrade: bool
|
|
1529
|
+
) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
|
|
1530
|
+
xref, _, rest = line.partition(" [")
|
|
1531
|
+
|
|
1532
|
+
rules = get_rules()
|
|
1533
|
+
|
|
1534
|
+
if rules.str_is_blocked(xref, context=ontology_prefix) or ":" not in xref:
|
|
1535
|
+
return None # sometimes xref to self... weird
|
|
1536
|
+
|
|
1537
|
+
xref = rules.remap_prefix(xref, context=ontology_prefix)
|
|
1538
|
+
|
|
1539
|
+
split_space = " " in xref
|
|
1540
|
+
if split_space:
|
|
1541
|
+
_xref_split = xref.split(" ", 1)
|
|
1542
|
+
if _xref_split[1][0] not in {'"', "("}:
|
|
1543
|
+
logger.debug("[%s] Problem with space in xref %s", node.curie, xref)
|
|
1544
|
+
return None
|
|
1545
|
+
xref = _xref_split[0]
|
|
1546
|
+
|
|
1547
|
+
xref_ref = _parse_str_or_curie_or_uri_helper(
|
|
1548
|
+
xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
|
|
1549
|
+
)
|
|
1550
|
+
match xref_ref:
|
|
1551
|
+
case BlocklistError():
|
|
1552
|
+
return None
|
|
1553
|
+
case ParseError() as exc:
|
|
1554
|
+
if strict:
|
|
1555
|
+
raise exc
|
|
1556
|
+
else:
|
|
1557
|
+
if not XREF_PROVENANCE_COUNTER[ontology_prefix, xref]:
|
|
1558
|
+
logger.warning(str(exc))
|
|
1559
|
+
XREF_PROVENANCE_COUNTER[ontology_prefix, xref] += 1
|
|
1560
|
+
return None
|
|
1561
|
+
|
|
1562
|
+
if rest:
|
|
1563
|
+
rest_front, _, _rest_rest = rest.partition("]")
|
|
1564
|
+
provenance = _parse_provenance_list(
|
|
1565
|
+
rest_front,
|
|
1566
|
+
node=node,
|
|
1567
|
+
ontology_prefix=ontology_prefix,
|
|
1568
|
+
counter=XREF_PROVENANCE_COUNTER,
|
|
1569
|
+
scope_text="xref provenance",
|
|
1570
|
+
line=line,
|
|
1571
|
+
strict=strict,
|
|
1572
|
+
)
|
|
1573
|
+
else:
|
|
1574
|
+
provenance = []
|
|
607
1575
|
|
|
608
|
-
|
|
1576
|
+
return xref_ref, provenance
|
|
609
1577
|
|
|
610
|
-
split_space = " " in xref
|
|
611
|
-
if split_space:
|
|
612
|
-
_xref_split = xref.split(" ", 1)
|
|
613
|
-
if _xref_split[1][0] not in {'"', "("}:
|
|
614
|
-
logger.debug("[%s] Problem with space in xref %s", prefix, xref)
|
|
615
|
-
continue
|
|
616
|
-
xref = _xref_split[0]
|
|
617
1578
|
|
|
618
|
-
|
|
619
|
-
if yv is not None:
|
|
620
|
-
yield yv
|
|
1579
|
+
#: Shared counter used by ``_parse_xref_line``, which keys it by ontology prefix
#: and xref string to log each unparsable xref only once, and also passes it
#: along when parsing an xref's provenance list
XREF_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
|