pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/struct/struct.py
CHANGED
|
@@ -1,111 +1,209 @@
|
|
|
1
1
|
"""Data structures for OBO."""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import itertools as itt
|
|
4
7
|
import json
|
|
5
8
|
import logging
|
|
6
9
|
import os
|
|
7
10
|
import sys
|
|
8
|
-
|
|
9
|
-
from collections
|
|
11
|
+
import warnings
|
|
12
|
+
from collections import ChainMap, defaultdict
|
|
13
|
+
from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
|
|
10
14
|
from dataclasses import dataclass, field
|
|
11
|
-
from datetime import datetime
|
|
12
|
-
from operator import attrgetter
|
|
13
15
|
from pathlib import Path
|
|
14
16
|
from textwrap import dedent
|
|
15
|
-
from typing import
|
|
16
|
-
Any,
|
|
17
|
-
Callable,
|
|
18
|
-
ClassVar,
|
|
19
|
-
Optional,
|
|
20
|
-
TextIO,
|
|
21
|
-
Union,
|
|
22
|
-
)
|
|
17
|
+
from typing import Annotated, Any, ClassVar, TextIO
|
|
23
18
|
|
|
24
19
|
import bioregistry
|
|
25
20
|
import click
|
|
21
|
+
import curies
|
|
26
22
|
import networkx as nx
|
|
27
23
|
import pandas as pd
|
|
24
|
+
import ssslm
|
|
25
|
+
from curies import ReferenceTuple
|
|
26
|
+
from curies import vocabulary as _cv
|
|
28
27
|
from more_click import force_option, verbose_option
|
|
29
28
|
from tqdm.auto import tqdm
|
|
30
|
-
from typing_extensions import
|
|
31
|
-
|
|
32
|
-
from .
|
|
33
|
-
from .
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
29
|
+
from typing_extensions import Self
|
|
30
|
+
|
|
31
|
+
from . import vocabulary as v
|
|
32
|
+
from .reference import (
|
|
33
|
+
OBOLiteral,
|
|
34
|
+
Reference,
|
|
35
|
+
Referenced,
|
|
36
|
+
_reference_list_tag,
|
|
37
|
+
comma_separate_references,
|
|
38
|
+
default_reference,
|
|
39
|
+
get_preferred_curie,
|
|
40
|
+
reference_escape,
|
|
41
|
+
reference_or_literal_to_str,
|
|
42
|
+
)
|
|
43
|
+
from .struct_utils import (
|
|
44
|
+
Annotation,
|
|
45
|
+
AnnotationsDict,
|
|
46
|
+
HasReferencesMixin,
|
|
47
|
+
IntersectionOfHint,
|
|
48
|
+
PropertiesHint,
|
|
49
|
+
ReferenceHint,
|
|
50
|
+
RelationsHint,
|
|
51
|
+
Stanza,
|
|
52
|
+
StanzaType,
|
|
53
|
+
UnionOfHint,
|
|
54
|
+
_chain_tag,
|
|
55
|
+
_ensure_ref,
|
|
56
|
+
_get_prefixes_from_annotations,
|
|
57
|
+
_get_references_from_annotations,
|
|
58
|
+
_tag_property_targets,
|
|
48
59
|
)
|
|
49
|
-
from .utils import
|
|
60
|
+
from .utils import _boolean_tag, obo_escape_slim
|
|
50
61
|
from ..api.utils import get_version
|
|
51
62
|
from ..constants import (
|
|
63
|
+
BUILD_SUBDIRECTORY_NAME,
|
|
52
64
|
DATE_FORMAT,
|
|
65
|
+
DEFAULT_PREFIX_MAP,
|
|
53
66
|
NCBITAXON_PREFIX,
|
|
54
67
|
RELATION_ID,
|
|
55
68
|
RELATION_PREFIX,
|
|
56
69
|
TARGET_ID,
|
|
57
70
|
TARGET_PREFIX,
|
|
58
71
|
)
|
|
59
|
-
from ..
|
|
60
|
-
from ..utils.io import multidict, write_iterable_tsv
|
|
61
|
-
from ..utils.
|
|
62
|
-
|
|
72
|
+
from ..utils.cache import write_gzipped_graph
|
|
73
|
+
from ..utils.io import multidict, safe_open, write_iterable_tsv
|
|
74
|
+
from ..utils.path import (
|
|
75
|
+
CacheArtifact,
|
|
76
|
+
get_cache_path,
|
|
77
|
+
get_relation_cache_path,
|
|
78
|
+
prefix_directory_join,
|
|
79
|
+
)
|
|
80
|
+
from ..version import get_version as get_pyobo_version
|
|
63
81
|
|
|
64
82
|
__all__ = [
|
|
83
|
+
"Obo",
|
|
65
84
|
"Synonym",
|
|
66
85
|
"SynonymTypeDef",
|
|
67
|
-
"SynonymSpecificity",
|
|
68
|
-
"SynonymSpecificities",
|
|
69
86
|
"Term",
|
|
70
|
-
"Obo",
|
|
71
|
-
"make_ad_hoc_ontology",
|
|
72
87
|
"abbreviation",
|
|
73
88
|
"acronym",
|
|
89
|
+
"make_ad_hoc_ontology",
|
|
74
90
|
]
|
|
75
91
|
|
|
76
92
|
logger = logging.getLogger(__name__)
|
|
77
93
|
|
|
78
|
-
|
|
79
|
-
|
|
94
|
+
#: This is what happens if no specificity is given
|
|
95
|
+
DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
|
|
96
|
+
|
|
97
|
+
#: Columns in the SSSOM dataframe
|
|
98
|
+
SSSOM_DF_COLUMNS = [
|
|
99
|
+
"subject_id",
|
|
100
|
+
"subject_label",
|
|
101
|
+
"object_id",
|
|
102
|
+
"predicate_id",
|
|
103
|
+
"mapping_justification",
|
|
104
|
+
"confidence",
|
|
105
|
+
"contributor",
|
|
106
|
+
]
|
|
107
|
+
UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
|
|
108
|
+
FORMAT_VERSION = "1.4"
|
|
80
109
|
|
|
81
110
|
|
|
82
111
|
@dataclass
|
|
83
|
-
class Synonym:
|
|
112
|
+
class Synonym(HasReferencesMixin):
|
|
84
113
|
"""A synonym with optional specificity and references."""
|
|
85
114
|
|
|
86
115
|
#: The string representing the synonym
|
|
87
116
|
name: str
|
|
88
117
|
|
|
89
118
|
#: The specificity of the synonym
|
|
90
|
-
specificity:
|
|
119
|
+
specificity: _cv.SynonymScope | None = None
|
|
91
120
|
|
|
92
121
|
#: The type of synonym. Must be defined in OBO document!
|
|
93
|
-
type:
|
|
94
|
-
default_factory=lambda: DEFAULT_SYNONYM_TYPE # type:ignore
|
|
95
|
-
)
|
|
122
|
+
type: Reference | None = None
|
|
96
123
|
|
|
97
124
|
#: References to articles where the synonym appears
|
|
98
|
-
provenance:
|
|
125
|
+
provenance: Sequence[Reference | OBOLiteral] = field(default_factory=list)
|
|
126
|
+
|
|
127
|
+
#: Extra annotations
|
|
128
|
+
annotations: list[Annotation] = field(default_factory=list)
|
|
129
|
+
|
|
130
|
+
#: Language tag for the synonym
|
|
131
|
+
language: str | None = None
|
|
132
|
+
|
|
133
|
+
def __lt__(self, other: Synonym) -> bool:
|
|
134
|
+
"""Sort lexically by name."""
|
|
135
|
+
return self._sort_key() < other._sort_key()
|
|
136
|
+
|
|
137
|
+
def _get_references(self) -> defaultdict[str, set[Reference]]:
|
|
138
|
+
"""Get all prefixes used by the typedef."""
|
|
139
|
+
rv: defaultdict[str, set[Reference]] = defaultdict(set)
|
|
140
|
+
rv[v.has_dbxref.prefix].add(v.has_dbxref)
|
|
141
|
+
if self.type is not None:
|
|
142
|
+
rv[self.type.prefix].add(self.type)
|
|
143
|
+
for provenance in self.provenance:
|
|
144
|
+
match provenance:
|
|
145
|
+
case Reference():
|
|
146
|
+
rv[provenance.prefix].add(provenance)
|
|
147
|
+
case OBOLiteral(_, datatype, _language):
|
|
148
|
+
rv[datatype.prefix].add(v._c(datatype))
|
|
149
|
+
for prefix, references in _get_references_from_annotations(self.annotations).items():
|
|
150
|
+
rv[prefix].update(references)
|
|
151
|
+
return rv
|
|
152
|
+
|
|
153
|
+
def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
|
|
154
|
+
return (
|
|
155
|
+
self.name,
|
|
156
|
+
self.specificity or DEFAULT_SPECIFICITY,
|
|
157
|
+
self.type.curie if self.type else "",
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
@property
|
|
161
|
+
def predicate(self) -> curies.NamedReference:
|
|
162
|
+
"""Get the specificity reference."""
|
|
163
|
+
return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
|
|
99
164
|
|
|
100
|
-
def to_obo(
|
|
165
|
+
def to_obo(
|
|
166
|
+
self,
|
|
167
|
+
ontology_prefix: str,
|
|
168
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
169
|
+
) -> str:
|
|
101
170
|
"""Write this synonym as an OBO line to appear in a [Term] stanza."""
|
|
102
|
-
return f"synonym: {self._fp()}"
|
|
171
|
+
return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}"
|
|
103
172
|
|
|
104
|
-
def _fp(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
173
|
+
def _fp(
|
|
174
|
+
self,
|
|
175
|
+
ontology_prefix: str,
|
|
176
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
177
|
+
) -> str:
|
|
178
|
+
if synonym_typedefs is None:
|
|
179
|
+
synonym_typedefs = {}
|
|
180
|
+
|
|
181
|
+
x = f'"{self._escape(self.name)}"'
|
|
182
|
+
|
|
183
|
+
# Add on the specificity, e.g., EXACT
|
|
184
|
+
synonym_typedef = _synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs)
|
|
185
|
+
if synonym_typedef is not None and synonym_typedef.specificity is not None:
|
|
186
|
+
x = f"{x} {synonym_typedef.specificity}"
|
|
187
|
+
elif self.specificity is not None:
|
|
188
|
+
x = f"{x} {self.specificity}"
|
|
189
|
+
elif self.type is not None:
|
|
190
|
+
# it's not valid to have a synonym type without a specificity,
|
|
191
|
+
# so automatically assign one if we'll need it
|
|
192
|
+
x = f"{x} {DEFAULT_SPECIFICITY}"
|
|
193
|
+
|
|
194
|
+
# Add on the synonym type, if exists
|
|
195
|
+
if self.type is not None:
|
|
196
|
+
x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
|
|
197
|
+
|
|
198
|
+
# the provenance list is required, even if it's empty :/
|
|
199
|
+
x = f"{x} [{comma_separate_references(self.provenance)}]"
|
|
200
|
+
|
|
201
|
+
# OBO flat file format does not support language,
|
|
202
|
+
# but at least we can mention it here as a comment
|
|
203
|
+
if self.language:
|
|
204
|
+
x += f" ! language: {self.language}"
|
|
205
|
+
|
|
206
|
+
return x
|
|
109
207
|
|
|
110
208
|
@staticmethod
|
|
111
209
|
def _escape(s: str) -> str:
|
|
@@ -113,113 +211,100 @@ class Synonym:
|
|
|
113
211
|
|
|
114
212
|
|
|
115
213
|
@dataclass
|
|
116
|
-
class SynonymTypeDef(Referenced):
|
|
214
|
+
class SynonymTypeDef(Referenced, HasReferencesMixin):
|
|
117
215
|
"""A type definition for synonyms in OBO."""
|
|
118
216
|
|
|
119
217
|
reference: Reference
|
|
120
|
-
specificity:
|
|
218
|
+
specificity: _cv.SynonymScope | None = None
|
|
121
219
|
|
|
122
|
-
def
|
|
220
|
+
def __hash__(self) -> int:
|
|
221
|
+
# have to re-define hash because of the @dataclass
|
|
222
|
+
return hash((self.__class__, self.prefix, self.identifier))
|
|
223
|
+
|
|
224
|
+
def to_obo(self, ontology_prefix: str) -> str:
|
|
123
225
|
"""Serialize to OBO."""
|
|
124
|
-
rv = f
|
|
226
|
+
rv = f"synonymtypedef: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
|
|
227
|
+
name = self.name or ""
|
|
228
|
+
rv = f'{rv} "{name}"'
|
|
125
229
|
if self.specificity:
|
|
126
230
|
rv = f"{rv} {self.specificity}"
|
|
127
231
|
return rv
|
|
128
232
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
specificity
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
text.replace("-", "_")
|
|
140
|
-
.replace(" ", "_")
|
|
141
|
-
.replace('"', "")
|
|
142
|
-
.replace(")", "")
|
|
143
|
-
.replace("(", "")
|
|
144
|
-
)
|
|
145
|
-
if lower:
|
|
146
|
-
identifier = identifier.lower()
|
|
147
|
-
return cls(
|
|
148
|
-
reference=Reference(prefix="obo", identifier=identifier, name=text.replace('"', "")),
|
|
149
|
-
specificity=specificity,
|
|
150
|
-
)
|
|
233
|
+
def _get_references(self) -> dict[str, set[Reference]]:
|
|
234
|
+
"""Get all references used by the typedef."""
|
|
235
|
+
rv: defaultdict[str, set[Reference]] = defaultdict(set)
|
|
236
|
+
rv[self.reference.prefix].add(self.reference)
|
|
237
|
+
if self.specificity is not None:
|
|
238
|
+
# weird syntax, but this just gets the synonym scope
|
|
239
|
+
# predicate as a pyobo reference
|
|
240
|
+
r = v._c(_cv.synonym_scopes[self.specificity])
|
|
241
|
+
rv[r.prefix].add(r)
|
|
242
|
+
return dict(rv)
|
|
151
243
|
|
|
152
244
|
|
|
153
245
|
DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
|
|
154
|
-
reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="
|
|
246
|
+
reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
|
|
155
247
|
)
|
|
156
248
|
abbreviation = SynonymTypeDef(
|
|
157
249
|
reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
|
|
158
250
|
)
|
|
159
251
|
acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
if isinstance(reference, Term):
|
|
169
|
-
return reference.reference
|
|
170
|
-
if isinstance(reference, str):
|
|
171
|
-
_rv = Reference.from_curie(reference)
|
|
172
|
-
if _rv is None:
|
|
173
|
-
raise ValueError(f"could not parse CURIE from {reference}")
|
|
174
|
-
return _rv
|
|
175
|
-
if isinstance(reference, tuple):
|
|
176
|
-
return Reference(prefix=reference[0], identifier=reference[1])
|
|
177
|
-
if isinstance(reference, Reference):
|
|
178
|
-
return reference
|
|
179
|
-
raise TypeError(f"invalid type given for a reference ({type(reference)}): {reference}")
|
|
252
|
+
uk_spelling = SynonymTypeDef(
|
|
253
|
+
reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
|
|
254
|
+
)
|
|
255
|
+
default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
|
|
256
|
+
abbreviation.pair: abbreviation,
|
|
257
|
+
acronym.pair: acronym,
|
|
258
|
+
uk_spelling.pair: uk_spelling,
|
|
259
|
+
}
|
|
180
260
|
|
|
181
261
|
|
|
182
262
|
@dataclass
|
|
183
|
-
class Term(
|
|
263
|
+
class Term(Stanza):
|
|
184
264
|
"""A term in OBO."""
|
|
185
265
|
|
|
186
266
|
#: The primary reference for the entity
|
|
187
267
|
reference: Reference
|
|
188
268
|
|
|
189
269
|
#: A description of the entity
|
|
190
|
-
definition:
|
|
270
|
+
definition: str | None = None
|
|
191
271
|
|
|
192
|
-
#:
|
|
193
|
-
|
|
272
|
+
#: Object properties
|
|
273
|
+
relationships: RelationsHint = field(default_factory=lambda: defaultdict(list))
|
|
194
274
|
|
|
195
|
-
|
|
196
|
-
relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list))
|
|
275
|
+
_axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
|
|
197
276
|
|
|
198
|
-
|
|
199
|
-
properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
|
|
277
|
+
properties: PropertiesHint = field(default_factory=lambda: defaultdict(list))
|
|
200
278
|
|
|
201
279
|
#: Relationships with the default "is_a"
|
|
202
280
|
parents: list[Reference] = field(default_factory=list)
|
|
203
281
|
|
|
282
|
+
intersection_of: IntersectionOfHint = field(default_factory=list)
|
|
283
|
+
union_of: UnionOfHint = field(default_factory=list)
|
|
284
|
+
equivalent_to: list[Reference] = field(default_factory=list)
|
|
285
|
+
disjoint_from: list[Reference] = field(default_factory=list)
|
|
286
|
+
|
|
204
287
|
#: Synonyms of this term
|
|
205
288
|
synonyms: list[Synonym] = field(default_factory=list)
|
|
206
289
|
|
|
207
|
-
#:
|
|
290
|
+
#: Database cross-references, see :func:`get_mappings` for
|
|
291
|
+
#: access to all mappings in an SSSOM-like interface
|
|
208
292
|
xrefs: list[Reference] = field(default_factory=list)
|
|
209
|
-
xref_types: list[Reference] = field(default_factory=list)
|
|
210
|
-
|
|
211
|
-
#: Alternate Identifiers
|
|
212
|
-
alt_ids: list[Reference] = field(default_factory=list)
|
|
213
293
|
|
|
214
294
|
#: The sub-namespace within the ontology
|
|
215
|
-
namespace:
|
|
295
|
+
namespace: str | None = None
|
|
216
296
|
|
|
217
297
|
#: An annotation for obsolescence. By default, is None, but this means that it is not obsolete.
|
|
218
|
-
is_obsolete:
|
|
298
|
+
is_obsolete: bool | None = None
|
|
299
|
+
|
|
300
|
+
type: StanzaType = "Term"
|
|
219
301
|
|
|
220
|
-
|
|
302
|
+
builtin: bool | None = None
|
|
303
|
+
is_anonymous: bool | None = None
|
|
304
|
+
subsets: list[Reference] = field(default_factory=list)
|
|
221
305
|
|
|
222
|
-
def __hash__(self):
|
|
306
|
+
def __hash__(self) -> int:
|
|
307
|
+
# have to re-define hash because of the @dataclass
|
|
223
308
|
return hash((self.__class__, self.prefix, self.identifier))
|
|
224
309
|
|
|
225
310
|
@classmethod
|
|
@@ -227,10 +312,10 @@ class Term(Referenced):
|
|
|
227
312
|
cls,
|
|
228
313
|
prefix: str,
|
|
229
314
|
identifier: str,
|
|
230
|
-
name:
|
|
231
|
-
definition:
|
|
315
|
+
name: str | None = None,
|
|
316
|
+
definition: str | None = None,
|
|
232
317
|
**kwargs,
|
|
233
|
-
) ->
|
|
318
|
+
) -> Term:
|
|
234
319
|
"""Create a term from a reference."""
|
|
235
320
|
return cls(
|
|
236
321
|
reference=Reference(prefix=prefix, identifier=identifier, name=name),
|
|
@@ -239,245 +324,198 @@ class Term(Referenced):
|
|
|
239
324
|
)
|
|
240
325
|
|
|
241
326
|
@classmethod
|
|
242
|
-
def
|
|
243
|
-
|
|
244
|
-
prefix
|
|
245
|
-
identifier: str,
|
|
246
|
-
) -> "Term":
|
|
247
|
-
"""Create a term from a reference."""
|
|
248
|
-
from ..api import get_definition
|
|
249
|
-
|
|
250
|
-
return cls(
|
|
251
|
-
reference=Reference.auto(prefix=prefix, identifier=identifier),
|
|
252
|
-
definition=get_definition(prefix, identifier),
|
|
253
|
-
)
|
|
254
|
-
|
|
255
|
-
@classmethod
|
|
256
|
-
def from_curie(cls, curie: str, name: Optional[str] = None) -> "Term":
|
|
257
|
-
"""Create a term directly from a CURIE and optional name."""
|
|
258
|
-
prefix, identifier = normalize_curie(curie)
|
|
259
|
-
if prefix is None or identifier is None:
|
|
260
|
-
raise ValueError
|
|
261
|
-
return cls.from_triple(prefix=prefix, identifier=identifier, name=name)
|
|
262
|
-
|
|
263
|
-
def append_provenance(self, reference: ReferenceHint) -> None:
|
|
264
|
-
"""Add a provenance reference."""
|
|
265
|
-
self.provenance.append(_ensure_ref(reference))
|
|
266
|
-
|
|
267
|
-
def append_synonym(
|
|
268
|
-
self,
|
|
269
|
-
synonym: Union[str, Synonym],
|
|
270
|
-
*,
|
|
271
|
-
type: Optional[SynonymTypeDef] = None,
|
|
272
|
-
specificity: Optional[SynonymSpecificity] = None,
|
|
273
|
-
) -> None:
|
|
274
|
-
"""Add a synonym."""
|
|
275
|
-
if isinstance(synonym, str):
|
|
276
|
-
synonym = Synonym(
|
|
277
|
-
synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
|
|
278
|
-
)
|
|
279
|
-
self.synonyms.append(synonym)
|
|
280
|
-
|
|
281
|
-
def append_alt(self, alt: Union[str, Reference]) -> None:
|
|
282
|
-
"""Add an alternative identifier."""
|
|
283
|
-
if isinstance(alt, str):
|
|
284
|
-
alt = Reference(prefix=self.prefix, identifier=alt)
|
|
285
|
-
self.alt_ids.append(alt)
|
|
327
|
+
def default(cls, prefix, identifier, name=None) -> Self:
|
|
328
|
+
"""Create a default term."""
|
|
329
|
+
return cls(reference=default_reference(prefix=prefix, identifier=identifier, name=name))
|
|
286
330
|
|
|
287
|
-
def
|
|
288
|
-
"""Add a see also
|
|
289
|
-
self.
|
|
290
|
-
return self
|
|
291
|
-
|
|
292
|
-
def append_comment(self, value: str) -> "Term":
|
|
293
|
-
"""Add a comment relationship."""
|
|
294
|
-
self.append_property(comment.curie, value)
|
|
295
|
-
return self
|
|
296
|
-
|
|
297
|
-
def append_replaced_by(self, reference: ReferenceHint) -> "Term":
|
|
298
|
-
"""Add a replaced by relationship."""
|
|
299
|
-
self.append_relationship(term_replaced_by, reference)
|
|
300
|
-
return self
|
|
301
|
-
|
|
302
|
-
def append_parent(self, reference: ReferenceHint) -> "Term":
|
|
303
|
-
"""Add a parent to this entity."""
|
|
304
|
-
reference = _ensure_ref(reference)
|
|
305
|
-
if reference not in self.parents:
|
|
306
|
-
self.parents.append(reference)
|
|
307
|
-
return self
|
|
331
|
+
def append_see_also_uri(self, uri: str) -> Self:
|
|
332
|
+
"""Add a see also property."""
|
|
333
|
+
return self.annotate_uri(v.see_also, uri)
|
|
308
334
|
|
|
309
335
|
def extend_parents(self, references: Collection[Reference]) -> None:
|
|
310
336
|
"""Add a collection of parents to this entity."""
|
|
337
|
+
warnings.warn("use append_parent", DeprecationWarning, stacklevel=2)
|
|
311
338
|
if any(x is None for x in references):
|
|
312
339
|
raise ValueError("can not append a collection of parents containing a null parent")
|
|
313
340
|
self.parents.extend(references)
|
|
314
341
|
|
|
315
|
-
def
|
|
342
|
+
def get_property_literals(self, prop: ReferenceHint) -> list[str]:
|
|
316
343
|
"""Get properties from the given key."""
|
|
317
|
-
return self.properties[
|
|
344
|
+
return [reference_or_literal_to_str(t) for t in self.properties.get(_ensure_ref(prop), [])]
|
|
318
345
|
|
|
319
|
-
def get_property(self, prop) ->
|
|
346
|
+
def get_property(self, prop: ReferenceHint) -> str | None:
|
|
320
347
|
"""Get a single property of the given key."""
|
|
321
|
-
r = self.
|
|
322
|
-
if not r:
|
|
323
|
-
return None
|
|
324
|
-
if len(r) != 1:
|
|
325
|
-
raise ValueError
|
|
326
|
-
return r[0]
|
|
327
|
-
|
|
328
|
-
def get_relationship(self, typedef: TypeDef) -> Optional[Reference]:
|
|
329
|
-
"""Get a single relationship of the given type."""
|
|
330
|
-
r = self.get_relationships(typedef)
|
|
348
|
+
r = self.get_property_literals(prop)
|
|
331
349
|
if not r:
|
|
332
350
|
return None
|
|
333
351
|
if len(r) != 1:
|
|
334
352
|
raise ValueError
|
|
335
353
|
return r[0]
|
|
336
354
|
|
|
337
|
-
def
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
355
|
+
def append_exact_match(
|
|
356
|
+
self,
|
|
357
|
+
reference: ReferenceHint,
|
|
358
|
+
*,
|
|
359
|
+
mapping_justification: Reference | None = None,
|
|
360
|
+
confidence: float | None = None,
|
|
361
|
+
contributor: Reference | None = None,
|
|
362
|
+
) -> Self:
|
|
342
363
|
"""Append an exact match, also adding an xref."""
|
|
343
364
|
reference = _ensure_ref(reference)
|
|
344
|
-
self.
|
|
345
|
-
|
|
365
|
+
axioms = self._prepare_mapping_annotations(
|
|
366
|
+
mapping_justification=mapping_justification,
|
|
367
|
+
confidence=confidence,
|
|
368
|
+
contributor=contributor,
|
|
369
|
+
)
|
|
370
|
+
self.annotate_object(v.exact_match, reference, annotations=axioms)
|
|
346
371
|
return self
|
|
347
372
|
|
|
348
|
-
def
|
|
349
|
-
"""Append an xref."""
|
|
350
|
-
self.xrefs.append(_ensure_ref(reference))
|
|
351
|
-
|
|
352
|
-
def append_relationship(self, typedef: TypeDef, reference: ReferenceHint) -> None:
|
|
353
|
-
"""Append a relationship."""
|
|
354
|
-
self.relationships[typedef].append(_ensure_ref(reference))
|
|
355
|
-
|
|
356
|
-
def set_species(self, identifier: str, name: Optional[str] = None):
|
|
373
|
+
def set_species(self, identifier: str, name: str | None = None) -> Self:
|
|
357
374
|
"""Append the from_species relation."""
|
|
358
375
|
if name is None:
|
|
359
376
|
from pyobo.resources.ncbitaxon import get_ncbitaxon_name
|
|
360
377
|
|
|
361
378
|
name = get_ncbitaxon_name(identifier)
|
|
362
|
-
self.append_relationship(
|
|
363
|
-
from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
|
|
379
|
+
return self.append_relationship(
|
|
380
|
+
v.from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
|
|
364
381
|
)
|
|
365
382
|
|
|
366
|
-
def get_species(self, prefix: str = NCBITAXON_PREFIX) ->
|
|
383
|
+
def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Reference | None:
|
|
367
384
|
"""Get the species if it exists.
|
|
368
385
|
|
|
369
386
|
:param prefix: The prefix to use in case the term has several species annotations.
|
|
370
387
|
"""
|
|
371
|
-
for species in self.
|
|
388
|
+
for species in self.get_relationships(v.from_species):
|
|
372
389
|
if species.prefix == prefix:
|
|
373
390
|
return species
|
|
374
391
|
return None
|
|
375
392
|
|
|
376
|
-
def extend_relationship(self, typedef:
|
|
393
|
+
def extend_relationship(self, typedef: ReferenceHint, references: Iterable[Reference]) -> None:
    """Append several relationships (deprecated).

    :param typedef: The relation, normalized with ``_ensure_ref`` before use
    :param references: The targets to add under the relation
    :raises ValueError: If any of the references is ``None``

    Emits a :class:`DeprecationWarning` pointing callers to ``append_relationship``.
    """
    warnings.warn("use append_relationship", DeprecationWarning, stacklevel=2)
    # Materialize first: ``references`` is only promised to be an iterable, and the
    # null check below would exhaust a one-shot iterator, leaving nothing to extend with.
    references = list(references)
    if any(x is None for x in references):
        raise ValueError("can not extend a collection that includes a null reference")
    typedef = _ensure_ref(typedef)
    self.relationships[typedef].extend(references)
|
|
381
400
|
|
|
382
|
-
def
|
|
383
|
-
self,
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
def _definition_fp(self) -> str:
|
|
393
|
-
if self.definition is None:
|
|
394
|
-
raise AssertionError
|
|
395
|
-
return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'
|
|
396
|
-
|
|
397
|
-
def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]:
|
|
398
|
-
"""Iterate over pairs of typedefs and targets."""
|
|
399
|
-
for typedef, targets in sorted(self.relationships.items(), key=_sort_relations):
|
|
400
|
-
for target in sorted(targets, key=lambda ref: ref.preferred_curie):
|
|
401
|
-
yield typedef, target
|
|
402
|
-
|
|
403
|
-
def iterate_properties(self) -> Iterable[tuple[str, str]]:
|
|
404
|
-
"""Iterate over pairs of property and values."""
|
|
405
|
-
for prop, values in sorted(self.properties.items()):
|
|
406
|
-
for value in sorted(values):
|
|
407
|
-
yield prop, value
|
|
408
|
-
|
|
409
|
-
def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
|
|
401
|
+
def iterate_obo_lines(
    self,
    *,
    ontology_prefix: str,
    typedefs: Mapping[ReferenceTuple, TypeDef],
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
    emit_object_properties: bool = True,
    emit_annotation_properties: bool = True,
) -> Iterable[str]:
    """Iterate over the lines to write in an OBO file.

    :param ontology_prefix: Passed to every helper that renders a reference
    :param typedefs: Passed through to ``_iterate_obo_properties`` and
        ``_iterate_obo_relations``
    :param synonym_typedefs: Passed through to each synonym's ``to_obo``
    :param emit_object_properties: If false, the output of ``_iterate_obo_relations``
        (step 18) is left out
    :param emit_annotation_properties: If false, the output of
        ``_iterate_obo_properties`` (step 12) is left out
    :yields: The stanza header followed by one line per tag, in the numbered order below
    """
    # stanza header, preceded by a blank line
    yield f"\n[{self.type}]"
    # 1: id
    yield f"id: {self._reference(self.reference, ontology_prefix)}"
    # 2: is_anonymous
    yield from _boolean_tag("is_anonymous", self.is_anonymous)
    # 3: name
    if self.name:
        yield f"name: {obo_escape_slim(self.name)}"
    # 4: namespace ("?" is treated like a missing namespace)
    if self.namespace and self.namespace != "?":
        namespace_normalized = (
            self.namespace.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "")
        )
        yield f"namespace: {namespace_normalized}"
    # 5: alt_id
    for alt in sorted(self.alt_ids):
        yield f"alt_id: {self._reference(alt, ontology_prefix, add_name_comment=True)}"
    # 6: def
    if self.definition:
        yield f"def: {self._definition_fp()}"
    # 7: comment (only literal values are written)
    for x in self.get_property_values(v.comment):
        if isinstance(x, OBOLiteral):
            yield f'comment: "{x.value}"'
    # 8: subset
    yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
    # 9: synonym
    for synonym in sorted(self.synonyms):
        yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
    # 10: xref
    yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
    # 11: builtin
    yield from _boolean_tag("builtin", self.builtin)
    # 12: properties
    if emit_annotation_properties:
        yield from self._iterate_obo_properties(
            ontology_prefix=ontology_prefix,
            skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
            skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
            typedefs=typedefs,
        )
    # 13: parents, tagged is_a for "Term" stanzas and instance_of otherwise
    parent_tag = "is_a" if self.type == "Term" else "instance_of"
    yield from _reference_list_tag(parent_tag, self.parents, ontology_prefix)
    # 14: intersection_of
    yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
    # 15: union_of
    yield from _reference_list_tag("union_of", self.union_of, ontology_prefix=ontology_prefix)
    # 16: equivalent_to
    yield from _reference_list_tag(
        "equivalent_to", self.equivalent_to, ontology_prefix=ontology_prefix
    )
    # 17: disjoint_from
    yield from _reference_list_tag(
        "disjoint_from", self.disjoint_from, ontology_prefix=ontology_prefix
    )
    # 18: relationships
    if emit_object_properties:
        yield from self._iterate_obo_relations(
            ontology_prefix=ontology_prefix, typedefs=typedefs
        )
    # 19 TODO created_by
    # 20: creation_date (only literal values are written)
    for x in self.get_property_values(v.obo_creation_date):
        if isinstance(x, OBOLiteral):
            yield f"creation_date: {x.value}"
    # 21: is_obsolete
    yield from _boolean_tag("is_obsolete", self.is_obsolete)
    # 22: replaced_by, taken from the term_replaced_by property
    yield from _tag_property_targets(
        "replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
    )
    # 23: consider, taken from the see_also property
    yield from _tag_property_targets(
        "consider", self, v.see_also, ontology_prefix=ontology_prefix
    )
|
|
462
487
|
|
|
463
488
|
|
|
464
489
|
#: A set of warnings, used to make sure we don't show the same one over and over
|
|
465
|
-
|
|
466
|
-
|
|
490
|
+
# Holds the (prefix, synonym type reference) pairs that _synonym_typedef_warn already logged
_SYNONYM_TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set()
|
|
467
491
|
|
|
468
|
-
def _sort_relations(r):
|
|
469
|
-
typedef, _references = r
|
|
470
|
-
return typedef.preferred_curie
|
|
471
492
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
return
|
|
479
|
-
|
|
480
|
-
|
|
493
|
+
def _synonym_typedef_warn(
    prefix: str,
    predicate: Reference | None,
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
) -> SynonymTypeDef | None:
    """Resolve a synonym type reference to its definition, warning once if it is unknown.

    :param prefix: The ontology prefix, used in the log message and the de-duplication key
    :param predicate: The synonym type to resolve
    :param synonym_typedefs: The caller-supplied definitions, consulted after the defaults
    :returns: The matching definition, or ``None`` when ``predicate`` is missing, is the
        default synonym type, or is not defined in either mapping
    """
    if predicate is None:
        return None
    pair = predicate.pair
    if pair == DEFAULT_SYNONYM_TYPE.pair:
        return None

    # built-in definitions are consulted before the caller-supplied ones
    for registry in (default_synonym_typedefs, synonym_typedefs):
        if pair in registry:
            return registry[pair]

    key = prefix, predicate
    if key in _SYNONYM_TYPEDEF_WARNINGS:
        return None
    _SYNONYM_TYPEDEF_WARNINGS.add(key)
    predicate_preferred_curie = get_preferred_curie(predicate)
    if predicate.prefix != "obo":
        logger.warning(f"[{prefix}] synonym typedef not defined: {predicate_preferred_curie}")
    else:
        # Throw our hands up in the air. By using `obo` as the prefix,
        # we already threw using "real" definitions out the window
        logger.warning(
            f"[{prefix}] synonym typedef with OBO prefix not defined: {predicate_preferred_curie}."
            f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
            f"which automatically gets an OBO prefix."
        )
    return None
|
|
481
519
|
|
|
482
520
|
|
|
483
521
|
class BioregistryError(ValueError):
|
|
@@ -495,6 +533,9 @@ class BioregistryError(ValueError):
|
|
|
495
533
|
)
|
|
496
534
|
|
|
497
535
|
|
|
536
|
+
# Holds the (ontology, prefix) pairs for which the "no URI format" warning was already logged
LOGGED_MISSING_URI: set[tuple[str, str]] = set()
|
|
537
|
+
|
|
538
|
+
|
|
498
539
|
@dataclass
|
|
499
540
|
class Obo:
|
|
500
541
|
"""An OBO document."""
|
|
@@ -506,22 +547,19 @@ class Obo:
|
|
|
506
547
|
check_bioregistry_prefix: ClassVar[bool] = True
|
|
507
548
|
|
|
508
549
|
#: The name of the ontology. If not given, tries looking up with the Bioregistry.
|
|
509
|
-
name: ClassVar[
|
|
510
|
-
|
|
511
|
-
#: The OBO format
|
|
512
|
-
format_version: ClassVar[str] = "1.2"
|
|
550
|
+
name: ClassVar[str | None] = None
|
|
513
551
|
|
|
514
552
|
#: Type definitions
|
|
515
|
-
typedefs: ClassVar[
|
|
553
|
+
typedefs: ClassVar[list[TypeDef] | None] = None
|
|
516
554
|
|
|
517
555
|
#: Synonym type definitions
|
|
518
|
-
synonym_typedefs: ClassVar[
|
|
556
|
+
synonym_typedefs: ClassVar[list[SynonymTypeDef] | None] = None
|
|
519
557
|
|
|
520
558
|
#: An annotation about how an ontology was generated
|
|
521
|
-
auto_generated_by: ClassVar[
|
|
559
|
+
auto_generated_by: ClassVar[str | None] = None
|
|
522
560
|
|
|
523
561
|
#: The idspaces used in the document
|
|
524
|
-
idspaces: ClassVar[
|
|
562
|
+
idspaces: ClassVar[Mapping[str, str] | None] = None
|
|
525
563
|
|
|
526
564
|
#: For super-sized datasets that shouldn't be read into memory
|
|
527
565
|
iter_only: ClassVar[bool] = False
|
|
@@ -530,28 +568,32 @@ class Obo:
|
|
|
530
568
|
dynamic_version: ClassVar[bool] = False
|
|
531
569
|
|
|
532
570
|
#: Set to a static version for the resource (i.e., the resource is not itself versioned)
|
|
533
|
-
static_version: ClassVar[
|
|
571
|
+
static_version: ClassVar[str | None] = None
|
|
534
572
|
|
|
535
|
-
bioversions_key: ClassVar[
|
|
573
|
+
bioversions_key: ClassVar[str | None] = None
|
|
536
574
|
|
|
537
575
|
#: Root terms to use for the ontology
|
|
538
|
-
root_terms: ClassVar[
|
|
576
|
+
root_terms: ClassVar[list[Reference] | None] = None
|
|
539
577
|
|
|
540
578
|
#: The date the ontology was generated
|
|
541
|
-
date:
|
|
579
|
+
date: datetime.datetime | None = field(default_factory=datetime.datetime.today)
|
|
542
580
|
|
|
543
581
|
#: The ontology version
|
|
544
|
-
data_version:
|
|
582
|
+
data_version: str | None = None
|
|
545
583
|
|
|
546
584
|
#: Should this ontology be reloaded?
|
|
547
585
|
force: bool = False
|
|
548
586
|
|
|
549
587
|
#: The hierarchy of terms
|
|
550
|
-
_hierarchy:
|
|
588
|
+
_hierarchy: nx.DiGraph | None = field(init=False, default=None, repr=False)
|
|
551
589
|
#: A cache of terms
|
|
552
|
-
_items:
|
|
590
|
+
_items: list[Term] | None = field(init=False, default=None, repr=False)
|
|
591
|
+
|
|
592
|
+
subsetdefs: ClassVar[list[tuple[Reference, str]] | None] = None
|
|
553
593
|
|
|
554
|
-
|
|
594
|
+
property_values: ClassVar[list[Annotation] | None] = None
|
|
595
|
+
|
|
596
|
+
imports: ClassVar[list[str] | None] = None
|
|
555
597
|
|
|
556
598
|
def __post_init__(self):
|
|
557
599
|
"""Run post-init checks."""
|
|
@@ -576,9 +618,85 @@ class Obo:
|
|
|
576
618
|
elif "/" in self.data_version:
|
|
577
619
|
raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
|
|
578
620
|
if self.auto_generated_by is None:
|
|
579
|
-
self.auto_generated_by = f"
|
|
621
|
+
self.auto_generated_by = f"PyOBO v{get_pyobo_version(with_git_hash=True)} on {datetime.datetime.now().isoformat()}" # type:ignore
|
|
622
|
+
|
|
623
|
+
def _get_clean_idspaces(self) -> dict[str, str]:
    """Build the idspace dictionary from three layered prefix maps.

    The layers are, in lookup order: ``DEFAULT_PREFIX_MAP``, the class-level
    ``idspaces``, and the map inferred from the prefixes used in the ontology.
    A :class:`ChainMap` returns the value from the first layer that has a key.
    """
    declared = dict(self.idspaces or {})
    inferred = self._infer_prefix_map()
    layered = ChainMap(DEFAULT_PREFIX_MAP, declared, inferred)
    return dict(layered)
|
|
580
637
|
|
|
581
|
-
def
|
|
638
|
+
def _infer_prefix_map(self) -> dict[str, str]:
    """Get a prefix map including all prefixes used in the ontology.

    :returns: A dictionary from the Bioregistry preferred prefix (or the prefix itself
        when there is no preferred one) to a URI prefix
    :raises ValueError: If ``bioregistry.get_resource`` returns ``None`` for a prefix
    """
    rv = {}
    for prefix in sorted(self._get_prefixes(), key=str.casefold):
        resource = bioregistry.get_resource(prefix)
        if resource is None:
            # name the offending prefix so the failure can be diagnosed
            raise ValueError(
                f"[{self.ontology}] could not look up prefix in the Bioregistry: {prefix}"
            )
        uri_prefix = resource.get_rdf_uri_prefix()
        if uri_prefix is None:
            uri_prefix = resource.get_uri_prefix()
        if uri_prefix is None:
            # This allows us an escape hatch, since some
            # prefixes don't have an associated URI prefix
            uri_prefix = f"https://bioregistry.io/{prefix}:"
            # warn only once per (ontology, prefix) pair
            if (self.ontology, prefix) not in LOGGED_MISSING_URI:
                LOGGED_MISSING_URI.add((self.ontology, prefix))
                logger.warning(
                    "[%s] uses prefix with no URI format: %s. Auto-generating Bioregistry link: %s",
                    self.ontology,
                    prefix,
                    uri_prefix,
                )

        pp = bioregistry.get_preferred_prefix(prefix) or str(prefix)
        rv[pp] = uri_prefix
    return rv
|
|
664
|
+
|
|
665
|
+
def _get_prefixes(self) -> set[str]:
    """Collect every prefix used by the ontology.

    Starts from the keys of ``DEFAULT_PREFIX_MAP`` and adds the prefixes reported by
    each stanza and synonym type definition, the subset definitions, and the
    ontology-level annotations.
    """
    collected: set[str] = set(DEFAULT_PREFIX_MAP)
    for stanza in self._iter_stanzas():
        collected.update(stanza._get_prefixes())
    for synonym_typedef in self.synonym_typedefs or []:
        collected.update(synonym_typedef._get_prefixes())
    for subset, _ in self.subsetdefs or []:
        collected.add(subset.prefix)
    # _iterate_property_pairs covers metadata, root terms,
    # and properties in self.property_values
    annotation_prefixes = _get_prefixes_from_annotations(self._iterate_property_pairs())
    collected.update(annotation_prefixes)
    if self.auto_generated_by:
        collected.add("oboInOwl")
    return collected
|
|
679
|
+
|
|
680
|
+
def _get_references(self) -> dict[str, set[Reference]]:
    """Collect every reference used by the ontology, grouped by prefix."""
    grouped: defaultdict[str, set[Reference]] = defaultdict(set)

    def _absorb(mapping) -> None:
        # merge one prefix -> references mapping into the running result
        for key, refs in mapping.items():
            grouped[key].update(refs)

    # iterating ``self`` yields the members of the ontology, as in the original
    for holder in itt.chain(self, self.typedefs or [], self.synonym_typedefs or []):
        _absorb(holder._get_references())
    for subset, _ in self.subsetdefs or []:
        grouped[subset.prefix].add(subset)
    # _iterate_property_pairs covers metadata, root terms,
    # and properties in self.property_values
    _absorb(_get_references_from_annotations(self._iterate_property_pairs()))
    if self.auto_generated_by:
        marker = v.obo_autogenerated_by
        grouped[marker.prefix].add(marker)
    return dict(grouped)
|
|
698
|
+
|
|
699
|
+
def _get_version(self) -> str | None:
|
|
582
700
|
if self.bioversions_key:
|
|
583
701
|
try:
|
|
584
702
|
return get_version(self.bioversions_key)
|
|
@@ -594,6 +712,13 @@ class Obo:
|
|
|
594
712
|
raise ValueError(f"There is no version available for {self.ontology}")
|
|
595
713
|
return self.data_version
|
|
596
714
|
|
|
715
|
+
@property
|
|
716
|
+
def _prefix_version(self) -> str:
|
|
717
|
+
"""Get the prefix and version (for logging)."""
|
|
718
|
+
if self.data_version:
|
|
719
|
+
return f"{self.ontology} {self.data_version}"
|
|
720
|
+
return self.ontology
|
|
721
|
+
|
|
597
722
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
    """Iterate over terms in this ontology.

    :param force: Unused by this base implementation
    :raises NotImplementedError: Always; presumably subclasses are expected to
        override this method (confirm against the concrete ontology classes)
    """
    raise NotImplementedError
|
|
@@ -604,273 +729,481 @@ class Obo:
|
|
|
604
729
|
|
|
605
730
|
return graph_from_obo(self)
|
|
606
731
|
|
|
607
|
-
def write_obograph(self, path: Path) -> None:
|
|
732
|
+
def write_obograph(self, path: str | Path) -> None:
    """Serialize the result of :meth:`get_graph` as indented JSON and write it to ``path``.

    Fields that are ``None`` or unset are left out of the JSON.
    """
    graph = self.get_graph()
    with safe_open(path, read=False) as handle:
        serialized = graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True)
        handle.write(serialized)
|
|
611
737
|
|
|
612
738
|
@classmethod
def cli(cls, *args, default_rewrite: bool = False) -> Any:
    """Build the command from :meth:`get_cls_cli` and invoke it with ``args``.

    :param args: Positional arguments forwarded to the command
    :param default_rewrite: Forwarded to :meth:`get_cls_cli`
    :returns: Whatever invoking the command returns
    """
    return cls.get_cls_cli(default_rewrite=default_rewrite)(*args)
|
|
617
743
|
|
|
618
744
|
@classmethod
def get_cls_cli(cls, *, default_rewrite: bool = False) -> click.Command:
    """Get the CLI for this class.

    :param default_rewrite: The default for the ``--rewrite/--no-rewrite`` option
    :returns: A click command that instantiates the class and calls ``write_default``

    The command exits with status 1 if instantiating the class raises.
    """

    @click.command()
    @verbose_option
    @force_option
    @click.option(
        "--rewrite/--no-rewrite",
        "-r",
        # previously hard-coded to False, which left ``default_rewrite`` unused
        default=default_rewrite,
        is_flag=True,
        help="Re-process the data, but don't download it again.",
    )
    @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
    @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
    @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
    @click.option(
        "--version", help="Specify data version to get. Use this if bioversions is acting up."
    )
    def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
        # NOTE: ``rewrite`` used to be overwritten with ``True`` here, which made
        # ``--no-rewrite`` a no-op; the parsed option value is now honored.
        try:
            inst = cls(force=force, data_version=version)
        except Exception as e:
            click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
            sys.exit(1)
        inst.write_default(
            write_obograph=False,
            write_obo=False,
            write_owl=owl,
            write_ofn=ofn,
            write_ttl=ttl,
            write_nodes=True,
            force=force or rewrite,
            use_tqdm=True,
        )

    return _main
|
|
652
783
|
|
|
653
784
|
@property
def date_formatted(self) -> str:
    """Format ``self.date`` with ``DATE_FORMAT``, using the current time if it is unset."""
    moment = self.date or datetime.datetime.now()
    return moment.strftime(DATE_FORMAT)
|
|
788
|
+
|
|
789
|
+
def _iter_terms_safe(self) -> Iterator[Term]:
    """Get an iterator over terms.

    Reads from :meth:`iter_terms` when ``iter_only`` is set, and from
    ``_items_accessor`` otherwise.
    """
    source = self.iter_terms(force=self.force) if self.iter_only else self._items_accessor
    return iter(source)
|
|
657
793
|
|
|
658
794
|
def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Term]:
    """Yield the terms from :meth:`_iter_terms_safe`, optionally with a progress bar.

    :param use_tqdm: Wrap the iteration in ``tqdm``
    :param desc: The progress bar description
    """
    stream = self._iter_terms_safe()
    if not use_tqdm:
        yield from stream
        return
    count: int | None
    try:
        count = len(self._items_accessor)
    except TypeError:
        # the accessor has no length, so the bar runs without a total
        count = None
    yield from tqdm(stream, desc=desc, unit_scale=True, unit="term", total=count)
|
|
672
804
|
|
|
673
|
-
|
|
674
|
-
|
|
805
|
+
def _iter_stanzas(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Stanza]:
    """Yield everything from :meth:`_iter_terms`, followed by the class-level typedefs."""
    for term in self._iter_terms(use_tqdm=use_tqdm, desc=desc):
        yield term
    for typedef in self.typedefs or []:
        yield typedef
|
|
675
808
|
|
|
676
|
-
|
|
809
|
+
def iterate_obo_lines(
    self,
    emit_object_properties: bool = True,
    emit_annotation_properties: bool = True,
) -> Iterable[str]:
    """Iterate over the lines to write in an OBO file.

    :param emit_object_properties: Forwarded to each term's ``iterate_obo_lines``
    :param emit_annotation_properties: Forwarded to each term's ``iterate_obo_lines``
    :yields: The header lines, then the lines for each typedef, then for each term

    Here's the order:

    1. format-version (technically, this is the only required field)
    2. data-version
    3. date
    4. saved-by
    5. auto-generated-by
    6. import
    7. subsetdef
    8. synonymtypedef
    9. default-namespace
    10. namespace-id-rule
    11. idspace
    12. treat-xrefs-as-equivalent
    13. treat-xrefs-as-genus-differentia
    14. treat-xrefs-as-relationship
    15. treat-xrefs-as-is_a
    16. remark
    17. ontology
    """
    # 1
    yield f"format-version: {FORMAT_VERSION}"
    # 2
    if self.data_version:
        yield f"data-version: {self.data_version}"
    # 3
    if self.date:
        # this line was a bare expression before, so the date header was never written
        yield f"date: {self.date_formatted}"
    # 4 TODO saved-by
    # 5
    if self.auto_generated_by:
        yield f"auto-generated-by: {self.auto_generated_by}"
    # 6
    for imp in self.imports or []:
        yield f"import: {imp}"
    # 7
    for subset, subset_remark in self.subsetdefs or []:
        yield f'subsetdef: {reference_escape(subset, ontology_prefix=self.ontology)} "{subset_remark}"'
    # 8
    for synonym_typedef in sorted(self.synonym_typedefs or []):
        if synonym_typedef.curie == DEFAULT_SYNONYM_TYPE.curie:
            continue
        yield synonym_typedef.to_obo(ontology_prefix=self.ontology)
    # 9 TODO default-namespace
    # 10 TODO namespace-id-rule
    # 11
    for prefix, url in sorted(self._get_clean_idspaces().items()):
        if prefix in DEFAULT_PREFIX_MAP:
            # we don't need to write out the 4 default prefixes from
            # table 2 in https://www.w3.org/TR/owl2-syntax/#IRIs since
            # they're considered to always be builtin
            continue

        # additional assumptions about built in
        if prefix in {"obo", "oboInOwl"}:
            continue

        # ROBOT assumes that all OBO foundry prefixes are builtin,
        # so don't re-declare them
        if bioregistry.is_obo_foundry(prefix):
            continue

        yv = f"idspace: {prefix} {url}"
        if _yv_name := bioregistry.get_name(prefix):
            yv += f' "{_yv_name}"'
        yield yv
    # 12-15 are handled only during reading, and
    # PyOBO unmacros things before outputting
    # 12 treat-xrefs-as-equivalent
    # 13 treat-xrefs-as-genus-differentia
    # 14 treat-xrefs-as-relationship
    # 15 treat-xrefs-as-is_a
    # 16 TODO remark
    # 17
    yield f"ontology: {self.ontology}"
    # 18 (secret)
    yield from self._iterate_properties()

    typedefs = self._index_typedefs()
    synonym_typedefs = self._index_synonym_typedefs()

    # PROPERTIES
    for typedef in sorted(self.typedefs or []):
        yield from typedef.iterate_obo_lines(
            ontology_prefix=self.ontology,
            typedefs=typedefs,
            synonym_typedefs=synonym_typedefs,
        )

    # TERMS AND INSTANCES
    for term in self._iter_terms():
        yield from term.iterate_obo_lines(
            ontology_prefix=self.ontology,
            typedefs=typedefs,
            synonym_typedefs=synonym_typedefs,
            emit_object_properties=emit_object_properties,
            emit_annotation_properties=emit_annotation_properties,
        )
|
|
914
|
+
|
|
915
|
+
def _iterate_properties(self) -> Iterable[str]:
|
|
916
|
+
for predicate, value in self._iterate_property_pairs():
|
|
917
|
+
match value:
|
|
918
|
+
case OBOLiteral():
|
|
919
|
+
end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
|
|
920
|
+
case Reference():
|
|
921
|
+
end = reference_escape(value, ontology_prefix=self.ontology)
|
|
922
|
+
yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
|
|
923
|
+
|
|
924
|
+
def _iterate_property_pairs(self) -> Iterable[Annotation]:
    """Yield the ontology-level annotations.

    In order: the title (from ``name``), the license and description (both looked up
    in the Bioregistry), the root terms, and finally the class-level ``property_values``.
    """
    # Title
    if self.name:
        yield Annotation(v.has_title, OBOLiteral.string(self.name))

    # License
    # TODO add SPDX to idspaces and use as a CURIE?
    if license_spdx_id := bioregistry.get_license(self.ontology):
        if license_spdx_id.startswith("http"):
            license_literal = OBOLiteral.uri(license_spdx_id)
        else:
            license_literal = OBOLiteral.string(license_spdx_id)
        yield Annotation(v.has_license, license_literal)

    # Description
    if description := bioregistry.get_description(self.ontology):
        # Keep the literal unescaped: _iterate_properties already applies
        # obo_escape_slim when writing OBO, so escaping here as well escaped the
        # text twice. The second .strip() call was also redundant.
        yield Annotation(v.has_description, OBOLiteral.string(description.strip()))

    # Root terms
    for root_term in self.root_terms or []:
        yield Annotation(v.has_ontology_root_term, root_term)

    # Extras
    if self.property_values:
        yield from self.property_values
|
|
705
950
|
|
|
706
|
-
|
|
707
|
-
|
|
951
|
+
def _index_typedefs(self) -> Mapping[ReferenceTuple, TypeDef]:
    """Index the typedefs by pair, with the class-level ones ahead of the defaults."""
    from .typedef import default_typedefs

    local_index = {typedef.pair: typedef for typedef in self.typedefs or []}
    return ChainMap(local_index, default_typedefs)
|
|
958
|
+
|
|
959
|
+
def _index_synonym_typedefs(self) -> Mapping[ReferenceTuple, SynonymTypeDef]:
    """Index the synonym typedefs by pair, with the class-level ones ahead of the defaults."""
    local_index = {
        synonym_typedef.pair: synonym_typedef for synonym_typedef in self.synonym_typedefs or []
    }
    return ChainMap(local_index, default_synonym_typedefs)
|
|
711
964
|
|
|
712
965
|
def write_obo(
    self,
    file: None | str | TextIO | Path = None,
    *,
    use_tqdm: bool = False,
    emit_object_properties: bool = True,
    emit_annotation_properties: bool = True,
) -> None:
    """Write the lines from :meth:`iterate_obo_lines` to a path or an open stream.

    :param file: A path, which is opened with ``safe_open``, or anything else, which
        is handed to ``print`` as its ``file`` argument
    :param use_tqdm: Show a progress bar over the lines
    :param emit_object_properties: Forwarded to :meth:`iterate_obo_lines`
    :param emit_annotation_properties: Forwarded to :meth:`iterate_obo_lines`
    """
    lines = self.iterate_obo_lines(
        emit_object_properties=emit_object_properties,
        emit_annotation_properties=emit_annotation_properties,
    )
    if use_tqdm:
        lines = tqdm(
            lines,
            desc=f"[{self._prefix_version}] writing OBO",
            unit_scale=True,
            unit="line",
        )
    if not isinstance(file, str | Path | os.PathLike):
        self._write_lines(lines, file)
        return
    with safe_open(file, read=False) as fh:
        self._write_lines(lines, fh)
|
|
724
990
|
|
|
725
991
|
@staticmethod
|
|
726
|
-
def _write_lines(it, file:
|
|
992
|
+
def _write_lines(it, file: TextIO | None):
|
|
727
993
|
for line in it:
|
|
728
994
|
print(line, file=file)
|
|
729
995
|
|
|
730
|
-
def write_obonet_gz(self, path:
|
|
996
|
+
def write_obonet_gz(self, path: str | Path) -> None:
    """Convert the ontology with :meth:`to_obonet` and write it as a gzipped graph."""
    write_gzipped_graph(path=path, graph=self.to_obonet())
|
|
735
1000
|
|
|
736
|
-
def
|
|
737
|
-
|
|
1001
|
+
def write_ofn(self, path: str | Path) -> None:
    """Convert the ontology to Functional OWL (OFN) and write it to ``path``."""
    from .functional.obo_to_functional import get_ofn_from_obo

    get_ofn_from_obo(self).write_funowl(path)
|
|
741
1007
|
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
1008
|
+
def write_rdf(self, path: str | Path) -> None:
    """Convert the ontology to its OFN representation and write that as RDF to ``path``."""
    from .functional.obo_to_functional import get_ofn_from_obo

    get_ofn_from_obo(self).write_rdf(path)
|
|
749
1014
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
1015
|
+
def write_nodes(self, path: str | Path) -> None:
    """Write a nodes TSV file.

    :param path: Where to write the file

    The columns come from :attr:`nodes_header` and the rows from
    :meth:`iterate_node_rows`. This previously wrote the edge rows under the node header.
    """
    write_iterable_tsv(
        path=path,
        header=self.nodes_header,
        it=self.iterate_node_rows(),
    )
|
|
753
1022
|
|
|
754
1023
|
@property
def nodes_header(self) -> Sequence[str]:
    """Get the column headers of the nodes file, each written as ``name:type``."""
    columns = (
        ("curie", "ID"),
        ("name", "string"),
        ("synonyms", "string[]"),
        ("synonym_predicates", "string[]"),
        ("synonym_types", "string[]"),
        ("definition", "string"),
        ("deprecated", "boolean"),
        ("type", "string"),
        ("provenance", "string[]"),
        ("alts", "string[]"),
        ("replaced_by", "string[]"),
        ("mapping_objects", "string[]"),
        ("mapping_predicates", "string[]"),
        ("version", "string"),
    )
    return [f"{column}:{kind}" for column, kind in columns]
|
|
757
1042
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
1043
|
+
def _get_node_row(self, node: Term, sep: str, version: str) -> Sequence[str]:
    """Build one row of the nodes file, with cells in the order of :attr:`nodes_header`.

    :param node: The term to serialize
    :param sep: The delimiter used to join multi-valued cells
    :param version: The value for the last column
    """
    synonyms = list(node.synonyms)
    mappings = list(node.get_mappings(include_xrefs=True, add_context=False))
    provenance_curies = (
        reference.curie for reference in node.provenance if isinstance(reference, Reference)
    )
    return (
        node.curie,
        node.name or "",
        sep.join(synonym.name for synonym in synonyms),
        sep.join(synonym.predicate.curie for synonym in synonyms),
        # a synonym without a type contributes an empty cell entry
        sep.join(synonym.type.curie if synonym.type else "" for synonym in synonyms),
        node.definition or "",
        "true" if node.is_obsolete else "false",
        node.type,
        sep.join(provenance_curies),
        sep.join(alt_reference.curie for alt_reference in node.alt_ids),
        sep.join(ref.curie for ref in node.get_replaced_by()),
        sep.join(obj.curie for _predicate, obj in mappings),
        sep.join(predicate.curie for predicate, _obj in mappings),
        version,
    )
|
|
761
1071
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
1072
|
+
def iterate_node_rows(self, sep: str = ";") -> Iterable[Sequence[str]]:
    """Yield one nodes-file row per term whose prefix matches this ontology.

    :param sep: The delimiter used to join multi-valued cells
    """
    version = self.data_version or ""
    own_terms = (term for term in self.iter_terms() if term.prefix == self.ontology)
    for term in own_terms:
        yield self._get_node_row(term, sep=sep, version=version)
|
|
1079
|
+
|
|
1080
|
+
def write_edges(self, path: str | Path) -> None:
|
|
1081
|
+
"""Write a edges TSV file."""
|
|
1082
|
+
# node, this is actually taken care of as part of the cache configuration
|
|
1083
|
+
write_iterable_tsv(
|
|
1084
|
+
path=path,
|
|
1085
|
+
header=self.edges_header,
|
|
1086
|
+
it=self.iterate_edge_rows(),
|
|
1087
|
+
)
|
|
765
1088
|
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
return self._cache(name="xrefs.tsv")
|
|
1089
|
+
def _path(self, *parts: str, name: str | None = None) -> Path:
|
|
1090
|
+
return prefix_directory_join(self.ontology, *parts, name=name, version=self.data_version)
|
|
769
1091
|
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
return self._cache(name="relations.tsv")
|
|
773
|
-
|
|
774
|
-
@property
|
|
775
|
-
def _properties_path(self) -> Path:
|
|
776
|
-
return self._cache(name="properties.tsv")
|
|
1092
|
+
def _get_cache_path(self, name: CacheArtifact) -> Path:
|
|
1093
|
+
return get_cache_path(self.ontology, name=name, version=self.data_version)
|
|
777
1094
|
|
|
778
1095
|
@property
|
|
779
1096
|
def _root_metadata_path(self) -> Path:
|
|
780
1097
|
return prefix_directory_join(self.ontology, name="metadata.json")
|
|
781
1098
|
|
|
782
|
-
@property
|
|
783
|
-
def _versioned_metadata_path(self) -> Path:
|
|
784
|
-
return self._cache(name="metadata.json")
|
|
785
|
-
|
|
786
1099
|
@property
|
|
787
1100
|
def _obo_path(self) -> Path:
|
|
788
|
-
return
|
|
1101
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo.gz")
|
|
789
1102
|
|
|
790
1103
|
@property
|
|
791
1104
|
def _obograph_path(self) -> Path:
|
|
792
|
-
return self._path(name=f"{self.ontology}.json")
|
|
1105
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json.gz")
|
|
793
1106
|
|
|
794
1107
|
@property
|
|
795
1108
|
def _owl_path(self) -> Path:
|
|
796
|
-
return self._path(name=f"{self.ontology}.owl")
|
|
1109
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl.gz")
|
|
797
1110
|
|
|
798
1111
|
@property
|
|
799
1112
|
def _obonet_gz_path(self) -> Path:
|
|
800
|
-
return self._path(name=f"{self.ontology}.obonet.json.gz")
|
|
1113
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obonet.json.gz")
|
|
801
1114
|
|
|
802
1115
|
@property
|
|
803
|
-
def
|
|
804
|
-
return self._path(name=f"{self.ontology}.
|
|
805
|
-
|
|
806
|
-
def write_default(
|
|
807
|
-
self,
|
|
808
|
-
use_tqdm: bool = False,
|
|
809
|
-
force: bool = False,
|
|
810
|
-
write_obo: bool = False,
|
|
811
|
-
write_obonet: bool = False,
|
|
812
|
-
write_obograph: bool = False,
|
|
813
|
-
write_owl: bool = False,
|
|
814
|
-
write_nodes: bool = False,
|
|
815
|
-
) -> None:
|
|
816
|
-
"""Write the OBO to the default path."""
|
|
817
|
-
metadata = self.get_metadata()
|
|
818
|
-
for path in (self._root_metadata_path, self._versioned_metadata_path):
|
|
819
|
-
logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
|
|
820
|
-
with path.open("w") as file:
|
|
821
|
-
json.dump(metadata, file, indent=2)
|
|
1116
|
+
def _ofn_path(self) -> Path:
|
|
1117
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn.gz")
|
|
822
1118
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
)
|
|
826
|
-
typedef_df: pd.DataFrame = self.get_typedef_df()
|
|
827
|
-
typedef_df.sort_values(list(typedef_df.columns), inplace=True)
|
|
828
|
-
typedef_df.to_csv(self._typedefs_path, sep="\t", index=False)
|
|
1119
|
+
@property
|
|
1120
|
+
def _ttl_path(self) -> Path:
|
|
1121
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ttl")
|
|
829
1122
|
|
|
830
|
-
|
|
831
|
-
|
|
1123
|
+
def _get_cache_config(self) -> list[tuple[CacheArtifact, Sequence[str], Callable]]:
|
|
1124
|
+
return [
|
|
1125
|
+
(CacheArtifact.names, [f"{self.ontology}_id", "name"], self.iterate_id_name),
|
|
832
1126
|
(
|
|
833
|
-
|
|
834
|
-
self._definitions_path,
|
|
1127
|
+
CacheArtifact.definitions,
|
|
835
1128
|
[f"{self.ontology}_id", "definition"],
|
|
836
1129
|
self.iterate_id_definition,
|
|
837
1130
|
),
|
|
838
1131
|
(
|
|
839
|
-
|
|
840
|
-
self._species_path,
|
|
1132
|
+
CacheArtifact.species,
|
|
841
1133
|
[f"{self.ontology}_id", "taxonomy_id"],
|
|
842
1134
|
self.iterate_id_species,
|
|
843
1135
|
),
|
|
1136
|
+
(CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
|
|
1137
|
+
(CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
|
|
1138
|
+
(CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
|
|
1139
|
+
(CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
|
|
1140
|
+
(
|
|
1141
|
+
CacheArtifact.object_properties,
|
|
1142
|
+
self.object_properties_header,
|
|
1143
|
+
self.iter_object_properties,
|
|
1144
|
+
),
|
|
1145
|
+
(
|
|
1146
|
+
CacheArtifact.literal_properties,
|
|
1147
|
+
self.literal_properties_header,
|
|
1148
|
+
self.iter_literal_properties,
|
|
1149
|
+
),
|
|
844
1150
|
(
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
self.iterate_synonym_rows,
|
|
1151
|
+
CacheArtifact.literal_mappings,
|
|
1152
|
+
ssslm.LiteralMappingTuple._fields,
|
|
1153
|
+
self.iterate_literal_mapping_rows,
|
|
849
1154
|
),
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
1155
|
+
]
|
|
1156
|
+
|
|
1157
|
+
def write_metadata(self) -> None:
|
|
1158
|
+
"""Write the metadata JSON file."""
|
|
1159
|
+
metadata = self.get_metadata()
|
|
1160
|
+
for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
|
|
1161
|
+
logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
|
|
1162
|
+
with safe_open(path, read=False) as file:
|
|
1163
|
+
json.dump(metadata, file, indent=2)
|
|
1164
|
+
|
|
1165
|
+
def write_prefix_map(self) -> None:
|
|
1166
|
+
"""Write a prefix map file that includes all prefixes used in this ontology."""
|
|
1167
|
+
with self._get_cache_path(CacheArtifact.prefixes).open("w") as file:
|
|
1168
|
+
json.dump(self._get_clean_idspaces(), file, indent=2)
|
|
1169
|
+
|
|
1170
|
+
def write_cache(self, *, force: bool = False) -> None:
|
|
1171
|
+
"""Write cache parts."""
|
|
1172
|
+
typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
|
|
1173
|
+
logger.debug(
|
|
1174
|
+
"[%s] caching typedefs to %s",
|
|
1175
|
+
self._prefix_version,
|
|
1176
|
+
typedefs_path,
|
|
1177
|
+
)
|
|
1178
|
+
typedef_df: pd.DataFrame = self.get_typedef_df()
|
|
1179
|
+
typedef_df.sort_values(list(typedef_df.columns), inplace=True)
|
|
1180
|
+
typedef_df.to_csv(typedefs_path, sep="\t", index=False)
|
|
1181
|
+
|
|
1182
|
+
for cache_artifact, header, fn in self._get_cache_config():
|
|
1183
|
+
path = self._get_cache_path(cache_artifact)
|
|
1184
|
+
if path.is_file() and not force:
|
|
856
1185
|
continue
|
|
857
|
-
|
|
1186
|
+
tqdm.write(
|
|
1187
|
+
f"[{self._prefix_version}] writing {cache_artifact.name} to {path}",
|
|
1188
|
+
)
|
|
858
1189
|
write_iterable_tsv(
|
|
859
1190
|
path=path,
|
|
860
1191
|
header=header,
|
|
861
1192
|
it=fn(), # type:ignore
|
|
862
1193
|
)
|
|
863
1194
|
|
|
864
|
-
|
|
865
|
-
|
|
1195
|
+
typedefs = self._index_typedefs()
|
|
1196
|
+
for relation in (v.is_a, v.has_part, v.part_of, v.from_species, v.orthologous):
|
|
1197
|
+
if relation is not v.is_a and relation.pair not in typedefs:
|
|
866
1198
|
continue
|
|
867
|
-
relations_path =
|
|
868
|
-
|
|
1199
|
+
relations_path = get_relation_cache_path(
|
|
1200
|
+
self.ontology, reference=relation, version=self.data_version
|
|
1201
|
+
)
|
|
1202
|
+
if relations_path.is_file() and not force:
|
|
869
1203
|
continue
|
|
870
1204
|
logger.debug(
|
|
871
|
-
"[%s
|
|
872
|
-
self.
|
|
873
|
-
self.data_version,
|
|
1205
|
+
"[%s] caching relation %s ! %s",
|
|
1206
|
+
self._prefix_version,
|
|
874
1207
|
relation.curie,
|
|
875
1208
|
relation.name,
|
|
876
1209
|
)
|
|
@@ -880,36 +1213,82 @@ class Obo:
|
|
|
880
1213
|
relation_df.sort_values(list(relation_df.columns), inplace=True)
|
|
881
1214
|
relation_df.to_csv(relations_path, sep="\t", index=False)
|
|
882
1215
|
|
|
883
|
-
|
|
1216
|
+
def write_default(
|
|
1217
|
+
self,
|
|
1218
|
+
use_tqdm: bool = False,
|
|
1219
|
+
force: bool = False,
|
|
1220
|
+
write_obo: bool = False,
|
|
1221
|
+
write_obonet: bool = False,
|
|
1222
|
+
write_obograph: bool = False,
|
|
1223
|
+
write_owl: bool = False,
|
|
1224
|
+
write_ofn: bool = False,
|
|
1225
|
+
write_ttl: bool = False,
|
|
1226
|
+
write_nodes: bool = False,
|
|
1227
|
+
obograph_use_internal: bool = False,
|
|
1228
|
+
write_cache: bool = True,
|
|
1229
|
+
) -> None:
|
|
1230
|
+
"""Write the OBO to the default path."""
|
|
1231
|
+
self.write_metadata()
|
|
1232
|
+
self.write_prefix_map()
|
|
1233
|
+
if write_cache:
|
|
1234
|
+
self.write_cache(force=force)
|
|
1235
|
+
if write_obo and (not self._obo_path.is_file() or force):
|
|
1236
|
+
tqdm.write(f"[{self._prefix_version}] writing OBO to {self._obo_path}")
|
|
884
1237
|
self.write_obo(self._obo_path, use_tqdm=use_tqdm)
|
|
885
|
-
if write_obograph and (not self.
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
1238
|
+
if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.is_file() or force):
|
|
1239
|
+
tqdm.write(f"[{self._prefix_version}] writing OFN to {self._ofn_path}")
|
|
1240
|
+
self.write_ofn(self._ofn_path)
|
|
1241
|
+
if write_obograph and (not self._obograph_path.is_file() or force):
|
|
1242
|
+
if obograph_use_internal:
|
|
1243
|
+
tqdm.write(f"[{self._prefix_version}] writing OBO Graph to {self._obograph_path}")
|
|
1244
|
+
self.write_obograph(self._obograph_path)
|
|
1245
|
+
else:
|
|
1246
|
+
import bioontologies.robot
|
|
1247
|
+
|
|
1248
|
+
tqdm.write(
|
|
1249
|
+
f"[{self.ontology}] converting OFN to OBO Graph at {self._obograph_path}"
|
|
1250
|
+
)
|
|
1251
|
+
bioontologies.robot.convert(
|
|
1252
|
+
self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
|
|
1253
|
+
)
|
|
1254
|
+
if write_owl and (not self._owl_path.is_file() or force):
|
|
1255
|
+
tqdm.write(f"[{self._prefix_version}] writing OWL to {self._owl_path}")
|
|
1256
|
+
import bioontologies.robot
|
|
1257
|
+
|
|
1258
|
+
bioontologies.robot.convert(
|
|
1259
|
+
self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
|
|
1260
|
+
)
|
|
1261
|
+
if write_ttl and (not self._ttl_path.is_file() or force):
|
|
1262
|
+
tqdm.write(f"[{self._prefix_version}] writing Turtle to {self._ttl_path}")
|
|
1263
|
+
self.write_rdf(self._ttl_path)
|
|
1264
|
+
if write_obonet and (not self._obonet_gz_path.is_file() or force):
|
|
1265
|
+
tqdm.write(f"[{self._prefix_version}] writing obonet to {self._obonet_gz_path}")
|
|
891
1266
|
self.write_obonet_gz(self._obonet_gz_path)
|
|
892
1267
|
if write_nodes:
|
|
893
|
-
self.
|
|
1268
|
+
nodes_path = self._get_cache_path(CacheArtifact.nodes)
|
|
1269
|
+
tqdm.write(f"[{self._prefix_version}] writing nodes TSV to {nodes_path}")
|
|
1270
|
+
self.write_nodes(nodes_path)
|
|
894
1271
|
|
|
895
1272
|
@property
|
|
896
|
-
def _items_accessor(self):
|
|
1273
|
+
def _items_accessor(self) -> list[Term]:
|
|
897
1274
|
if self._items is None:
|
|
898
|
-
key
|
|
899
|
-
self._items = sorted(
|
|
1275
|
+
# if the term sort key is None, then the terms get sorted by their reference
|
|
1276
|
+
self._items = sorted(
|
|
1277
|
+
self.iter_terms(force=self.force),
|
|
1278
|
+
)
|
|
900
1279
|
return self._items
|
|
901
1280
|
|
|
902
|
-
def __iter__(self) -> Iterator[
|
|
903
|
-
|
|
904
|
-
return iter(self.iter_terms(force=self.force))
|
|
905
|
-
return iter(self._items_accessor)
|
|
1281
|
+
def __iter__(self) -> Iterator[Term]:
|
|
1282
|
+
yield from self._iter_terms_safe()
|
|
906
1283
|
|
|
907
1284
|
def ancestors(self, identifier: str) -> set[str]:
|
|
908
1285
|
"""Return a set of identifiers for parents of the given identifier."""
|
|
1286
|
+
# FIXME switch to references
|
|
909
1287
|
return nx.descendants(self.hierarchy, identifier) # note this is backwards
|
|
910
1288
|
|
|
911
1289
|
def descendants(self, identifier: str) -> set[str]:
|
|
912
1290
|
"""Return a set of identifiers for the children of the given identifier."""
|
|
1291
|
+
# FIXME switch to references
|
|
913
1292
|
return nx.ancestors(self.hierarchy, identifier) # note this is backwards
|
|
914
1293
|
|
|
915
1294
|
def is_descendant(self, descendant: str, ancestor: str) -> bool:
|
|
@@ -917,9 +1296,9 @@ class Obo:
|
|
|
917
1296
|
|
|
918
1297
|
.. code-block:: python
|
|
919
1298
|
|
|
920
|
-
from pyobo import
|
|
1299
|
+
from pyobo import get_ontology
|
|
921
1300
|
|
|
922
|
-
obo =
|
|
1301
|
+
obo = get_ontology("go")
|
|
923
1302
|
|
|
924
1303
|
interleukin_10_complex = "1905571" # interleukin-10 receptor complex
|
|
925
1304
|
all_complexes = "0032991"
|
|
@@ -935,21 +1314,22 @@ class Obo:
|
|
|
935
1314
|
|
|
936
1315
|
.. code-block:: python
|
|
937
1316
|
|
|
938
|
-
from pyobo import
|
|
1317
|
+
from pyobo import get_ontology
|
|
939
1318
|
|
|
940
|
-
obo =
|
|
1319
|
+
obo = get_ontology("go")
|
|
941
1320
|
|
|
942
1321
|
identifier = "1905571" # interleukin-10 receptor complex
|
|
943
1322
|
is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier) # should be true
|
|
944
1323
|
"""
|
|
945
1324
|
if self._hierarchy is None:
|
|
946
1325
|
self._hierarchy = nx.DiGraph()
|
|
947
|
-
for
|
|
948
|
-
for parent in
|
|
949
|
-
|
|
1326
|
+
for stanza in self._iter_stanzas(desc=f"[{self.ontology}] getting hierarchy"):
|
|
1327
|
+
for parent in stanza.parents:
|
|
1328
|
+
# FIXME add referneces
|
|
1329
|
+
self._hierarchy.add_edge(stanza.identifier, parent.identifier)
|
|
950
1330
|
return self._hierarchy
|
|
951
1331
|
|
|
952
|
-
def to_obonet(self:
|
|
1332
|
+
def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
|
|
953
1333
|
"""Export as a :mod`obonet` style graph."""
|
|
954
1334
|
rv = nx.MultiDiGraph()
|
|
955
1335
|
rv.graph.update(
|
|
@@ -957,55 +1337,64 @@ class Obo:
|
|
|
957
1337
|
"name": self.name,
|
|
958
1338
|
"ontology": self.ontology,
|
|
959
1339
|
"auto-generated-by": self.auto_generated_by,
|
|
960
|
-
"
|
|
961
|
-
"format-version": self.format_version,
|
|
1340
|
+
"format-version": FORMAT_VERSION,
|
|
962
1341
|
"data-version": self.data_version,
|
|
963
|
-
"synonymtypedef": _convert_synonym_typedefs(self.synonym_typedefs),
|
|
964
1342
|
"date": self.date_formatted,
|
|
1343
|
+
"typedefs": [typedef.reference.model_dump() for typedef in self.typedefs or []],
|
|
1344
|
+
"synonymtypedef": [
|
|
1345
|
+
synonym_typedef.to_obo(ontology_prefix=self.ontology)
|
|
1346
|
+
for synonym_typedef in self.synonym_typedefs or []
|
|
1347
|
+
],
|
|
965
1348
|
}
|
|
966
1349
|
)
|
|
967
1350
|
|
|
968
1351
|
nodes = {}
|
|
1352
|
+
#: a list of 3-tuples u,v,k
|
|
969
1353
|
links = []
|
|
970
|
-
|
|
1354
|
+
typedefs = self._index_typedefs()
|
|
1355
|
+
synonym_typedefs = self._index_synonym_typedefs()
|
|
1356
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
971
1357
|
parents = []
|
|
972
|
-
for parent in
|
|
1358
|
+
for parent in stanza.parents:
|
|
973
1359
|
if parent is None:
|
|
974
1360
|
raise ValueError("parent should not be none!")
|
|
975
|
-
links.append((
|
|
1361
|
+
links.append((stanza.curie, "is_a", parent.curie))
|
|
976
1362
|
parents.append(parent.curie)
|
|
977
1363
|
|
|
978
1364
|
relations = []
|
|
979
|
-
for typedef, target in
|
|
980
|
-
if target is None:
|
|
981
|
-
raise ValueError("target should not be none!")
|
|
1365
|
+
for typedef, target in stanza.iterate_relations():
|
|
982
1366
|
relations.append(f"{typedef.curie} {target.curie}")
|
|
983
|
-
links.append((
|
|
1367
|
+
links.append((stanza.curie, typedef.curie, target.curie))
|
|
1368
|
+
|
|
1369
|
+
for typedef, targets in sorted(stanza.properties.items()):
|
|
1370
|
+
for target_or_literal in targets:
|
|
1371
|
+
if isinstance(target_or_literal, curies.Reference):
|
|
1372
|
+
links.append((stanza.curie, typedef.curie, target_or_literal.curie))
|
|
984
1373
|
|
|
985
1374
|
d = {
|
|
986
|
-
"id":
|
|
987
|
-
"name":
|
|
988
|
-
"def":
|
|
989
|
-
"xref": [xref.curie for xref in
|
|
1375
|
+
"id": stanza.curie,
|
|
1376
|
+
"name": stanza.name,
|
|
1377
|
+
"def": stanza.definition and stanza._definition_fp(),
|
|
1378
|
+
"xref": [xref.curie for xref in stanza.xrefs],
|
|
990
1379
|
"is_a": parents,
|
|
991
1380
|
"relationship": relations,
|
|
992
|
-
"synonym": [
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
for prop, values in term.properties.items()
|
|
996
|
-
for value in values
|
|
1381
|
+
"synonym": [
|
|
1382
|
+
synonym._fp(ontology_prefix=self.ontology, synonym_typedefs=synonym_typedefs)
|
|
1383
|
+
for synonym in stanza.synonyms
|
|
997
1384
|
],
|
|
1385
|
+
"property_value": list(
|
|
1386
|
+
stanza._iterate_obo_properties(ontology_prefix=self.ontology, typedefs=typedefs)
|
|
1387
|
+
),
|
|
998
1388
|
}
|
|
999
|
-
nodes[
|
|
1389
|
+
nodes[stanza.curie] = {k: v for k, v in d.items() if v}
|
|
1000
1390
|
|
|
1001
1391
|
rv.add_nodes_from(nodes.items())
|
|
1002
1392
|
for _source, _key, _target in links:
|
|
1003
1393
|
rv.add_edge(_source, _target, key=_key)
|
|
1004
1394
|
|
|
1005
1395
|
logger.info(
|
|
1006
|
-
"[%s
|
|
1007
|
-
self.
|
|
1008
|
-
self.data_version,
|
|
1396
|
+
"[%s] exported graph with %d nodes",
|
|
1397
|
+
self._prefix_version,
|
|
1009
1398
|
rv.number_of_nodes(),
|
|
1010
1399
|
)
|
|
1011
1400
|
return rv
|
|
@@ -1017,11 +1406,21 @@ class Obo:
|
|
|
1017
1406
|
"date": self.date and self.date.isoformat(),
|
|
1018
1407
|
}
|
|
1019
1408
|
|
|
1409
|
+
def iterate_references(self, *, use_tqdm: bool = False) -> Iterable[Reference]:
|
|
1410
|
+
"""Iterate over identifiers."""
|
|
1411
|
+
for stanza in self._iter_stanzas(
|
|
1412
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
|
|
1413
|
+
):
|
|
1414
|
+
if self._in_ontology(stanza.reference):
|
|
1415
|
+
yield stanza.reference
|
|
1416
|
+
|
|
1020
1417
|
def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]:
|
|
1021
1418
|
"""Iterate over identifiers."""
|
|
1022
|
-
for
|
|
1023
|
-
|
|
1024
|
-
|
|
1419
|
+
for stanza in self._iter_stanzas(
|
|
1420
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
|
|
1421
|
+
):
|
|
1422
|
+
if self._in_ontology_strict(stanza.reference):
|
|
1423
|
+
yield stanza.identifier
|
|
1025
1424
|
|
|
1026
1425
|
def get_ids(self, *, use_tqdm: bool = False) -> set[str]:
|
|
1027
1426
|
"""Get the set of identifiers."""
|
|
@@ -1029,9 +1428,11 @@ class Obo:
|
|
|
1029
1428
|
|
|
1030
1429
|
def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1031
1430
|
"""Iterate identifier name pairs."""
|
|
1032
|
-
for
|
|
1033
|
-
|
|
1034
|
-
|
|
1431
|
+
for stanza in self._iter_stanzas(
|
|
1432
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
|
|
1433
|
+
):
|
|
1434
|
+
if self._in_ontology(stanza.reference) and stanza.name:
|
|
1435
|
+
yield stanza.identifier, stanza.name
|
|
1035
1436
|
|
|
1036
1437
|
def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
|
|
1037
1438
|
"""Get a mapping from identifiers to names."""
|
|
@@ -1039,11 +1440,13 @@ class Obo:
|
|
|
1039
1440
|
|
|
1040
1441
|
def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1041
1442
|
"""Iterate over pairs of terms' identifiers and their respective definitions."""
|
|
1042
|
-
for
|
|
1043
|
-
|
|
1443
|
+
for stanza in self._iter_stanzas(
|
|
1444
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
|
|
1445
|
+
):
|
|
1446
|
+
if stanza.identifier and stanza.definition:
|
|
1044
1447
|
yield (
|
|
1045
|
-
|
|
1046
|
-
|
|
1448
|
+
stanza.identifier,
|
|
1449
|
+
stanza.definition.strip('"')
|
|
1047
1450
|
.replace("\n", " ")
|
|
1048
1451
|
.replace("\t", " ")
|
|
1049
1452
|
.replace(" ", " "),
|
|
@@ -1056,11 +1459,11 @@ class Obo:
|
|
|
1056
1459
|
def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]:
|
|
1057
1460
|
"""Get the set of obsolete identifiers."""
|
|
1058
1461
|
return {
|
|
1059
|
-
|
|
1060
|
-
for
|
|
1462
|
+
stanza.identifier
|
|
1463
|
+
for stanza in self._iter_stanzas(
|
|
1061
1464
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting obsolete"
|
|
1062
1465
|
)
|
|
1063
|
-
if
|
|
1466
|
+
if stanza.identifier and stanza.is_obsolete
|
|
1064
1467
|
}
|
|
1065
1468
|
|
|
1066
1469
|
############
|
|
@@ -1068,18 +1471,19 @@ class Obo:
|
|
|
1068
1471
|
############
|
|
1069
1472
|
|
|
1070
1473
|
def iterate_id_species(
|
|
1071
|
-
self, *, prefix:
|
|
1474
|
+
self, *, prefix: str | None = None, use_tqdm: bool = False
|
|
1072
1475
|
) -> Iterable[tuple[str, str]]:
|
|
1073
1476
|
"""Iterate over terms' identifiers and respective species (if available)."""
|
|
1074
1477
|
if prefix is None:
|
|
1075
1478
|
prefix = NCBITAXON_PREFIX
|
|
1076
|
-
for
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1479
|
+
for stanza in self._iter_stanzas(
|
|
1480
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting species"
|
|
1481
|
+
):
|
|
1482
|
+
if isinstance(stanza, Term) and (species := stanza.get_species(prefix=prefix)):
|
|
1483
|
+
yield stanza.identifier, species.identifier
|
|
1080
1484
|
|
|
1081
1485
|
def get_id_species_mapping(
|
|
1082
|
-
self, *, prefix:
|
|
1486
|
+
self, *, prefix: str | None = None, use_tqdm: bool = False
|
|
1083
1487
|
) -> Mapping[str, str]:
|
|
1084
1488
|
"""Get a mapping from identifiers to species."""
|
|
1085
1489
|
return dict(self.iterate_id_species(prefix=prefix, use_tqdm=use_tqdm))
|
|
@@ -1109,42 +1513,103 @@ class Obo:
|
|
|
1109
1513
|
# PROPS #
|
|
1110
1514
|
#########
|
|
1111
1515
|
|
|
1112
|
-
def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1516
|
+
def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Annotation]]:
|
|
1113
1517
|
"""Iterate over tuples of terms, properties, and their values."""
|
|
1114
|
-
|
|
1115
|
-
for term in self._iter_terms(
|
|
1518
|
+
for stanza in self._iter_stanzas(
|
|
1116
1519
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting properties"
|
|
1117
1520
|
):
|
|
1118
|
-
for
|
|
1119
|
-
yield
|
|
1521
|
+
for property_tuple in stanza.get_property_annotations():
|
|
1522
|
+
yield stanza, property_tuple
|
|
1120
1523
|
|
|
1121
1524
|
@property
|
|
1122
1525
|
def properties_header(self):
|
|
1123
1526
|
"""Property dataframe header."""
|
|
1124
|
-
return [f"{self.ontology}_id", "property", "value"]
|
|
1527
|
+
return [f"{self.ontology}_id", "property", "value", "datatype", "language"]
|
|
1125
1528
|
|
|
1126
|
-
|
|
1529
|
+
@property
|
|
1530
|
+
def object_properties_header(self):
|
|
1531
|
+
"""Property dataframe header."""
|
|
1532
|
+
return ["source", "predicate", "target"]
|
|
1533
|
+
|
|
1534
|
+
@property
|
|
1535
|
+
def literal_properties_header(self):
|
|
1536
|
+
"""Property dataframe header."""
|
|
1537
|
+
return ["source", "predicate", "target", "datatype", "language"]
|
|
1538
|
+
|
|
1539
|
+
def _iter_property_rows(
|
|
1540
|
+
self, *, use_tqdm: bool = False
|
|
1541
|
+
) -> Iterable[tuple[str, str, str, str, str]]:
|
|
1127
1542
|
"""Iterate property rows."""
|
|
1128
|
-
for term,
|
|
1129
|
-
|
|
1543
|
+
for term, t in self.iterate_properties(use_tqdm=use_tqdm):
|
|
1544
|
+
pred = term._reference(t.predicate, ontology_prefix=self.ontology)
|
|
1545
|
+
match t.value:
|
|
1546
|
+
case OBOLiteral(value, datatype, language):
|
|
1547
|
+
yield (
|
|
1548
|
+
term.identifier,
|
|
1549
|
+
pred,
|
|
1550
|
+
value,
|
|
1551
|
+
get_preferred_curie(datatype),
|
|
1552
|
+
language or "",
|
|
1553
|
+
)
|
|
1554
|
+
case Reference() as obj:
|
|
1555
|
+
yield term.identifier, pred, get_preferred_curie(obj), "", ""
|
|
1556
|
+
case _:
|
|
1557
|
+
raise TypeError(f"got: {type(t)} - {t}")
|
|
1558
|
+
|
|
1559
|
+
def get_properties_df(self, *, use_tqdm: bool = False, drop_na: bool = True) -> pd.DataFrame:
|
|
1560
|
+
"""Get all properties as a dataframe."""
|
|
1561
|
+
df = pd.DataFrame(
|
|
1562
|
+
self._iter_property_rows(use_tqdm=use_tqdm),
|
|
1563
|
+
columns=self.properties_header,
|
|
1564
|
+
)
|
|
1565
|
+
if drop_na:
|
|
1566
|
+
df.dropna(inplace=True)
|
|
1567
|
+
return df
|
|
1568
|
+
|
|
1569
|
+
def iter_object_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
|
|
1570
|
+
"""Iterate over object property triples."""
|
|
1571
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1572
|
+
for predicate, target in stanza.iterate_object_properties():
|
|
1573
|
+
yield stanza.curie, predicate.curie, target.curie
|
|
1130
1574
|
|
|
1131
|
-
def
|
|
1575
|
+
def get_object_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1132
1576
|
"""Get all properties as a dataframe."""
|
|
1133
1577
|
return pd.DataFrame(
|
|
1134
|
-
|
|
1135
|
-
columns=self.properties_header,
|
|
1578
|
+
self.iter_object_properties(use_tqdm=use_tqdm), columns=self.object_properties_header
|
|
1136
1579
|
)
|
|
1137
1580
|
|
|
1581
|
+
def iter_literal_properties(
|
|
1582
|
+
self, *, use_tqdm: bool = False
|
|
1583
|
+
) -> Iterable[tuple[str, str, str, str, str]]:
|
|
1584
|
+
"""Iterate over literal properties quads."""
|
|
1585
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1586
|
+
for predicate, target in stanza.iterate_literal_properties():
|
|
1587
|
+
yield (
|
|
1588
|
+
stanza.curie,
|
|
1589
|
+
predicate.curie,
|
|
1590
|
+
target.value,
|
|
1591
|
+
target.datatype.curie,
|
|
1592
|
+
target.language or "",
|
|
1593
|
+
)
|
|
1594
|
+
|
|
1595
|
+
def get_literal_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1596
|
+
"""Get all properties as a dataframe."""
|
|
1597
|
+
return pd.DataFrame(self.iter_literal_properties(), columns=self.literal_properties_header)
|
|
1598
|
+
|
|
1138
1599
|
def iterate_filtered_properties(
|
|
1139
|
-
self, prop:
|
|
1140
|
-
) -> Iterable[tuple[
|
|
1600
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1601
|
+
) -> Iterable[tuple[Stanza, str]]:
|
|
1141
1602
|
"""Iterate over tuples of terms and the values for the given property."""
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1603
|
+
prop = _ensure_ref(prop)
|
|
1604
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1605
|
+
for t in stanza.get_property_annotations():
|
|
1606
|
+
if t.predicate != prop:
|
|
1607
|
+
continue
|
|
1608
|
+
yield stanza, reference_or_literal_to_str(t.value)
|
|
1609
|
+
|
|
1610
|
+
def get_filtered_properties_df(
|
|
1611
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1612
|
+
) -> pd.DataFrame:
|
|
1148
1613
|
"""Get a dataframe of terms' identifiers to the given property's values."""
|
|
1149
1614
|
return pd.DataFrame(
|
|
1150
1615
|
list(self.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm).items()),
|
|
@@ -1152,7 +1617,7 @@ class Obo:
|
|
|
1152
1617
|
)
|
|
1153
1618
|
|
|
1154
1619
|
def get_filtered_properties_mapping(
|
|
1155
|
-
self, prop:
|
|
1620
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1156
1621
|
) -> Mapping[str, str]:
|
|
1157
1622
|
"""Get a mapping from a term's identifier to the property.
|
|
1158
1623
|
|
|
@@ -1164,7 +1629,7 @@ class Obo:
|
|
|
1164
1629
|
}
|
|
1165
1630
|
|
|
1166
1631
|
def get_filtered_properties_multimapping(
|
|
1167
|
-
self, prop:
|
|
1632
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1168
1633
|
) -> Mapping[str, list[str]]:
|
|
1169
1634
|
"""Get a mapping from a term's identifier to the property values."""
|
|
1170
1635
|
return multidict(
|
|
@@ -1176,22 +1641,63 @@ class Obo:
|
|
|
1176
1641
|
# RELATIONS #
|
|
1177
1642
|
#############
|
|
1178
1643
|
|
|
1644
|
+
def iterate_edges(
|
|
1645
|
+
self, *, use_tqdm: bool = False
|
|
1646
|
+
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
|
|
1647
|
+
"""Iterate over triples of terms, relations, and their targets."""
|
|
1648
|
+
_warned: set[ReferenceTuple] = set()
|
|
1649
|
+
typedefs = self._index_typedefs()
|
|
1650
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] edge"):
|
|
1651
|
+
for predicate, reference in stanza._iter_edges():
|
|
1652
|
+
if td := self._get_typedef(stanza, predicate, _warned, typedefs):
|
|
1653
|
+
yield stanza, td, reference
|
|
1654
|
+
|
|
1655
|
+
@property
|
|
1656
|
+
def edges_header(self) -> Sequence[str]:
|
|
1657
|
+
"""Header for the edges dataframe."""
|
|
1658
|
+
return [":START_ID", ":TYPE", ":END_ID"]
|
|
1659
|
+
|
|
1179
1660
|
def iterate_relations(
|
|
1180
1661
|
self, *, use_tqdm: bool = False
|
|
1181
|
-
) -> Iterable[tuple[
|
|
1182
|
-
"""Iterate over tuples of terms, relations, and their targets.
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1662
|
+
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
|
|
1663
|
+
"""Iterate over tuples of terms, relations, and their targets.
|
|
1664
|
+
|
|
1665
|
+
This only outputs stuff from the `relationship:` tag, not
|
|
1666
|
+
all possible triples. For that, see :func:`iterate_edges`.
|
|
1667
|
+
"""
|
|
1668
|
+
_warned: set[ReferenceTuple] = set()
|
|
1669
|
+
typedefs = self._index_typedefs()
|
|
1670
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] relation"):
|
|
1671
|
+
for predicate, reference in stanza.iterate_relations():
|
|
1672
|
+
if td := self._get_typedef(stanza, predicate, _warned, typedefs):
|
|
1673
|
+
yield stanza, td, reference
|
|
1674
|
+
|
|
1675
|
+
def get_edges_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1676
|
+
"""Get an edges dataframe."""
|
|
1677
|
+
return pd.DataFrame(self.iterate_edge_rows(use_tqdm=use_tqdm), columns=self.edges_header)
|
|
1678
|
+
|
|
1679
|
+
def iterate_edge_rows(self, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
|
|
1680
|
+
"""Iterate the edge rows."""
|
|
1681
|
+
for term, typedef, reference in self.iterate_edges(use_tqdm=use_tqdm):
|
|
1682
|
+
yield term.curie, typedef.curie, reference.curie
|
|
1683
|
+
|
|
1684
|
+
def _get_typedef(
|
|
1685
|
+
self,
|
|
1686
|
+
term: Stanza,
|
|
1687
|
+
predicate: Reference,
|
|
1688
|
+
_warned: set[ReferenceTuple],
|
|
1689
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
1690
|
+
) -> TypeDef | None:
|
|
1691
|
+
pp = predicate.pair
|
|
1692
|
+
if pp in typedefs:
|
|
1693
|
+
return typedefs[pp]
|
|
1694
|
+
if pp not in _warned:
|
|
1695
|
+
_warn_string = f"[{term.curie}] undefined typedef: {pp}"
|
|
1696
|
+
if predicate.name:
|
|
1697
|
+
_warn_string += f" ({predicate.name})"
|
|
1698
|
+
logger.warning(_warn_string)
|
|
1699
|
+
_warned.add(pp)
|
|
1700
|
+
return None
|
|
1195
1701
|
|
|
1196
1702
|
def iter_relation_rows(
|
|
1197
1703
|
self, use_tqdm: bool = False
|
|
@@ -1208,14 +1714,14 @@ class Obo:
|
|
|
1208
1714
|
|
|
1209
1715
|
def iterate_filtered_relations(
|
|
1210
1716
|
self,
|
|
1211
|
-
relation:
|
|
1717
|
+
relation: ReferenceHint,
|
|
1212
1718
|
*,
|
|
1213
1719
|
use_tqdm: bool = False,
|
|
1214
|
-
) -> Iterable[tuple[
|
|
1720
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1215
1721
|
"""Iterate over tuples of terms and ther targets for the given relation."""
|
|
1216
|
-
|
|
1217
|
-
for term,
|
|
1218
|
-
if
|
|
1722
|
+
_pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
|
|
1723
|
+
for term, predicate, reference in self.iterate_relations(use_tqdm=use_tqdm):
|
|
1724
|
+
if _pair == predicate.pair:
|
|
1219
1725
|
yield term, reference
|
|
1220
1726
|
|
|
1221
1727
|
@property
|
|
@@ -1232,7 +1738,7 @@ class Obo:
|
|
|
1232
1738
|
|
|
1233
1739
|
def get_filtered_relations_df(
|
|
1234
1740
|
self,
|
|
1235
|
-
relation:
|
|
1741
|
+
relation: ReferenceHint,
|
|
1236
1742
|
*,
|
|
1237
1743
|
use_tqdm: bool = False,
|
|
1238
1744
|
) -> pd.DataFrame:
|
|
@@ -1247,11 +1753,11 @@ class Obo:
|
|
|
1247
1753
|
|
|
1248
1754
|
def iterate_filtered_relations_filtered_targets(
|
|
1249
1755
|
self,
|
|
1250
|
-
relation:
|
|
1756
|
+
relation: ReferenceHint,
|
|
1251
1757
|
target_prefix: str,
|
|
1252
1758
|
*,
|
|
1253
1759
|
use_tqdm: bool = False,
|
|
1254
|
-
) -> Iterable[tuple[
|
|
1760
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1255
1761
|
"""Iterate over relationships between one identifier and another."""
|
|
1256
1762
|
for term, reference in self.iterate_filtered_relations(
|
|
1257
1763
|
relation=relation, use_tqdm=use_tqdm
|
|
@@ -1261,7 +1767,7 @@ class Obo:
|
|
|
1261
1767
|
|
|
1262
1768
|
def get_relation_mapping(
|
|
1263
1769
|
self,
|
|
1264
|
-
relation:
|
|
1770
|
+
relation: ReferenceHint,
|
|
1265
1771
|
target_prefix: str,
|
|
1266
1772
|
*,
|
|
1267
1773
|
use_tqdm: bool = False,
|
|
@@ -1272,8 +1778,8 @@ class Obo:
|
|
|
1272
1778
|
|
|
1273
1779
|
Example usage: get homology between HGNC and MGI:
|
|
1274
1780
|
|
|
1275
|
-
>>> from pyobo.sources.hgnc import
|
|
1276
|
-
>>> obo =
|
|
1781
|
+
>>> from pyobo.sources.hgnc import HGNCGetter
|
|
1782
|
+
>>> obo = HGNCGetter()
|
|
1277
1783
|
>>> human_mapt_hgnc_id = "6893"
|
|
1278
1784
|
>>> mouse_mapt_mgi_id = "97180"
|
|
1279
1785
|
>>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi")
|
|
@@ -1291,15 +1797,15 @@ class Obo:
|
|
|
1291
1797
|
def get_relation(
|
|
1292
1798
|
self,
|
|
1293
1799
|
source_identifier: str,
|
|
1294
|
-
relation:
|
|
1800
|
+
relation: ReferenceHint,
|
|
1295
1801
|
target_prefix: str,
|
|
1296
1802
|
*,
|
|
1297
1803
|
use_tqdm: bool = False,
|
|
1298
|
-
) ->
|
|
1804
|
+
) -> str | None:
|
|
1299
1805
|
"""Get the value for a bijective relation mapping between this resource and a target resource.
|
|
1300
1806
|
|
|
1301
|
-
>>> from pyobo.sources.hgnc import
|
|
1302
|
-
>>> obo =
|
|
1807
|
+
>>> from pyobo.sources.hgnc import HGNCGetter
|
|
1808
|
+
>>> obo = HGNCGetter()
|
|
1303
1809
|
>>> human_mapt_hgnc_id = "6893"
|
|
1304
1810
|
>>> mouse_mapt_mgi_id = "97180"
|
|
1305
1811
|
>>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi")
|
|
@@ -1311,7 +1817,7 @@ class Obo:
|
|
|
1311
1817
|
|
|
1312
1818
|
def get_relation_multimapping(
|
|
1313
1819
|
self,
|
|
1314
|
-
relation:
|
|
1820
|
+
relation: ReferenceHint,
|
|
1315
1821
|
target_prefix: str,
|
|
1316
1822
|
*,
|
|
1317
1823
|
use_tqdm: bool = False,
|
|
@@ -1334,22 +1840,24 @@ class Obo:
|
|
|
1334
1840
|
) -> Mapping[str, list[Reference]]:
|
|
1335
1841
|
"""Get a mapping from identifiers to a list of all references for the given relation."""
|
|
1336
1842
|
return multidict(
|
|
1337
|
-
(
|
|
1338
|
-
for
|
|
1843
|
+
(stanza.identifier, reference)
|
|
1844
|
+
for stanza in self._iter_stanzas(
|
|
1339
1845
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting {typedef.curie}"
|
|
1340
1846
|
)
|
|
1341
|
-
for reference in
|
|
1847
|
+
for reference in stanza.get_relationships(typedef)
|
|
1342
1848
|
)
|
|
1343
1849
|
|
|
1344
1850
|
############
|
|
1345
1851
|
# SYNONYMS #
|
|
1346
1852
|
############
|
|
1347
1853
|
|
|
1348
|
-
def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1854
|
+
def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Synonym]]:
|
|
1349
1855
|
"""Iterate over pairs of term and synonym object."""
|
|
1350
|
-
for
|
|
1351
|
-
|
|
1352
|
-
|
|
1856
|
+
for stanza in self._iter_stanzas(
|
|
1857
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"
|
|
1858
|
+
):
|
|
1859
|
+
for synonym in sorted(stanza.synonyms):
|
|
1860
|
+
yield stanza, synonym
|
|
1353
1861
|
|
|
1354
1862
|
def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1355
1863
|
"""Iterate over pairs of identifier and synonym text."""
|
|
@@ -1360,40 +1868,95 @@ class Obo:
|
|
|
1360
1868
|
"""Get a mapping from identifiers to a list of sorted synonym strings."""
|
|
1361
1869
|
return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm))
|
|
1362
1870
|
|
|
1871
|
+
def get_literal_mappings(self) -> Iterable[ssslm.LiteralMapping]:
|
|
1872
|
+
"""Get literal mappings in a standard data model."""
|
|
1873
|
+
stanzas: Iterable[Stanza] = itt.chain(self, self.typedefs or [])
|
|
1874
|
+
yield from itt.chain.from_iterable(
|
|
1875
|
+
stanza.get_literal_mappings()
|
|
1876
|
+
for stanza in stanzas
|
|
1877
|
+
if self._in_ontology(stanza.reference)
|
|
1878
|
+
)
|
|
1879
|
+
|
|
1880
|
+
def _in_ontology(self, reference: Reference | Referenced) -> bool:
|
|
1881
|
+
return self._in_ontology_strict(reference) or self._in_ontology_aux(reference)
|
|
1882
|
+
|
|
1883
|
+
def _in_ontology_strict(self, reference: Reference | Referenced) -> bool:
|
|
1884
|
+
return reference.prefix == self.ontology
|
|
1885
|
+
|
|
1886
|
+
def _in_ontology_aux(self, reference: Reference | Referenced) -> bool:
|
|
1887
|
+
return reference.prefix == "obo" and reference.identifier.startswith(self.ontology + "#")
|
|
1888
|
+
|
|
1363
1889
|
#########
|
|
1364
1890
|
# XREFS #
|
|
1365
1891
|
#########
|
|
1366
1892
|
|
|
1367
|
-
def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1893
|
+
def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Reference]]:
|
|
1368
1894
|
"""Iterate over xrefs."""
|
|
1369
|
-
for
|
|
1370
|
-
|
|
1371
|
-
|
|
1895
|
+
for stanza in self._iter_stanzas(
|
|
1896
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"
|
|
1897
|
+
):
|
|
1898
|
+
xrefs = {xref for _, xref in stanza.get_mappings(add_context=False)}
|
|
1899
|
+
for xref in sorted(xrefs):
|
|
1900
|
+
yield stanza, xref
|
|
1372
1901
|
|
|
1373
1902
|
def iterate_filtered_xrefs(
|
|
1374
1903
|
self, prefix: str, *, use_tqdm: bool = False
|
|
1375
|
-
) -> Iterable[tuple[
|
|
1904
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1376
1905
|
"""Iterate over xrefs to a given prefix."""
|
|
1377
1906
|
for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
|
|
1378
1907
|
if xref.prefix == prefix:
|
|
1379
1908
|
yield term, xref
|
|
1380
1909
|
|
|
1381
|
-
def
|
|
1382
|
-
"""Iterate over
|
|
1383
|
-
for
|
|
1384
|
-
yield
|
|
1910
|
+
def iterate_literal_mapping_rows(self) -> Iterable[ssslm.LiteralMappingTuple]:
|
|
1911
|
+
"""Iterate over literal mapping rows."""
|
|
1912
|
+
for synonym in self.get_literal_mappings():
|
|
1913
|
+
yield synonym._as_row()
|
|
1385
1914
|
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID]
|
|
1915
|
+
def get_literal_mappings_df(self) -> pd.DataFrame:
|
|
1916
|
+
"""Get a literal mappings dataframe."""
|
|
1917
|
+
return ssslm.literal_mappings_to_df(self.get_literal_mappings())
|
|
1390
1918
|
|
|
1391
|
-
def
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1919
|
+
def iterate_mapping_rows(
|
|
1920
|
+
self, *, use_tqdm: bool = False
|
|
1921
|
+
) -> Iterable[tuple[str, str, str, str, str, float | None, str | None]]:
|
|
1922
|
+
"""Iterate over SSSOM rows for mappings."""
|
|
1923
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1924
|
+
for predicate, obj_ref, context in stanza.get_mappings(
|
|
1925
|
+
include_xrefs=True, add_context=True
|
|
1926
|
+
):
|
|
1927
|
+
yield (
|
|
1928
|
+
get_preferred_curie(stanza),
|
|
1929
|
+
stanza.name,
|
|
1930
|
+
get_preferred_curie(obj_ref),
|
|
1931
|
+
get_preferred_curie(predicate),
|
|
1932
|
+
get_preferred_curie(context.justification),
|
|
1933
|
+
context.confidence if context.confidence is not None else None,
|
|
1934
|
+
get_preferred_curie(context.contributor) if context.contributor else None,
|
|
1935
|
+
)
|
|
1936
|
+
|
|
1937
|
+
def get_mappings_df(
|
|
1938
|
+
self,
|
|
1939
|
+
*,
|
|
1940
|
+
use_tqdm: bool = False,
|
|
1941
|
+
include_subject_labels: bool = False,
|
|
1942
|
+
include_mapping_source_column: bool = False,
|
|
1943
|
+
) -> pd.DataFrame:
|
|
1944
|
+
"""Get a dataframe with SSSOM extracted from the OBO document."""
|
|
1945
|
+
df = pd.DataFrame(self.iterate_mapping_rows(use_tqdm=use_tqdm), columns=SSSOM_DF_COLUMNS)
|
|
1946
|
+
if not include_subject_labels:
|
|
1947
|
+
del df["subject_label"]
|
|
1948
|
+
|
|
1949
|
+
# if no confidences/contributor, remove that column
|
|
1950
|
+
for c in ["confidence", "contributor"]:
|
|
1951
|
+
if df[c].isna().all():
|
|
1952
|
+
del df[c]
|
|
1953
|
+
|
|
1954
|
+
# append on the mapping_source
|
|
1955
|
+
# (https://mapping-commons.github.io/sssom/mapping_source/)
|
|
1956
|
+
if include_mapping_source_column:
|
|
1957
|
+
df["mapping_source"] = self.ontology
|
|
1958
|
+
|
|
1959
|
+
return df
|
|
1397
1960
|
|
|
1398
1961
|
def get_filtered_xrefs_mapping(
|
|
1399
1962
|
self, prefix: str, *, use_tqdm: bool = False
|
|
@@ -1417,11 +1980,12 @@ class Obo:
|
|
|
1417
1980
|
# ALTS #
|
|
1418
1981
|
########
|
|
1419
1982
|
|
|
1420
|
-
def iterate_alts(self) -> Iterable[tuple[
|
|
1983
|
+
def iterate_alts(self) -> Iterable[tuple[Stanza, Reference]]:
|
|
1421
1984
|
"""Iterate over alternative identifiers."""
|
|
1422
|
-
for
|
|
1423
|
-
|
|
1424
|
-
|
|
1985
|
+
for stanza in self._iter_stanzas():
|
|
1986
|
+
if self._in_ontology(stanza):
|
|
1987
|
+
for alt in stanza.alt_ids:
|
|
1988
|
+
yield stanza, alt
|
|
1425
1989
|
|
|
1426
1990
|
def iterate_alt_rows(self) -> Iterable[tuple[str, str]]:
|
|
1427
1991
|
"""Iterate over pairs of terms' primary identifiers and alternate identifiers."""
|
|
@@ -1433,33 +1997,315 @@ class Obo:
|
|
|
1433
1997
|
return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts())
|
|
1434
1998
|
|
|
1435
1999
|
|
|
2000
|
+
@dataclass
|
|
2001
|
+
class TypeDef(Stanza):
|
|
2002
|
+
"""A type definition in OBO.
|
|
2003
|
+
|
|
2004
|
+
See the subsection of https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.
|
|
2005
|
+
"""
|
|
2006
|
+
|
|
2007
|
+
reference: Annotated[Reference, 1]
|
|
2008
|
+
is_anonymous: Annotated[bool | None, 2] = None
|
|
2009
|
+
# 3 - name is covered by reference
|
|
2010
|
+
namespace: Annotated[str | None, 4] = None
|
|
2011
|
+
# 5 alt_id is part of proerties
|
|
2012
|
+
definition: Annotated[str | None, 6] = None
|
|
2013
|
+
comment: Annotated[str | None, 7] = None
|
|
2014
|
+
subsets: Annotated[list[Reference], 8] = field(default_factory=list)
|
|
2015
|
+
synonyms: Annotated[list[Synonym], 9] = field(default_factory=list)
|
|
2016
|
+
xrefs: Annotated[list[Reference], 10] = field(default_factory=list)
|
|
2017
|
+
_axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
|
|
2018
|
+
properties: Annotated[PropertiesHint, 11] = field(default_factory=lambda: defaultdict(list))
|
|
2019
|
+
domain: Annotated[Reference | None, 12, "typedef-only"] = None
|
|
2020
|
+
range: Annotated[Reference | None, 13, "typedef-only"] = None
|
|
2021
|
+
builtin: Annotated[bool | None, 14] = None
|
|
2022
|
+
holds_over_chain: Annotated[list[list[Reference]], 15, "typedef-only"] = field(
|
|
2023
|
+
default_factory=list
|
|
2024
|
+
)
|
|
2025
|
+
is_anti_symmetric: Annotated[bool | None, 16, "typedef-only"] = None
|
|
2026
|
+
is_cyclic: Annotated[bool | None, 17, "typedef-only"] = None
|
|
2027
|
+
is_reflexive: Annotated[bool | None, 18, "typedef-only"] = None
|
|
2028
|
+
is_symmetric: Annotated[bool | None, 19, "typedef-only"] = None
|
|
2029
|
+
is_transitive: Annotated[bool | None, 20, "typedef-only"] = None
|
|
2030
|
+
is_functional: Annotated[bool | None, 21, "typedef-only"] = None
|
|
2031
|
+
is_inverse_functional: Annotated[bool | None, 22, "typedef-only"] = None
|
|
2032
|
+
parents: Annotated[list[Reference], 23] = field(default_factory=list)
|
|
2033
|
+
intersection_of: Annotated[IntersectionOfHint, 24] = field(default_factory=list)
|
|
2034
|
+
union_of: Annotated[list[Reference], 25] = field(default_factory=list)
|
|
2035
|
+
equivalent_to: Annotated[list[Reference], 26] = field(default_factory=list)
|
|
2036
|
+
disjoint_from: Annotated[list[Reference], 27] = field(default_factory=list)
|
|
2037
|
+
# TODO inverse should be inverse_of, cardinality any
|
|
2038
|
+
inverse: Annotated[Reference | None, 28, "typedef-only"] = None
|
|
2039
|
+
# TODO check if there are any examples of this being multiple
|
|
2040
|
+
transitive_over: Annotated[list[Reference], 29, "typedef-only"] = field(default_factory=list)
|
|
2041
|
+
equivalent_to_chain: Annotated[list[list[Reference]], 30, "typedef-only"] = field(
|
|
2042
|
+
default_factory=list
|
|
2043
|
+
)
|
|
2044
|
+
#: From the OBO spec:
|
|
2045
|
+
#:
|
|
2046
|
+
#: For example: spatially_disconnected_from is disjoint_over part_of, in that two
|
|
2047
|
+
#: disconnected entities have no parts in common. This can be translated to OWL as:
|
|
2048
|
+
#: ``disjoint_over(R S), R(A B) ==> (S some A) disjointFrom (S some B)``
|
|
2049
|
+
disjoint_over: Annotated[list[Reference], 31] = field(default_factory=list)
|
|
2050
|
+
relationships: Annotated[RelationsHint, 32] = field(default_factory=lambda: defaultdict(list))
|
|
2051
|
+
is_obsolete: Annotated[bool | None, 33] = None
|
|
2052
|
+
created_by: Annotated[str | None, 34] = None
|
|
2053
|
+
creation_date: Annotated[datetime.datetime | None, 35] = None
|
|
2054
|
+
# TODO expand_assertion_to
|
|
2055
|
+
# TODO expand_expression_to
|
|
2056
|
+
#: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are
|
|
2057
|
+
#: used to record object metadata. Object metadata is additional information about an object
|
|
2058
|
+
#: that is useful to track, but does not impact the definition of the object or how it should
|
|
2059
|
+
#: be treated by a reasoner. Metadata tags might be used to record special term synonyms or
|
|
2060
|
+
#: structured notes about a term, for example.
|
|
2061
|
+
is_metadata_tag: Annotated[bool | None, 40, "typedef-only"] = None
|
|
2062
|
+
is_class_level: Annotated[bool | None, 41] = None
|
|
2063
|
+
|
|
2064
|
+
type: StanzaType = "TypeDef"
|
|
2065
|
+
|
|
2066
|
+
def __hash__(self) -> int:
|
|
2067
|
+
# have to re-define hash because of the @dataclass
|
|
2068
|
+
return hash((self.__class__, self.prefix, self.identifier))
|
|
2069
|
+
|
|
2070
|
+
def _get_references(self) -> dict[str, set[Reference]]:
|
|
2071
|
+
rv = super()._get_references()
|
|
2072
|
+
|
|
2073
|
+
def _add(r: Reference) -> None:
|
|
2074
|
+
rv[r.prefix].add(r)
|
|
2075
|
+
|
|
2076
|
+
if self.domain:
|
|
2077
|
+
_add(self.domain)
|
|
2078
|
+
if self.range:
|
|
2079
|
+
_add(self.range)
|
|
2080
|
+
if self.inverse:
|
|
2081
|
+
_add(self.inverse)
|
|
2082
|
+
|
|
2083
|
+
# TODO all of the properties, which are from oboInOwl
|
|
2084
|
+
for rr in itt.chain(self.transitive_over, self.disjoint_over):
|
|
2085
|
+
_add(rr)
|
|
2086
|
+
for part in itt.chain(self.holds_over_chain, self.equivalent_to_chain):
|
|
2087
|
+
for rr in part:
|
|
2088
|
+
_add(rr)
|
|
2089
|
+
return dict(rv)
|
|
2090
|
+
|
|
2091
|
+
def iterate_obo_lines(
|
|
2092
|
+
self,
|
|
2093
|
+
ontology_prefix: str,
|
|
2094
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
2095
|
+
typedefs: Mapping[ReferenceTuple, TypeDef] | None = None,
|
|
2096
|
+
) -> Iterable[str]:
|
|
2097
|
+
"""Iterate over the lines to write in an OBO file.
|
|
2098
|
+
|
|
2099
|
+
:param ontology_prefix:
|
|
2100
|
+
The prefix of the ontology into which the type definition is being written.
|
|
2101
|
+
This is used for compressing builtin identifiers
|
|
2102
|
+
:yield:
|
|
2103
|
+
The lines to write to an OBO file
|
|
2104
|
+
|
|
2105
|
+
`S.3.5.5 <https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.3.5.5>`_
|
|
2106
|
+
of the OBO Flat File Specification v1.4 says tags should appear in the following order:
|
|
2107
|
+
|
|
2108
|
+
1. id
|
|
2109
|
+
2. is_anonymous
|
|
2110
|
+
3. name
|
|
2111
|
+
4. namespace
|
|
2112
|
+
5. alt_id
|
|
2113
|
+
6. def
|
|
2114
|
+
7. comment
|
|
2115
|
+
8. subset
|
|
2116
|
+
9. synonym
|
|
2117
|
+
10. xref
|
|
2118
|
+
11. property_value
|
|
2119
|
+
12. domain
|
|
2120
|
+
13. range
|
|
2121
|
+
14. builtin
|
|
2122
|
+
15. holds_over_chain
|
|
2123
|
+
16. is_anti_symmetric
|
|
2124
|
+
17. is_cyclic
|
|
2125
|
+
18. is_reflexive
|
|
2126
|
+
19. is_symmetric
|
|
2127
|
+
20. is_transitive
|
|
2128
|
+
21. is_functional
|
|
2129
|
+
22. is_inverse_functional
|
|
2130
|
+
23. is_a
|
|
2131
|
+
24. intersection_of
|
|
2132
|
+
25. union_of
|
|
2133
|
+
26. equivalent_to
|
|
2134
|
+
27. disjoint_from
|
|
2135
|
+
28. inverse_of
|
|
2136
|
+
29. transitive_over
|
|
2137
|
+
30. equivalent_to_chain
|
|
2138
|
+
31. disjoint_over
|
|
2139
|
+
32. relationship
|
|
2140
|
+
33. is_obsolete
|
|
2141
|
+
34. created_by
|
|
2142
|
+
35. creation_date
|
|
2143
|
+
36. replaced_by
|
|
2144
|
+
37. consider
|
|
2145
|
+
38. expand_assertion_to
|
|
2146
|
+
39. expand_expression_to
|
|
2147
|
+
40. is_metadata_tag
|
|
2148
|
+
41. is_class_level
|
|
2149
|
+
"""
|
|
2150
|
+
if synonym_typedefs is None:
|
|
2151
|
+
synonym_typedefs = {}
|
|
2152
|
+
if typedefs is None:
|
|
2153
|
+
typedefs = {}
|
|
2154
|
+
|
|
2155
|
+
yield "\n[Typedef]"
|
|
2156
|
+
# 1
|
|
2157
|
+
yield f"id: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
|
|
2158
|
+
# 2
|
|
2159
|
+
yield from _boolean_tag("is_anonymous", self.is_anonymous)
|
|
2160
|
+
# 3
|
|
2161
|
+
if self.name:
|
|
2162
|
+
yield f"name: {self.name}"
|
|
2163
|
+
# 4
|
|
2164
|
+
if self.namespace:
|
|
2165
|
+
yield f"namespace: {self.namespace}"
|
|
2166
|
+
# 5
|
|
2167
|
+
yield from _reference_list_tag("alt_id", self.alt_ids, ontology_prefix)
|
|
2168
|
+
# 6
|
|
2169
|
+
if self.definition:
|
|
2170
|
+
yield f"def: {self._definition_fp()}"
|
|
2171
|
+
# 7
|
|
2172
|
+
if self.comment:
|
|
2173
|
+
yield f"comment: {self.comment}"
|
|
2174
|
+
# 8
|
|
2175
|
+
yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
|
|
2176
|
+
# 9
|
|
2177
|
+
for synonym in self.synonyms:
|
|
2178
|
+
yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
|
|
2179
|
+
# 10
|
|
2180
|
+
yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
|
|
2181
|
+
# 11
|
|
2182
|
+
yield from self._iterate_obo_properties(
|
|
2183
|
+
ontology_prefix=ontology_prefix,
|
|
2184
|
+
skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
|
|
2185
|
+
skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
|
|
2186
|
+
typedefs=typedefs,
|
|
2187
|
+
)
|
|
2188
|
+
# 12
|
|
2189
|
+
if self.domain:
|
|
2190
|
+
yield f"domain: {reference_escape(self.domain, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2191
|
+
# 13
|
|
2192
|
+
if self.range:
|
|
2193
|
+
yield f"range: {reference_escape(self.range, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2194
|
+
# 14
|
|
2195
|
+
yield from _boolean_tag("builtin", self.builtin)
|
|
2196
|
+
# 15
|
|
2197
|
+
yield from _chain_tag("holds_over_chain", self.holds_over_chain, ontology_prefix)
|
|
2198
|
+
# 16
|
|
2199
|
+
yield from _boolean_tag("is_anti_symmetric", self.is_anti_symmetric)
|
|
2200
|
+
# 17
|
|
2201
|
+
yield from _boolean_tag("is_cyclic", self.is_cyclic)
|
|
2202
|
+
# 18
|
|
2203
|
+
yield from _boolean_tag("is_reflexive", self.is_reflexive)
|
|
2204
|
+
# 19
|
|
2205
|
+
yield from _boolean_tag("is_symmetric", self.is_symmetric)
|
|
2206
|
+
# 20
|
|
2207
|
+
yield from _boolean_tag("is_transitive", self.is_transitive)
|
|
2208
|
+
# 21
|
|
2209
|
+
yield from _boolean_tag("is_functional", self.is_functional)
|
|
2210
|
+
# 22
|
|
2211
|
+
yield from _boolean_tag("is_inverse_functional", self.is_inverse_functional)
|
|
2212
|
+
# 23
|
|
2213
|
+
yield from _reference_list_tag("is_a", self.parents, ontology_prefix)
|
|
2214
|
+
# 24
|
|
2215
|
+
yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
|
|
2216
|
+
# 25
|
|
2217
|
+
yield from _reference_list_tag("union_of", self.union_of, ontology_prefix)
|
|
2218
|
+
# 26
|
|
2219
|
+
yield from _reference_list_tag("equivalent_to", self.equivalent_to, ontology_prefix)
|
|
2220
|
+
# 27
|
|
2221
|
+
yield from _reference_list_tag("disjoint_from", self.disjoint_from, ontology_prefix)
|
|
2222
|
+
# 28
|
|
2223
|
+
if self.inverse:
|
|
2224
|
+
yield f"inverse_of: {reference_escape(self.inverse, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2225
|
+
# 29
|
|
2226
|
+
yield from _reference_list_tag("transitive_over", self.transitive_over, ontology_prefix)
|
|
2227
|
+
# 30
|
|
2228
|
+
yield from _chain_tag("equivalent_to_chain", self.equivalent_to_chain, ontology_prefix)
|
|
2229
|
+
# 31 disjoint_over, see https://github.com/search?q=%22disjoint_over%3A%22+path%3A*.obo&type=code
|
|
2230
|
+
yield from _reference_list_tag(
|
|
2231
|
+
"disjoint_over", self.disjoint_over, ontology_prefix=ontology_prefix
|
|
2232
|
+
)
|
|
2233
|
+
# 32
|
|
2234
|
+
yield from self._iterate_obo_relations(ontology_prefix=ontology_prefix, typedefs=typedefs)
|
|
2235
|
+
# 33
|
|
2236
|
+
yield from _boolean_tag("is_obsolete", self.is_obsolete)
|
|
2237
|
+
# 34
|
|
2238
|
+
if self.created_by:
|
|
2239
|
+
yield f"created_by: {self.created_by}"
|
|
2240
|
+
# 35
|
|
2241
|
+
if self.creation_date is not None:
|
|
2242
|
+
yield f"creation_date: {self.creation_date.isoformat()}"
|
|
2243
|
+
# 36
|
|
2244
|
+
yield from _tag_property_targets(
|
|
2245
|
+
"replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
|
|
2246
|
+
)
|
|
2247
|
+
# 37
|
|
2248
|
+
yield from _tag_property_targets(
|
|
2249
|
+
"consider", self, v.see_also, ontology_prefix=ontology_prefix
|
|
2250
|
+
)
|
|
2251
|
+
# 38 TODO expand_assertion_to
|
|
2252
|
+
# 39 TODO expand_expression_to
|
|
2253
|
+
# 40
|
|
2254
|
+
yield from _boolean_tag("is_metadata_tag", self.is_metadata_tag)
|
|
2255
|
+
# 41
|
|
2256
|
+
yield from _boolean_tag("is_class_level", self.is_class_level)
|
|
2257
|
+
|
|
2258
|
+
@classmethod
|
|
2259
|
+
def from_triple(cls, prefix: str, identifier: str, name: str | None = None) -> TypeDef:
|
|
2260
|
+
"""Create a typedef from a reference."""
|
|
2261
|
+
return cls(reference=Reference(prefix=prefix, identifier=identifier, name=name))
|
|
2262
|
+
|
|
2263
|
+
@classmethod
|
|
2264
|
+
def default(
|
|
2265
|
+
cls, prefix: str, identifier: str, *, name: str | None = None, is_metadata_tag: bool
|
|
2266
|
+
) -> Self:
|
|
2267
|
+
"""Construct a default type definition from within the OBO namespace."""
|
|
2268
|
+
return cls(
|
|
2269
|
+
reference=default_reference(prefix, identifier, name=name),
|
|
2270
|
+
is_metadata_tag=is_metadata_tag,
|
|
2271
|
+
)
|
|
2272
|
+
|
|
2273
|
+
|
|
2274
|
+
class AdHocOntologyBase(Obo):
|
|
2275
|
+
"""A base class for ad-hoc ontologies."""
|
|
2276
|
+
|
|
2277
|
+
|
|
1436
2278
|
def make_ad_hoc_ontology(
|
|
1437
2279
|
_ontology: str,
|
|
1438
|
-
_name: str,
|
|
1439
|
-
_auto_generated_by:
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
2280
|
+
_name: str | None = None,
|
|
2281
|
+
_auto_generated_by: str | None = None,
|
|
2282
|
+
_typedefs: list[TypeDef] | None = None,
|
|
2283
|
+
_synonym_typedefs: list[SynonymTypeDef] | None = None,
|
|
2284
|
+
_date: datetime.datetime | None = None,
|
|
2285
|
+
_data_version: str | None = None,
|
|
2286
|
+
_idspaces: Mapping[str, str] | None = None,
|
|
2287
|
+
_root_terms: list[Reference] | None = None,
|
|
2288
|
+
_subsetdefs: list[tuple[Reference, str]] | None = None,
|
|
2289
|
+
_property_values: list[Annotation] | None = None,
|
|
2290
|
+
_imports: list[str] | None = None,
|
|
1447
2291
|
*,
|
|
1448
|
-
terms: list[Term],
|
|
1449
|
-
) ->
|
|
2292
|
+
terms: list[Term] | None = None,
|
|
2293
|
+
) -> Obo:
|
|
1450
2294
|
"""Make an ad-hoc ontology."""
|
|
1451
2295
|
|
|
1452
|
-
class AdHocOntology(
|
|
2296
|
+
class AdHocOntology(AdHocOntologyBase):
|
|
1453
2297
|
"""An ad hoc ontology created from an OBO file."""
|
|
1454
2298
|
|
|
1455
2299
|
ontology = _ontology
|
|
1456
2300
|
name = _name
|
|
1457
2301
|
auto_generated_by = _auto_generated_by
|
|
1458
|
-
format_version = _format_version
|
|
1459
2302
|
typedefs = _typedefs
|
|
1460
2303
|
synonym_typedefs = _synonym_typedefs
|
|
1461
2304
|
idspaces = _idspaces
|
|
1462
2305
|
root_terms = _root_terms
|
|
2306
|
+
subsetdefs = _subsetdefs
|
|
2307
|
+
property_values = _property_values
|
|
2308
|
+
imports = _imports
|
|
1463
2309
|
|
|
1464
2310
|
def __post_init__(self):
|
|
1465
2311
|
self.date = _date
|
|
@@ -1467,30 +2313,11 @@ def make_ad_hoc_ontology(
|
|
|
1467
2313
|
|
|
1468
2314
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
1469
2315
|
"""Iterate over terms in the ad hoc ontology."""
|
|
1470
|
-
return terms
|
|
2316
|
+
return terms or []
|
|
1471
2317
|
|
|
1472
2318
|
return AdHocOntology()
|
|
1473
2319
|
|
|
1474
2320
|
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
return []
|
|
1479
|
-
return [_convert_typedef(typedef) for typedef in typedefs]
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
|
|
1483
|
-
"""Convert a type def."""
|
|
1484
|
-
# TODO add more later
|
|
1485
|
-
return typedef.reference.model_dump()
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]:
|
|
1489
|
-
"""Convert the synonym type defs."""
|
|
1490
|
-
if not synonym_typedefs:
|
|
1491
|
-
return []
|
|
1492
|
-
return [_convert_synonym_typedef(synonym_typedef) for synonym_typedef in synonym_typedefs]
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
def _convert_synonym_typedef(synonym_typedef: SynonymTypeDef) -> str:
|
|
1496
|
-
return f'{synonym_typedef.preferred_curie} "{synonym_typedef.name}"'
|
|
2321
|
+
HUMAN_TERM = Term(reference=v.HUMAN)
|
|
2322
|
+
CHARLIE_TERM = Term(reference=v.CHARLIE, type="Instance").append_parent(HUMAN_TERM)
|
|
2323
|
+
PYOBO_INJECTED = "Injected by PyOBO"
|