pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/struct/struct.py
CHANGED
|
@@ -1,111 +1,209 @@
|
|
|
1
1
|
"""Data structures for OBO."""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import itertools as itt
|
|
4
7
|
import json
|
|
5
8
|
import logging
|
|
6
9
|
import os
|
|
7
10
|
import sys
|
|
8
|
-
|
|
9
|
-
from collections
|
|
11
|
+
import warnings
|
|
12
|
+
from collections import ChainMap, defaultdict
|
|
13
|
+
from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
|
|
10
14
|
from dataclasses import dataclass, field
|
|
11
|
-
from datetime import datetime
|
|
12
|
-
from operator import attrgetter
|
|
13
15
|
from pathlib import Path
|
|
14
16
|
from textwrap import dedent
|
|
15
|
-
from typing import
|
|
16
|
-
Any,
|
|
17
|
-
Callable,
|
|
18
|
-
ClassVar,
|
|
19
|
-
Optional,
|
|
20
|
-
TextIO,
|
|
21
|
-
Union,
|
|
22
|
-
)
|
|
17
|
+
from typing import Annotated, Any, ClassVar, TextIO
|
|
23
18
|
|
|
24
19
|
import bioregistry
|
|
25
20
|
import click
|
|
21
|
+
import curies
|
|
26
22
|
import networkx as nx
|
|
27
23
|
import pandas as pd
|
|
24
|
+
import ssslm
|
|
25
|
+
from curies import ReferenceTuple
|
|
26
|
+
from curies import vocabulary as _cv
|
|
28
27
|
from more_click import force_option, verbose_option
|
|
29
28
|
from tqdm.auto import tqdm
|
|
30
|
-
from typing_extensions import
|
|
31
|
-
|
|
32
|
-
from .
|
|
33
|
-
from .
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
29
|
+
from typing_extensions import Self
|
|
30
|
+
|
|
31
|
+
from . import vocabulary as v
|
|
32
|
+
from .reference import (
|
|
33
|
+
OBOLiteral,
|
|
34
|
+
Reference,
|
|
35
|
+
Referenced,
|
|
36
|
+
_reference_list_tag,
|
|
37
|
+
comma_separate_references,
|
|
38
|
+
default_reference,
|
|
39
|
+
get_preferred_curie,
|
|
40
|
+
reference_escape,
|
|
41
|
+
reference_or_literal_to_str,
|
|
42
|
+
)
|
|
43
|
+
from .struct_utils import (
|
|
44
|
+
Annotation,
|
|
45
|
+
AnnotationsDict,
|
|
46
|
+
HasReferencesMixin,
|
|
47
|
+
IntersectionOfHint,
|
|
48
|
+
PropertiesHint,
|
|
49
|
+
ReferenceHint,
|
|
50
|
+
RelationsHint,
|
|
51
|
+
Stanza,
|
|
52
|
+
StanzaType,
|
|
53
|
+
UnionOfHint,
|
|
54
|
+
_chain_tag,
|
|
55
|
+
_ensure_ref,
|
|
56
|
+
_get_prefixes_from_annotations,
|
|
57
|
+
_get_references_from_annotations,
|
|
58
|
+
_tag_property_targets,
|
|
48
59
|
)
|
|
49
|
-
from .utils import
|
|
60
|
+
from .utils import _boolean_tag, obo_escape_slim
|
|
50
61
|
from ..api.utils import get_version
|
|
51
62
|
from ..constants import (
|
|
63
|
+
BUILD_SUBDIRECTORY_NAME,
|
|
52
64
|
DATE_FORMAT,
|
|
65
|
+
DEFAULT_PREFIX_MAP,
|
|
53
66
|
NCBITAXON_PREFIX,
|
|
54
67
|
RELATION_ID,
|
|
55
68
|
RELATION_PREFIX,
|
|
56
69
|
TARGET_ID,
|
|
57
70
|
TARGET_PREFIX,
|
|
58
71
|
)
|
|
59
|
-
from ..
|
|
72
|
+
from ..utils.cache import write_gzipped_graph
|
|
60
73
|
from ..utils.io import multidict, write_iterable_tsv
|
|
61
|
-
from ..utils.
|
|
62
|
-
|
|
74
|
+
from ..utils.path import (
|
|
75
|
+
CacheArtifact,
|
|
76
|
+
get_cache_path,
|
|
77
|
+
get_relation_cache_path,
|
|
78
|
+
prefix_directory_join,
|
|
79
|
+
)
|
|
80
|
+
from ..version import get_version as get_pyobo_version
|
|
63
81
|
|
|
64
82
|
__all__ = [
|
|
83
|
+
"Obo",
|
|
65
84
|
"Synonym",
|
|
66
85
|
"SynonymTypeDef",
|
|
67
|
-
"SynonymSpecificity",
|
|
68
|
-
"SynonymSpecificities",
|
|
69
86
|
"Term",
|
|
70
|
-
"Obo",
|
|
71
|
-
"make_ad_hoc_ontology",
|
|
72
87
|
"abbreviation",
|
|
73
88
|
"acronym",
|
|
89
|
+
"make_ad_hoc_ontology",
|
|
74
90
|
]
|
|
75
91
|
|
|
76
92
|
logger = logging.getLogger(__name__)
|
|
77
93
|
|
|
78
|
-
|
|
79
|
-
|
|
94
|
+
#: This is what happens if no specificity is given
|
|
95
|
+
DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
|
|
96
|
+
|
|
97
|
+
#: Columns in the SSSOM dataframe
|
|
98
|
+
SSSOM_DF_COLUMNS = [
|
|
99
|
+
"subject_id",
|
|
100
|
+
"subject_label",
|
|
101
|
+
"object_id",
|
|
102
|
+
"predicate_id",
|
|
103
|
+
"mapping_justification",
|
|
104
|
+
"confidence",
|
|
105
|
+
"contributor",
|
|
106
|
+
]
|
|
107
|
+
UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
|
|
108
|
+
FORMAT_VERSION = "1.4"
|
|
80
109
|
|
|
81
110
|
|
|
82
111
|
@dataclass
|
|
83
|
-
class Synonym:
|
|
112
|
+
class Synonym(HasReferencesMixin):
|
|
84
113
|
"""A synonym with optional specificity and references."""
|
|
85
114
|
|
|
86
115
|
#: The string representing the synonym
|
|
87
116
|
name: str
|
|
88
117
|
|
|
89
118
|
#: The specificity of the synonym
|
|
90
|
-
specificity:
|
|
119
|
+
specificity: _cv.SynonymScope | None = None
|
|
91
120
|
|
|
92
121
|
#: The type of synonym. Must be defined in OBO document!
|
|
93
|
-
type:
|
|
94
|
-
default_factory=lambda: DEFAULT_SYNONYM_TYPE # type:ignore
|
|
95
|
-
)
|
|
122
|
+
type: Reference | None = None
|
|
96
123
|
|
|
97
124
|
#: References to articles where the synonym appears
|
|
98
|
-
provenance:
|
|
125
|
+
provenance: Sequence[Reference | OBOLiteral] = field(default_factory=list)
|
|
126
|
+
|
|
127
|
+
#: Extra annotations
|
|
128
|
+
annotations: list[Annotation] = field(default_factory=list)
|
|
129
|
+
|
|
130
|
+
#: Language tag for the synonym
|
|
131
|
+
language: str | None = None
|
|
132
|
+
|
|
133
|
+
def __lt__(self, other: Synonym) -> bool:
|
|
134
|
+
"""Sort lexically by name."""
|
|
135
|
+
return self._sort_key() < other._sort_key()
|
|
136
|
+
|
|
137
|
+
def _get_references(self) -> defaultdict[str, set[Reference]]:
|
|
138
|
+
"""Get all prefixes used by the typedef."""
|
|
139
|
+
rv: defaultdict[str, set[Reference]] = defaultdict(set)
|
|
140
|
+
rv[v.has_dbxref.prefix].add(v.has_dbxref)
|
|
141
|
+
if self.type is not None:
|
|
142
|
+
rv[self.type.prefix].add(self.type)
|
|
143
|
+
for provenance in self.provenance:
|
|
144
|
+
match provenance:
|
|
145
|
+
case Reference():
|
|
146
|
+
rv[provenance.prefix].add(provenance)
|
|
147
|
+
case OBOLiteral(_, datatype, _language):
|
|
148
|
+
rv[datatype.prefix].add(v._c(datatype))
|
|
149
|
+
for prefix, references in _get_references_from_annotations(self.annotations).items():
|
|
150
|
+
rv[prefix].update(references)
|
|
151
|
+
return rv
|
|
152
|
+
|
|
153
|
+
def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
|
|
154
|
+
return (
|
|
155
|
+
self.name,
|
|
156
|
+
self.specificity or DEFAULT_SPECIFICITY,
|
|
157
|
+
self.type.curie if self.type else "",
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
@property
|
|
161
|
+
def predicate(self) -> curies.NamedReference:
|
|
162
|
+
"""Get the specificity reference."""
|
|
163
|
+
return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
|
|
99
164
|
|
|
100
|
-
def to_obo(
|
|
165
|
+
def to_obo(
|
|
166
|
+
self,
|
|
167
|
+
ontology_prefix: str,
|
|
168
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
169
|
+
) -> str:
|
|
101
170
|
"""Write this synonym as an OBO line to appear in a [Term] stanza."""
|
|
102
|
-
return f"synonym: {self._fp()}"
|
|
171
|
+
return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}"
|
|
103
172
|
|
|
104
|
-
def _fp(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
173
|
+
def _fp(
|
|
174
|
+
self,
|
|
175
|
+
ontology_prefix: str,
|
|
176
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
177
|
+
) -> str:
|
|
178
|
+
if synonym_typedefs is None:
|
|
179
|
+
synonym_typedefs = {}
|
|
180
|
+
|
|
181
|
+
x = f'"{self._escape(self.name)}"'
|
|
182
|
+
|
|
183
|
+
# Add on the specificity, e.g., EXACT
|
|
184
|
+
synonym_typedef = _synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs)
|
|
185
|
+
if synonym_typedef is not None and synonym_typedef.specificity is not None:
|
|
186
|
+
x = f"{x} {synonym_typedef.specificity}"
|
|
187
|
+
elif self.specificity is not None:
|
|
188
|
+
x = f"{x} {self.specificity}"
|
|
189
|
+
elif self.type is not None:
|
|
190
|
+
# it's not valid to have a synonym type without a specificity,
|
|
191
|
+
# so automatically assign one if we'll need it
|
|
192
|
+
x = f"{x} {DEFAULT_SPECIFICITY}"
|
|
193
|
+
|
|
194
|
+
# Add on the synonym type, if exists
|
|
195
|
+
if self.type is not None:
|
|
196
|
+
x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
|
|
197
|
+
|
|
198
|
+
# the provenance list is required, even if it's empty :/
|
|
199
|
+
x = f"{x} [{comma_separate_references(self.provenance)}]"
|
|
200
|
+
|
|
201
|
+
# OBO flat file format does not support language,
|
|
202
|
+
# but at least we can mention it here as a comment
|
|
203
|
+
if self.language:
|
|
204
|
+
x += f" ! language: {self.language}"
|
|
205
|
+
|
|
206
|
+
return x
|
|
109
207
|
|
|
110
208
|
@staticmethod
|
|
111
209
|
def _escape(s: str) -> str:
|
|
@@ -113,113 +211,100 @@ class Synonym:
|
|
|
113
211
|
|
|
114
212
|
|
|
115
213
|
@dataclass
|
|
116
|
-
class SynonymTypeDef(Referenced):
|
|
214
|
+
class SynonymTypeDef(Referenced, HasReferencesMixin):
|
|
117
215
|
"""A type definition for synonyms in OBO."""
|
|
118
216
|
|
|
119
217
|
reference: Reference
|
|
120
|
-
specificity:
|
|
218
|
+
specificity: _cv.SynonymScope | None = None
|
|
121
219
|
|
|
122
|
-
def
|
|
220
|
+
def __hash__(self) -> int:
|
|
221
|
+
# have to re-define hash because of the @dataclass
|
|
222
|
+
return hash((self.__class__, self.prefix, self.identifier))
|
|
223
|
+
|
|
224
|
+
def to_obo(self, ontology_prefix: str) -> str:
|
|
123
225
|
"""Serialize to OBO."""
|
|
124
|
-
rv = f
|
|
226
|
+
rv = f"synonymtypedef: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
|
|
227
|
+
name = self.name or ""
|
|
228
|
+
rv = f'{rv} "{name}"'
|
|
125
229
|
if self.specificity:
|
|
126
230
|
rv = f"{rv} {self.specificity}"
|
|
127
231
|
return rv
|
|
128
232
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
specificity
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
text.replace("-", "_")
|
|
140
|
-
.replace(" ", "_")
|
|
141
|
-
.replace('"', "")
|
|
142
|
-
.replace(")", "")
|
|
143
|
-
.replace("(", "")
|
|
144
|
-
)
|
|
145
|
-
if lower:
|
|
146
|
-
identifier = identifier.lower()
|
|
147
|
-
return cls(
|
|
148
|
-
reference=Reference(prefix="obo", identifier=identifier, name=text.replace('"', "")),
|
|
149
|
-
specificity=specificity,
|
|
150
|
-
)
|
|
233
|
+
def _get_references(self) -> dict[str, set[Reference]]:
|
|
234
|
+
"""Get all references used by the typedef."""
|
|
235
|
+
rv: defaultdict[str, set[Reference]] = defaultdict(set)
|
|
236
|
+
rv[self.reference.prefix].add(self.reference)
|
|
237
|
+
if self.specificity is not None:
|
|
238
|
+
# weird syntax, but this just gets the synonym scope
|
|
239
|
+
# predicate as a pyobo reference
|
|
240
|
+
r = v._c(_cv.synonym_scopes[self.specificity])
|
|
241
|
+
rv[r.prefix].add(r)
|
|
242
|
+
return dict(rv)
|
|
151
243
|
|
|
152
244
|
|
|
153
245
|
DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
|
|
154
|
-
reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="
|
|
246
|
+
reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
|
|
155
247
|
)
|
|
156
248
|
abbreviation = SynonymTypeDef(
|
|
157
249
|
reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
|
|
158
250
|
)
|
|
159
251
|
acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
if isinstance(reference, Term):
|
|
169
|
-
return reference.reference
|
|
170
|
-
if isinstance(reference, str):
|
|
171
|
-
_rv = Reference.from_curie(reference)
|
|
172
|
-
if _rv is None:
|
|
173
|
-
raise ValueError(f"could not parse CURIE from {reference}")
|
|
174
|
-
return _rv
|
|
175
|
-
if isinstance(reference, tuple):
|
|
176
|
-
return Reference(prefix=reference[0], identifier=reference[1])
|
|
177
|
-
if isinstance(reference, Reference):
|
|
178
|
-
return reference
|
|
179
|
-
raise TypeError(f"invalid type given for a reference ({type(reference)}): {reference}")
|
|
252
|
+
uk_spelling = SynonymTypeDef(
|
|
253
|
+
reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
|
|
254
|
+
)
|
|
255
|
+
default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
|
|
256
|
+
abbreviation.pair: abbreviation,
|
|
257
|
+
acronym.pair: acronym,
|
|
258
|
+
uk_spelling.pair: uk_spelling,
|
|
259
|
+
}
|
|
180
260
|
|
|
181
261
|
|
|
182
262
|
@dataclass
|
|
183
|
-
class Term(
|
|
263
|
+
class Term(Stanza):
|
|
184
264
|
"""A term in OBO."""
|
|
185
265
|
|
|
186
266
|
#: The primary reference for the entity
|
|
187
267
|
reference: Reference
|
|
188
268
|
|
|
189
269
|
#: A description of the entity
|
|
190
|
-
definition:
|
|
270
|
+
definition: str | None = None
|
|
191
271
|
|
|
192
|
-
#:
|
|
193
|
-
|
|
272
|
+
#: Object properties
|
|
273
|
+
relationships: RelationsHint = field(default_factory=lambda: defaultdict(list))
|
|
194
274
|
|
|
195
|
-
|
|
196
|
-
relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list))
|
|
275
|
+
_axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
|
|
197
276
|
|
|
198
|
-
|
|
199
|
-
properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
|
|
277
|
+
properties: PropertiesHint = field(default_factory=lambda: defaultdict(list))
|
|
200
278
|
|
|
201
279
|
#: Relationships with the default "is_a"
|
|
202
280
|
parents: list[Reference] = field(default_factory=list)
|
|
203
281
|
|
|
282
|
+
intersection_of: IntersectionOfHint = field(default_factory=list)
|
|
283
|
+
union_of: UnionOfHint = field(default_factory=list)
|
|
284
|
+
equivalent_to: list[Reference] = field(default_factory=list)
|
|
285
|
+
disjoint_from: list[Reference] = field(default_factory=list)
|
|
286
|
+
|
|
204
287
|
#: Synonyms of this term
|
|
205
288
|
synonyms: list[Synonym] = field(default_factory=list)
|
|
206
289
|
|
|
207
|
-
#:
|
|
290
|
+
#: Database cross-references, see :func:`get_mappings` for
|
|
291
|
+
#: access to all mappings in an SSSOM-like interface
|
|
208
292
|
xrefs: list[Reference] = field(default_factory=list)
|
|
209
|
-
xref_types: list[Reference] = field(default_factory=list)
|
|
210
|
-
|
|
211
|
-
#: Alternate Identifiers
|
|
212
|
-
alt_ids: list[Reference] = field(default_factory=list)
|
|
213
293
|
|
|
214
294
|
#: The sub-namespace within the ontology
|
|
215
|
-
namespace:
|
|
295
|
+
namespace: str | None = None
|
|
216
296
|
|
|
217
297
|
#: An annotation for obsolescence. By default, is None, but this means that it is not obsolete.
|
|
218
|
-
is_obsolete:
|
|
298
|
+
is_obsolete: bool | None = None
|
|
299
|
+
|
|
300
|
+
type: StanzaType = "Term"
|
|
219
301
|
|
|
220
|
-
|
|
302
|
+
builtin: bool | None = None
|
|
303
|
+
is_anonymous: bool | None = None
|
|
304
|
+
subsets: list[Reference] = field(default_factory=list)
|
|
221
305
|
|
|
222
|
-
def __hash__(self):
|
|
306
|
+
def __hash__(self) -> int:
|
|
307
|
+
# have to re-define hash because of the @dataclass
|
|
223
308
|
return hash((self.__class__, self.prefix, self.identifier))
|
|
224
309
|
|
|
225
310
|
@classmethod
|
|
@@ -227,10 +312,10 @@ class Term(Referenced):
|
|
|
227
312
|
cls,
|
|
228
313
|
prefix: str,
|
|
229
314
|
identifier: str,
|
|
230
|
-
name:
|
|
231
|
-
definition:
|
|
315
|
+
name: str | None = None,
|
|
316
|
+
definition: str | None = None,
|
|
232
317
|
**kwargs,
|
|
233
|
-
) ->
|
|
318
|
+
) -> Term:
|
|
234
319
|
"""Create a term from a reference."""
|
|
235
320
|
return cls(
|
|
236
321
|
reference=Reference(prefix=prefix, identifier=identifier, name=name),
|
|
@@ -239,245 +324,198 @@ class Term(Referenced):
|
|
|
239
324
|
)
|
|
240
325
|
|
|
241
326
|
@classmethod
|
|
242
|
-
def
|
|
243
|
-
|
|
244
|
-
prefix
|
|
245
|
-
identifier: str,
|
|
246
|
-
) -> "Term":
|
|
247
|
-
"""Create a term from a reference."""
|
|
248
|
-
from ..api import get_definition
|
|
249
|
-
|
|
250
|
-
return cls(
|
|
251
|
-
reference=Reference.auto(prefix=prefix, identifier=identifier),
|
|
252
|
-
definition=get_definition(prefix, identifier),
|
|
253
|
-
)
|
|
254
|
-
|
|
255
|
-
@classmethod
|
|
256
|
-
def from_curie(cls, curie: str, name: Optional[str] = None) -> "Term":
|
|
257
|
-
"""Create a term directly from a CURIE and optional name."""
|
|
258
|
-
prefix, identifier = normalize_curie(curie)
|
|
259
|
-
if prefix is None or identifier is None:
|
|
260
|
-
raise ValueError
|
|
261
|
-
return cls.from_triple(prefix=prefix, identifier=identifier, name=name)
|
|
262
|
-
|
|
263
|
-
def append_provenance(self, reference: ReferenceHint) -> None:
|
|
264
|
-
"""Add a provenance reference."""
|
|
265
|
-
self.provenance.append(_ensure_ref(reference))
|
|
266
|
-
|
|
267
|
-
def append_synonym(
|
|
268
|
-
self,
|
|
269
|
-
synonym: Union[str, Synonym],
|
|
270
|
-
*,
|
|
271
|
-
type: Optional[SynonymTypeDef] = None,
|
|
272
|
-
specificity: Optional[SynonymSpecificity] = None,
|
|
273
|
-
) -> None:
|
|
274
|
-
"""Add a synonym."""
|
|
275
|
-
if isinstance(synonym, str):
|
|
276
|
-
synonym = Synonym(
|
|
277
|
-
synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
|
|
278
|
-
)
|
|
279
|
-
self.synonyms.append(synonym)
|
|
280
|
-
|
|
281
|
-
def append_alt(self, alt: Union[str, Reference]) -> None:
|
|
282
|
-
"""Add an alternative identifier."""
|
|
283
|
-
if isinstance(alt, str):
|
|
284
|
-
alt = Reference(prefix=self.prefix, identifier=alt)
|
|
285
|
-
self.alt_ids.append(alt)
|
|
327
|
+
def default(cls, prefix, identifier, name=None) -> Self:
|
|
328
|
+
"""Create a default term."""
|
|
329
|
+
return cls(reference=default_reference(prefix=prefix, identifier=identifier, name=name))
|
|
286
330
|
|
|
287
|
-
def
|
|
288
|
-
"""Add a see also
|
|
289
|
-
self.
|
|
290
|
-
return self
|
|
291
|
-
|
|
292
|
-
def append_comment(self, value: str) -> "Term":
|
|
293
|
-
"""Add a comment relationship."""
|
|
294
|
-
self.append_property(comment.curie, value)
|
|
295
|
-
return self
|
|
296
|
-
|
|
297
|
-
def append_replaced_by(self, reference: ReferenceHint) -> "Term":
|
|
298
|
-
"""Add a replaced by relationship."""
|
|
299
|
-
self.append_relationship(term_replaced_by, reference)
|
|
300
|
-
return self
|
|
301
|
-
|
|
302
|
-
def append_parent(self, reference: ReferenceHint) -> "Term":
|
|
303
|
-
"""Add a parent to this entity."""
|
|
304
|
-
reference = _ensure_ref(reference)
|
|
305
|
-
if reference not in self.parents:
|
|
306
|
-
self.parents.append(reference)
|
|
307
|
-
return self
|
|
331
|
+
def append_see_also_uri(self, uri: str) -> Self:
|
|
332
|
+
"""Add a see also property."""
|
|
333
|
+
return self.annotate_uri(v.see_also, uri)
|
|
308
334
|
|
|
309
335
|
def extend_parents(self, references: Collection[Reference]) -> None:
|
|
310
336
|
"""Add a collection of parents to this entity."""
|
|
337
|
+
warnings.warn("use append_parent", DeprecationWarning, stacklevel=2)
|
|
311
338
|
if any(x is None for x in references):
|
|
312
339
|
raise ValueError("can not append a collection of parents containing a null parent")
|
|
313
340
|
self.parents.extend(references)
|
|
314
341
|
|
|
315
|
-
def
|
|
342
|
+
def get_property_literals(self, prop: ReferenceHint) -> list[str]:
|
|
316
343
|
"""Get properties from the given key."""
|
|
317
|
-
return self.properties[
|
|
344
|
+
return [reference_or_literal_to_str(t) for t in self.properties.get(_ensure_ref(prop), [])]
|
|
318
345
|
|
|
319
|
-
def get_property(self, prop) ->
|
|
346
|
+
def get_property(self, prop: ReferenceHint) -> str | None:
|
|
320
347
|
"""Get a single property of the given key."""
|
|
321
|
-
r = self.
|
|
322
|
-
if not r:
|
|
323
|
-
return None
|
|
324
|
-
if len(r) != 1:
|
|
325
|
-
raise ValueError
|
|
326
|
-
return r[0]
|
|
327
|
-
|
|
328
|
-
def get_relationship(self, typedef: TypeDef) -> Optional[Reference]:
|
|
329
|
-
"""Get a single relationship of the given type."""
|
|
330
|
-
r = self.get_relationships(typedef)
|
|
348
|
+
r = self.get_property_literals(prop)
|
|
331
349
|
if not r:
|
|
332
350
|
return None
|
|
333
351
|
if len(r) != 1:
|
|
334
352
|
raise ValueError
|
|
335
353
|
return r[0]
|
|
336
354
|
|
|
337
|
-
def
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
355
|
+
def append_exact_match(
|
|
356
|
+
self,
|
|
357
|
+
reference: ReferenceHint,
|
|
358
|
+
*,
|
|
359
|
+
mapping_justification: Reference | None = None,
|
|
360
|
+
confidence: float | None = None,
|
|
361
|
+
contributor: Reference | None = None,
|
|
362
|
+
) -> Self:
|
|
342
363
|
"""Append an exact match, also adding an xref."""
|
|
343
364
|
reference = _ensure_ref(reference)
|
|
344
|
-
self.
|
|
345
|
-
|
|
365
|
+
axioms = self._prepare_mapping_annotations(
|
|
366
|
+
mapping_justification=mapping_justification,
|
|
367
|
+
confidence=confidence,
|
|
368
|
+
contributor=contributor,
|
|
369
|
+
)
|
|
370
|
+
self.annotate_object(v.exact_match, reference, annotations=axioms)
|
|
346
371
|
return self
|
|
347
372
|
|
|
348
|
-
def
|
|
349
|
-
"""Append an xref."""
|
|
350
|
-
self.xrefs.append(_ensure_ref(reference))
|
|
351
|
-
|
|
352
|
-
def append_relationship(self, typedef: TypeDef, reference: ReferenceHint) -> None:
|
|
353
|
-
"""Append a relationship."""
|
|
354
|
-
self.relationships[typedef].append(_ensure_ref(reference))
|
|
355
|
-
|
|
356
|
-
def set_species(self, identifier: str, name: Optional[str] = None):
|
|
373
|
+
def set_species(self, identifier: str, name: str | None = None) -> Self:
|
|
357
374
|
"""Append the from_species relation."""
|
|
358
375
|
if name is None:
|
|
359
376
|
from pyobo.resources.ncbitaxon import get_ncbitaxon_name
|
|
360
377
|
|
|
361
378
|
name = get_ncbitaxon_name(identifier)
|
|
362
|
-
self.append_relationship(
|
|
363
|
-
from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
|
|
379
|
+
return self.append_relationship(
|
|
380
|
+
v.from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
|
|
364
381
|
)
|
|
365
382
|
|
|
366
|
-
def get_species(self, prefix: str = NCBITAXON_PREFIX) ->
|
|
383
|
+
def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Reference | None:
|
|
367
384
|
"""Get the species if it exists.
|
|
368
385
|
|
|
369
386
|
:param prefix: The prefix to use in case the term has several species annotations.
|
|
370
387
|
"""
|
|
371
|
-
for species in self.
|
|
388
|
+
for species in self.get_relationships(v.from_species):
|
|
372
389
|
if species.prefix == prefix:
|
|
373
390
|
return species
|
|
374
391
|
return None
|
|
375
392
|
|
|
376
|
-
def extend_relationship(self, typedef:
|
|
393
|
+
def extend_relationship(self, typedef: ReferenceHint, references: Iterable[Reference]) -> None:
|
|
377
394
|
"""Append several relationships."""
|
|
395
|
+
warnings.warn("use append_relationship", DeprecationWarning, stacklevel=2)
|
|
378
396
|
if any(x is None for x in references):
|
|
379
397
|
raise ValueError("can not extend a collection that includes a null reference")
|
|
398
|
+
typedef = _ensure_ref(typedef)
|
|
380
399
|
self.relationships[typedef].extend(references)
|
|
381
400
|
|
|
382
|
-
def
|
|
383
|
-
self,
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
def _definition_fp(self) -> str:
|
|
393
|
-
if self.definition is None:
|
|
394
|
-
raise AssertionError
|
|
395
|
-
return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'
|
|
396
|
-
|
|
397
|
-
def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]:
|
|
398
|
-
"""Iterate over pairs of typedefs and targets."""
|
|
399
|
-
for typedef, targets in sorted(self.relationships.items(), key=_sort_relations):
|
|
400
|
-
for target in sorted(targets, key=lambda ref: ref.preferred_curie):
|
|
401
|
-
yield typedef, target
|
|
402
|
-
|
|
403
|
-
def iterate_properties(self) -> Iterable[tuple[str, str]]:
|
|
404
|
-
"""Iterate over pairs of property and values."""
|
|
405
|
-
for prop, values in sorted(self.properties.items()):
|
|
406
|
-
for value in sorted(values):
|
|
407
|
-
yield prop, value
|
|
408
|
-
|
|
409
|
-
def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
|
|
401
|
+
def iterate_obo_lines(
|
|
402
|
+
self,
|
|
403
|
+
*,
|
|
404
|
+
ontology_prefix: str,
|
|
405
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
406
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
407
|
+
emit_object_properties: bool = True,
|
|
408
|
+
emit_annotation_properties: bool = True,
|
|
409
|
+
) -> Iterable[str]:
|
|
410
410
|
"""Iterate over the lines to write in an OBO file."""
|
|
411
411
|
yield f"\n[{self.type}]"
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
412
|
+
# 1
|
|
413
|
+
yield f"id: {self._reference(self.reference, ontology_prefix)}"
|
|
414
|
+
# 2
|
|
415
|
+
yield from _boolean_tag("is_anonymous", self.is_anonymous)
|
|
416
|
+
# 3
|
|
415
417
|
if self.name:
|
|
416
418
|
yield f"name: {obo_escape_slim(self.name)}"
|
|
419
|
+
# 4
|
|
417
420
|
if self.namespace and self.namespace != "?":
|
|
418
421
|
namespace_normalized = (
|
|
419
422
|
self.namespace.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "")
|
|
420
423
|
)
|
|
421
424
|
yield f"namespace: {namespace_normalized}"
|
|
422
|
-
|
|
425
|
+
# 5
|
|
426
|
+
for alt in sorted(self.alt_ids):
|
|
427
|
+
yield f"alt_id: {self._reference(alt, ontology_prefix, add_name_comment=True)}"
|
|
428
|
+
# 6
|
|
423
429
|
if self.definition:
|
|
424
430
|
yield f"def: {self._definition_fp()}"
|
|
425
|
-
|
|
426
|
-
for
|
|
427
|
-
|
|
428
|
-
|
|
431
|
+
# 7
|
|
432
|
+
for x in self.get_property_values(v.comment):
|
|
433
|
+
if isinstance(x, OBOLiteral):
|
|
434
|
+
yield f'comment: "{x.value}"'
|
|
435
|
+
# 8
|
|
436
|
+
yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
|
|
437
|
+
# 9
|
|
438
|
+
for synonym in sorted(self.synonyms):
|
|
439
|
+
yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
|
|
440
|
+
# 10
|
|
441
|
+
yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
|
|
442
|
+
# 11
|
|
443
|
+
yield from _boolean_tag("builtin", self.builtin)
|
|
444
|
+
# 12
|
|
445
|
+
if emit_annotation_properties:
|
|
446
|
+
yield from self._iterate_obo_properties(
|
|
447
|
+
ontology_prefix=ontology_prefix,
|
|
448
|
+
skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
|
|
449
|
+
skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
|
|
450
|
+
typedefs=typedefs,
|
|
451
|
+
)
|
|
452
|
+
# 13
|
|
429
453
|
parent_tag = "is_a" if self.type == "Term" else "instance_of"
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
454
|
+
yield from _reference_list_tag(parent_tag, self.parents, ontology_prefix)
|
|
455
|
+
# 14
|
|
456
|
+
yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
|
|
457
|
+
# 15
|
|
458
|
+
yield from _reference_list_tag("union_of", self.union_of, ontology_prefix=ontology_prefix)
|
|
459
|
+
# 16
|
|
460
|
+
yield from _reference_list_tag(
|
|
461
|
+
"equivalent_to", self.equivalent_to, ontology_prefix=ontology_prefix
|
|
462
|
+
)
|
|
463
|
+
# 17
|
|
464
|
+
yield from _reference_list_tag(
|
|
465
|
+
"disjoint_from", self.disjoint_from, ontology_prefix=ontology_prefix
|
|
466
|
+
)
|
|
467
|
+
# 18
|
|
468
|
+
if emit_object_properties:
|
|
469
|
+
yield from self._iterate_obo_relations(
|
|
470
|
+
ontology_prefix=ontology_prefix, typedefs=typedefs
|
|
471
|
+
)
|
|
472
|
+
# 19 TODO created_by
|
|
473
|
+
# 20
|
|
474
|
+
for x in self.get_property_values(v.obo_creation_date):
|
|
475
|
+
if isinstance(x, OBOLiteral):
|
|
476
|
+
yield f"creation_date: {x.value}"
|
|
477
|
+
# 21
|
|
478
|
+
yield from _boolean_tag("is_obsolete", self.is_obsolete)
|
|
479
|
+
# 22
|
|
480
|
+
yield from _tag_property_targets(
|
|
481
|
+
"replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
|
|
482
|
+
)
|
|
483
|
+
# 23
|
|
484
|
+
yield from _tag_property_targets(
|
|
485
|
+
"consider", self, v.see_also, ontology_prefix=ontology_prefix
|
|
486
|
+
)
|
|
462
487
|
|
|
463
488
|
|
|
464
489
|
#: A set of warnings, used to make sure we don't show the same one over and over
|
|
465
|
-
|
|
466
|
-
|
|
490
|
+
_SYNONYM_TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set()
|
|
467
491
|
|
|
468
|
-
def _sort_relations(r):
|
|
469
|
-
typedef, _references = r
|
|
470
|
-
return typedef.preferred_curie
|
|
471
492
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
return
|
|
479
|
-
|
|
480
|
-
|
|
493
|
+
def _synonym_typedef_warn(
|
|
494
|
+
prefix: str,
|
|
495
|
+
predicate: Reference | None,
|
|
496
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
|
|
497
|
+
) -> SynonymTypeDef | None:
|
|
498
|
+
if predicate is None or predicate.pair == DEFAULT_SYNONYM_TYPE.pair:
|
|
499
|
+
return None
|
|
500
|
+
if predicate.pair in default_synonym_typedefs:
|
|
501
|
+
return default_synonym_typedefs[predicate.pair]
|
|
502
|
+
if predicate.pair in synonym_typedefs:
|
|
503
|
+
return synonym_typedefs[predicate.pair]
|
|
504
|
+
key = prefix, predicate
|
|
505
|
+
if key not in _SYNONYM_TYPEDEF_WARNINGS:
|
|
506
|
+
_SYNONYM_TYPEDEF_WARNINGS.add(key)
|
|
507
|
+
predicate_preferred_curie = get_preferred_curie(predicate)
|
|
508
|
+
if predicate.prefix == "obo":
|
|
509
|
+
# Throw our hands up in the air. By using `obo` as the prefix,
|
|
510
|
+
# we already threw using "real" definitions out the window
|
|
511
|
+
logger.warning(
|
|
512
|
+
f"[{prefix}] synonym typedef with OBO prefix not defined: {predicate_preferred_curie}."
|
|
513
|
+
f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
|
|
514
|
+
f"which automatically gets an OBO prefix."
|
|
515
|
+
)
|
|
516
|
+
else:
|
|
517
|
+
logger.warning(f"[{prefix}] synonym typedef not defined: {predicate_preferred_curie}")
|
|
518
|
+
return None
|
|
481
519
|
|
|
482
520
|
|
|
483
521
|
class BioregistryError(ValueError):
|
|
@@ -495,6 +533,9 @@ class BioregistryError(ValueError):
|
|
|
495
533
|
)
|
|
496
534
|
|
|
497
535
|
|
|
536
|
+
LOGGED_MISSING_URI: set[tuple[str, str]] = set()
|
|
537
|
+
|
|
538
|
+
|
|
498
539
|
@dataclass
|
|
499
540
|
class Obo:
|
|
500
541
|
"""An OBO document."""
|
|
@@ -506,22 +547,19 @@ class Obo:
|
|
|
506
547
|
check_bioregistry_prefix: ClassVar[bool] = True
|
|
507
548
|
|
|
508
549
|
#: The name of the ontology. If not given, tries looking up with the Bioregistry.
|
|
509
|
-
name: ClassVar[
|
|
510
|
-
|
|
511
|
-
#: The OBO format
|
|
512
|
-
format_version: ClassVar[str] = "1.2"
|
|
550
|
+
name: ClassVar[str | None] = None
|
|
513
551
|
|
|
514
552
|
#: Type definitions
|
|
515
|
-
typedefs: ClassVar[
|
|
553
|
+
typedefs: ClassVar[list[TypeDef] | None] = None
|
|
516
554
|
|
|
517
555
|
#: Synonym type definitions
|
|
518
|
-
synonym_typedefs: ClassVar[
|
|
556
|
+
synonym_typedefs: ClassVar[list[SynonymTypeDef] | None] = None
|
|
519
557
|
|
|
520
558
|
#: An annotation about how an ontology was generated
|
|
521
|
-
auto_generated_by: ClassVar[
|
|
559
|
+
auto_generated_by: ClassVar[str | None] = None
|
|
522
560
|
|
|
523
561
|
#: The idspaces used in the document
|
|
524
|
-
idspaces: ClassVar[
|
|
562
|
+
idspaces: ClassVar[Mapping[str, str] | None] = None
|
|
525
563
|
|
|
526
564
|
#: For super-sized datasets that shouldn't be read into memory
|
|
527
565
|
iter_only: ClassVar[bool] = False
|
|
@@ -530,28 +568,32 @@ class Obo:
|
|
|
530
568
|
dynamic_version: ClassVar[bool] = False
|
|
531
569
|
|
|
532
570
|
#: Set to a static version for the resource (i.e., the resource is not itself versioned)
|
|
533
|
-
static_version: ClassVar[
|
|
571
|
+
static_version: ClassVar[str | None] = None
|
|
534
572
|
|
|
535
|
-
bioversions_key: ClassVar[
|
|
573
|
+
bioversions_key: ClassVar[str | None] = None
|
|
536
574
|
|
|
537
575
|
#: Root terms to use for the ontology
|
|
538
|
-
root_terms: ClassVar[
|
|
576
|
+
root_terms: ClassVar[list[Reference] | None] = None
|
|
539
577
|
|
|
540
578
|
#: The date the ontology was generated
|
|
541
|
-
date:
|
|
579
|
+
date: datetime.datetime | None = field(default_factory=datetime.datetime.today)
|
|
542
580
|
|
|
543
581
|
#: The ontology version
|
|
544
|
-
data_version:
|
|
582
|
+
data_version: str | None = None
|
|
545
583
|
|
|
546
584
|
#: Should this ontology be reloaded?
|
|
547
585
|
force: bool = False
|
|
548
586
|
|
|
549
587
|
#: The hierarchy of terms
|
|
550
|
-
_hierarchy:
|
|
588
|
+
_hierarchy: nx.DiGraph | None = field(init=False, default=None, repr=False)
|
|
551
589
|
#: A cache of terms
|
|
552
|
-
_items:
|
|
590
|
+
_items: list[Term] | None = field(init=False, default=None, repr=False)
|
|
591
|
+
|
|
592
|
+
subsetdefs: ClassVar[list[tuple[Reference, str]] | None] = None
|
|
553
593
|
|
|
554
|
-
|
|
594
|
+
property_values: ClassVar[list[Annotation] | None] = None
|
|
595
|
+
|
|
596
|
+
imports: ClassVar[list[str] | None] = None
|
|
555
597
|
|
|
556
598
|
def __post_init__(self):
|
|
557
599
|
"""Run post-init checks."""
|
|
@@ -576,9 +618,85 @@ class Obo:
|
|
|
576
618
|
elif "/" in self.data_version:
|
|
577
619
|
raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
|
|
578
620
|
if self.auto_generated_by is None:
|
|
579
|
-
self.auto_generated_by = f"
|
|
621
|
+
self.auto_generated_by = f"PyOBO v{get_pyobo_version(with_git_hash=True)} on {datetime.datetime.now().isoformat()}" # type:ignore
|
|
622
|
+
|
|
623
|
+
def _get_clean_idspaces(self) -> dict[str, str]:
|
|
624
|
+
"""Get normalized idspace dictionary."""
|
|
625
|
+
rv = dict(
|
|
626
|
+
ChainMap(
|
|
627
|
+
# Add reasonable defaults, most of which are
|
|
628
|
+
# mandated by the OWL spec anyway (except skos?)
|
|
629
|
+
DEFAULT_PREFIX_MAP,
|
|
630
|
+
dict(self.idspaces or {}),
|
|
631
|
+
# automatically detect all prefixes in reference in the ontology,
|
|
632
|
+
# then look up Bioregistry-approved URI prefixes
|
|
633
|
+
self._infer_prefix_map(),
|
|
634
|
+
)
|
|
635
|
+
)
|
|
636
|
+
return rv
|
|
580
637
|
|
|
581
|
-
def
|
|
638
|
+
def _infer_prefix_map(self) -> dict[str, str]:
|
|
639
|
+
"""Get a prefix map including all prefixes used in the ontology."""
|
|
640
|
+
rv = {}
|
|
641
|
+
for prefix in sorted(self._get_prefixes(), key=str.casefold):
|
|
642
|
+
resource = bioregistry.get_resource(prefix)
|
|
643
|
+
if resource is None:
|
|
644
|
+
raise ValueError
|
|
645
|
+
uri_prefix = resource.get_rdf_uri_prefix()
|
|
646
|
+
if uri_prefix is None:
|
|
647
|
+
uri_prefix = resource.get_uri_prefix()
|
|
648
|
+
if uri_prefix is None:
|
|
649
|
+
# This allows us an escape hatch, since some
|
|
650
|
+
# prefixes don't have an associated URI prefix
|
|
651
|
+
uri_prefix = f"https://bioregistry.io/{prefix}:"
|
|
652
|
+
if (self.ontology, prefix) not in LOGGED_MISSING_URI:
|
|
653
|
+
LOGGED_MISSING_URI.add((self.ontology, prefix))
|
|
654
|
+
logger.warning(
|
|
655
|
+
"[%s] uses prefix with no URI format: %s. Auto-generating Bioregistry link: %s",
|
|
656
|
+
self.ontology,
|
|
657
|
+
prefix,
|
|
658
|
+
uri_prefix,
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
pp = bioregistry.get_preferred_prefix(prefix) or str(prefix)
|
|
662
|
+
rv[pp] = uri_prefix
|
|
663
|
+
return rv
|
|
664
|
+
|
|
665
|
+
def _get_prefixes(self) -> set[str]:
|
|
666
|
+
"""Get all prefixes used by the ontology."""
|
|
667
|
+
prefixes: set[str] = set(DEFAULT_PREFIX_MAP)
|
|
668
|
+
for stanza in self._iter_stanzas():
|
|
669
|
+
prefixes.update(stanza._get_prefixes())
|
|
670
|
+
for synonym_typedef in self.synonym_typedefs or []:
|
|
671
|
+
prefixes.update(synonym_typedef._get_prefixes())
|
|
672
|
+
prefixes.update(subset.prefix for subset, _ in self.subsetdefs or [])
|
|
673
|
+
# _iterate_property_pairs covers metadata, root terms,
|
|
674
|
+
# and properties in self.property_values
|
|
675
|
+
prefixes.update(_get_prefixes_from_annotations(self._iterate_property_pairs()))
|
|
676
|
+
if self.auto_generated_by:
|
|
677
|
+
prefixes.add("oboInOwl")
|
|
678
|
+
return prefixes
|
|
679
|
+
|
|
680
|
+
def _get_references(self) -> dict[str, set[Reference]]:
|
|
681
|
+
"""Get all references used by the ontology."""
|
|
682
|
+
rv: defaultdict[str, set[Reference]] = defaultdict(set)
|
|
683
|
+
|
|
684
|
+
for rr in itt.chain(self, self.typedefs or [], self.synonym_typedefs or []):
|
|
685
|
+
for prefix, references in rr._get_references().items():
|
|
686
|
+
rv[prefix].update(references)
|
|
687
|
+
for subset, _ in self.subsetdefs or []:
|
|
688
|
+
rv[subset.prefix].add(subset)
|
|
689
|
+
# _iterate_property_pairs covers metadata, root terms,
|
|
690
|
+
# and properties in self.property_values
|
|
691
|
+
for prefix, references in _get_references_from_annotations(
|
|
692
|
+
self._iterate_property_pairs()
|
|
693
|
+
).items():
|
|
694
|
+
rv[prefix].update(references)
|
|
695
|
+
if self.auto_generated_by:
|
|
696
|
+
rv[v.obo_autogenerated_by.prefix].add(v.obo_autogenerated_by)
|
|
697
|
+
return dict(rv)
|
|
698
|
+
|
|
699
|
+
def _get_version(self) -> str | None:
|
|
582
700
|
if self.bioversions_key:
|
|
583
701
|
try:
|
|
584
702
|
return get_version(self.bioversions_key)
|
|
@@ -610,261 +728,417 @@ class Obo:
|
|
|
610
728
|
path.write_text(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
|
|
611
729
|
|
|
612
730
|
@classmethod
|
|
613
|
-
def cli(cls) ->
|
|
731
|
+
def cli(cls, *args, default_rewrite: bool = False) -> Any:
|
|
614
732
|
"""Run the CLI for this class."""
|
|
615
|
-
cli = cls.get_cls_cli()
|
|
616
|
-
cli()
|
|
733
|
+
cli = cls.get_cls_cli(default_rewrite=default_rewrite)
|
|
734
|
+
return cli(*args)
|
|
617
735
|
|
|
618
736
|
@classmethod
|
|
619
|
-
def get_cls_cli(cls) -> click.Command:
|
|
737
|
+
def get_cls_cli(cls, *, default_rewrite: bool = False) -> click.Command:
|
|
620
738
|
"""Get the CLI for this class."""
|
|
621
739
|
|
|
622
740
|
@click.command()
|
|
623
741
|
@verbose_option
|
|
624
742
|
@force_option
|
|
625
|
-
@click.option(
|
|
743
|
+
@click.option(
|
|
744
|
+
"--rewrite/--no-rewrite",
|
|
745
|
+
"-r",
|
|
746
|
+
default=False,
|
|
747
|
+
is_flag=True,
|
|
748
|
+
help="Re-process the data, but don't download it again.",
|
|
749
|
+
)
|
|
626
750
|
@click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
|
|
627
|
-
@click.option("--
|
|
751
|
+
@click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
|
|
752
|
+
@click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
|
|
628
753
|
@click.option(
|
|
629
754
|
"--version", help="Specify data version to get. Use this if bioversions is acting up."
|
|
630
755
|
)
|
|
631
|
-
def _main(force: bool, owl: bool,
|
|
756
|
+
def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
|
|
757
|
+
rewrite = True
|
|
632
758
|
try:
|
|
633
759
|
inst = cls(force=force, data_version=version)
|
|
634
760
|
except Exception as e:
|
|
635
761
|
click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
|
|
636
762
|
sys.exit(1)
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
click.secho(f"[{cls.ontology}] Got an exception during OBO writing {type(e)}")
|
|
649
|
-
sys.exit(1)
|
|
763
|
+
inst.write_default(
|
|
764
|
+
write_obograph=True,
|
|
765
|
+
write_obo=True,
|
|
766
|
+
write_owl=owl,
|
|
767
|
+
write_ofn=ofn,
|
|
768
|
+
write_ttl=ttl,
|
|
769
|
+
write_nodes=True,
|
|
770
|
+
write_edges=True,
|
|
771
|
+
force=force or rewrite,
|
|
772
|
+
use_tqdm=True,
|
|
773
|
+
)
|
|
650
774
|
|
|
651
775
|
return _main
|
|
652
776
|
|
|
653
777
|
@property
|
|
654
778
|
def date_formatted(self) -> str:
|
|
655
779
|
"""Get the date as a formatted string."""
|
|
656
|
-
return (self.date if self.date else datetime.now()).strftime(DATE_FORMAT)
|
|
780
|
+
return (self.date if self.date else datetime.datetime.now()).strftime(DATE_FORMAT)
|
|
781
|
+
|
|
782
|
+
def _iter_terms_safe(self) -> Iterator[Term]:
|
|
783
|
+
if self.iter_only:
|
|
784
|
+
return iter(self.iter_terms(force=self.force))
|
|
785
|
+
return iter(self._items_accessor)
|
|
657
786
|
|
|
658
787
|
def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Term]:
|
|
788
|
+
yv = self._iter_terms_safe()
|
|
659
789
|
if use_tqdm:
|
|
660
|
-
total:
|
|
790
|
+
total: int | None
|
|
661
791
|
try:
|
|
662
792
|
total = len(self._items_accessor)
|
|
663
793
|
except TypeError:
|
|
664
794
|
total = None
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
yield from self
|
|
795
|
+
yv = tqdm(yv, desc=desc, unit_scale=True, unit="term", total=total)
|
|
796
|
+
yield from yv
|
|
668
797
|
|
|
669
|
-
def
|
|
670
|
-
|
|
671
|
-
yield
|
|
672
|
-
|
|
673
|
-
if self.auto_generated_by is not None:
|
|
674
|
-
yield f"auto-generated-by: {self.auto_generated_by}"
|
|
798
|
+
def _iter_stanzas(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Stanza]:
|
|
799
|
+
yield from self._iter_terms(use_tqdm=use_tqdm, desc=desc)
|
|
800
|
+
yield from self.typedefs or []
|
|
675
801
|
|
|
676
|
-
|
|
802
|
+
def iterate_obo_lines(
|
|
803
|
+
self,
|
|
804
|
+
emit_object_properties: bool = True,
|
|
805
|
+
emit_annotation_properties: bool = True,
|
|
806
|
+
) -> Iterable[str]:
|
|
807
|
+
"""Iterate over the lines to write in an OBO file.
|
|
808
|
+
|
|
809
|
+
Here's the order:
|
|
810
|
+
|
|
811
|
+
1. format-version (technically, this is the only required field)
|
|
812
|
+
2. data-version
|
|
813
|
+
3. date
|
|
814
|
+
4. saved-by
|
|
815
|
+
5. auto-generated-by
|
|
816
|
+
6. import
|
|
817
|
+
7. subsetdef
|
|
818
|
+
8. synonymtypedef
|
|
819
|
+
9. default-namespace
|
|
820
|
+
10. namespace-id-rule
|
|
821
|
+
11. idspace
|
|
822
|
+
12. treat-xrefs-as-equivalent
|
|
823
|
+
13. treat-xrefs-as-genus-differentia
|
|
824
|
+
14. treat-xrefs-as-relationship
|
|
825
|
+
15. treat-xrefs-as-is_a
|
|
826
|
+
16. remark
|
|
827
|
+
17. ontology
|
|
828
|
+
"""
|
|
829
|
+
# 1
|
|
830
|
+
yield f"format-version: {FORMAT_VERSION}"
|
|
831
|
+
# 2
|
|
832
|
+
if self.data_version:
|
|
677
833
|
yield f"data-version: {self.data_version}"
|
|
678
|
-
|
|
679
|
-
|
|
834
|
+
# 3
|
|
835
|
+
if self.date:
|
|
836
|
+
f"date: {self.date_formatted}"
|
|
837
|
+
# 4 TODO saved-by
|
|
838
|
+
# 5
|
|
839
|
+
if self.auto_generated_by:
|
|
840
|
+
yield f"auto-generated-by: {self.auto_generated_by}"
|
|
841
|
+
# 6
|
|
842
|
+
for imp in self.imports or []:
|
|
843
|
+
yield f"import: {imp}"
|
|
844
|
+
# 7
|
|
845
|
+
for subset, subset_remark in self.subsetdefs or []:
|
|
846
|
+
yield f'subsetdef: {reference_escape(subset, ontology_prefix=self.ontology)} "{subset_remark}"'
|
|
847
|
+
# 8
|
|
848
|
+
for synonym_typedef in sorted(self.synonym_typedefs or []):
|
|
849
|
+
if synonym_typedef.curie == DEFAULT_SYNONYM_TYPE.curie:
|
|
850
|
+
continue
|
|
851
|
+
yield synonym_typedef.to_obo(ontology_prefix=self.ontology)
|
|
852
|
+
# 9 TODO default-namespace
|
|
853
|
+
# 10 TODO namespace-id-rule
|
|
854
|
+
# 11
|
|
855
|
+
for prefix, url in sorted(self._get_clean_idspaces().items()):
|
|
856
|
+
if prefix in DEFAULT_PREFIX_MAP:
|
|
857
|
+
# we don't need to write out the 4 default prefixes from
|
|
858
|
+
# table 2 in https://www.w3.org/TR/owl2-syntax/#IRIs since
|
|
859
|
+
# they're considered to always be builtin
|
|
860
|
+
continue
|
|
680
861
|
|
|
681
|
-
|
|
682
|
-
|
|
862
|
+
# additional assumptions about built in
|
|
863
|
+
if prefix in {"obo", "oboInOwl"}:
|
|
864
|
+
continue
|
|
683
865
|
|
|
684
|
-
|
|
685
|
-
|
|
866
|
+
# ROBOT assumes that all OBO foundry prefixes are builtin,
|
|
867
|
+
# so don't re-declare them
|
|
868
|
+
if bioregistry.is_obo_foundry(prefix):
|
|
686
869
|
continue
|
|
687
|
-
yield synonym_typedef.to_obo()
|
|
688
870
|
|
|
871
|
+
yv = f"idspace: {prefix} {url}"
|
|
872
|
+
if _yv_name := bioregistry.get_name(prefix):
|
|
873
|
+
yv += f' "{_yv_name}"'
|
|
874
|
+
yield yv
|
|
875
|
+
# 12-15 are handled only during reading, and
|
|
876
|
+
# PyOBO unmacros things before outputting
|
|
877
|
+
# 12 treat-xrefs-as-equivalent
|
|
878
|
+
# 13 treat-xrefs-as-genus-differentia
|
|
879
|
+
# 14 treat-xrefs-as-relationship
|
|
880
|
+
# 15 treat-xrefs-as-is_a
|
|
881
|
+
# 16 TODO remark
|
|
882
|
+
# 17
|
|
689
883
|
yield f"ontology: {self.ontology}"
|
|
884
|
+
# 18 (secret)
|
|
885
|
+
yield from self._iterate_properties()
|
|
886
|
+
|
|
887
|
+
typedefs = self._index_typedefs()
|
|
888
|
+
synonym_typedefs = self._index_synonym_typedefs()
|
|
889
|
+
|
|
890
|
+
# PROPERTIES
|
|
891
|
+
for typedef in sorted(self.typedefs or []):
|
|
892
|
+
yield from typedef.iterate_obo_lines(
|
|
893
|
+
ontology_prefix=self.ontology,
|
|
894
|
+
typedefs=typedefs,
|
|
895
|
+
synonym_typedefs=synonym_typedefs,
|
|
896
|
+
)
|
|
690
897
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
898
|
+
# TERMS AND INSTANCES
|
|
899
|
+
for term in self._iter_terms():
|
|
900
|
+
yield from term.iterate_obo_lines(
|
|
901
|
+
ontology_prefix=self.ontology,
|
|
902
|
+
typedefs=typedefs,
|
|
903
|
+
synonym_typedefs=synonym_typedefs,
|
|
904
|
+
emit_object_properties=emit_object_properties,
|
|
905
|
+
emit_annotation_properties=emit_annotation_properties,
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
def _iterate_properties(self) -> Iterable[str]:
|
|
909
|
+
for predicate, value in self._iterate_property_pairs():
|
|
910
|
+
match value:
|
|
911
|
+
case OBOLiteral():
|
|
912
|
+
end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
|
|
913
|
+
case Reference():
|
|
914
|
+
end = reference_escape(value, ontology_prefix=self.ontology)
|
|
915
|
+
yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
|
|
916
|
+
|
|
917
|
+
def _iterate_property_pairs(self) -> Iterable[Annotation]:
|
|
918
|
+
# Title
|
|
919
|
+
if self.name:
|
|
920
|
+
yield Annotation(v.has_title, OBOLiteral.string(self.name))
|
|
921
|
+
|
|
922
|
+
# License
|
|
923
|
+
# TODO add SPDX to idspaces and use as a CURIE?
|
|
924
|
+
if license_spdx_id := bioregistry.get_license(self.ontology):
|
|
925
|
+
if license_spdx_id.startswith("http"):
|
|
926
|
+
license_literal = OBOLiteral.uri(license_spdx_id)
|
|
927
|
+
else:
|
|
928
|
+
license_literal = OBOLiteral.string(license_spdx_id)
|
|
929
|
+
yield Annotation(v.has_license, license_literal)
|
|
930
|
+
|
|
931
|
+
# Description
|
|
932
|
+
if description := bioregistry.get_description(self.ontology):
|
|
700
933
|
description = obo_escape_slim(description.strip())
|
|
701
|
-
yield
|
|
934
|
+
yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
|
|
702
935
|
|
|
936
|
+
# Root terms
|
|
703
937
|
for root_term in self.root_terms or []:
|
|
704
|
-
yield
|
|
938
|
+
yield Annotation(v.has_ontology_root_term, root_term)
|
|
939
|
+
|
|
940
|
+
# Extras
|
|
941
|
+
if self.property_values:
|
|
942
|
+
yield from self.property_values
|
|
705
943
|
|
|
706
|
-
|
|
707
|
-
|
|
944
|
+
def _index_typedefs(self) -> Mapping[ReferenceTuple, TypeDef]:
|
|
945
|
+
from .typedef import default_typedefs
|
|
946
|
+
|
|
947
|
+
return ChainMap(
|
|
948
|
+
{t.pair: t for t in self.typedefs or []},
|
|
949
|
+
default_typedefs,
|
|
950
|
+
)
|
|
708
951
|
|
|
709
|
-
|
|
710
|
-
|
|
952
|
+
def _index_synonym_typedefs(self) -> Mapping[ReferenceTuple, SynonymTypeDef]:
|
|
953
|
+
return ChainMap(
|
|
954
|
+
{t.pair: t for t in self.synonym_typedefs or []},
|
|
955
|
+
default_synonym_typedefs,
|
|
956
|
+
)
|
|
711
957
|
|
|
712
958
|
def write_obo(
|
|
713
|
-
self,
|
|
959
|
+
self,
|
|
960
|
+
file: None | str | TextIO | Path = None,
|
|
961
|
+
*,
|
|
962
|
+
use_tqdm: bool = False,
|
|
963
|
+
emit_object_properties: bool = True,
|
|
964
|
+
emit_annotation_properties: bool = True,
|
|
714
965
|
) -> None:
|
|
715
966
|
"""Write the OBO to a file."""
|
|
716
|
-
it = self.iterate_obo_lines(
|
|
967
|
+
it = self.iterate_obo_lines(
|
|
968
|
+
emit_object_properties=emit_object_properties,
|
|
969
|
+
emit_annotation_properties=emit_annotation_properties,
|
|
970
|
+
)
|
|
717
971
|
if use_tqdm:
|
|
718
|
-
it = tqdm(it, desc=f"
|
|
719
|
-
if isinstance(file,
|
|
972
|
+
it = tqdm(it, desc=f"[{self.ontology}] writing OBO", unit_scale=True, unit="line")
|
|
973
|
+
if isinstance(file, str | Path | os.PathLike):
|
|
720
974
|
with open(file, "w") as fh:
|
|
721
975
|
self._write_lines(it, fh)
|
|
722
976
|
else:
|
|
723
977
|
self._write_lines(it, file)
|
|
724
978
|
|
|
725
979
|
@staticmethod
|
|
726
|
-
def _write_lines(it, file:
|
|
980
|
+
def _write_lines(it, file: TextIO | None):
|
|
727
981
|
for line in it:
|
|
728
982
|
print(line, file=file)
|
|
729
983
|
|
|
730
|
-
def write_obonet_gz(self, path:
|
|
984
|
+
def write_obonet_gz(self, path: str | Path) -> None:
|
|
731
985
|
"""Write the OBO to a gzipped dump in Obonet JSON."""
|
|
732
986
|
graph = self.to_obonet()
|
|
733
|
-
|
|
734
|
-
json.dump(nx.node_link_data(graph), file)
|
|
987
|
+
write_gzipped_graph(path=path, graph=graph)
|
|
735
988
|
|
|
736
|
-
def
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
def _cache(self, *parts: str, name: Optional[str] = None) -> Path:
|
|
740
|
-
return self._path("cache", *parts, name=name)
|
|
741
|
-
|
|
742
|
-
@property
|
|
743
|
-
def _names_path(self) -> Path:
|
|
744
|
-
return self._cache(name="names.tsv")
|
|
745
|
-
|
|
746
|
-
@property
|
|
747
|
-
def _definitions_path(self) -> Path:
|
|
748
|
-
return self._cache(name="definitions.tsv")
|
|
989
|
+
def write_ofn(self, path: str | Path) -> None:
|
|
990
|
+
"""Write as Functional OWL (OFN)."""
|
|
991
|
+
from .functional.obo_to_functional import get_ofn_from_obo
|
|
749
992
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
return self._cache(name="species.tsv")
|
|
993
|
+
ofn = get_ofn_from_obo(self)
|
|
994
|
+
ofn.write_funowl(path)
|
|
753
995
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
996
|
+
def write_rdf(self, path: str | Path) -> None:
|
|
997
|
+
"""Write as Turtle RDF."""
|
|
998
|
+
from .functional.obo_to_functional import get_ofn_from_obo
|
|
757
999
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
return self._cache(name="alt_ids.tsv")
|
|
1000
|
+
ofn = get_ofn_from_obo(self)
|
|
1001
|
+
ofn.write_rdf(path)
|
|
761
1002
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
1003
|
+
def write_nodes(self, path: str | Path) -> None:
|
|
1004
|
+
"""Write a nodes TSV file."""
|
|
1005
|
+
# TODO reimplement internally
|
|
1006
|
+
self.get_graph().get_nodes_df().to_csv(path, sep="\t", index=False)
|
|
765
1007
|
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
1008
|
+
def write_edges(self, path: str | Path) -> None:
|
|
1009
|
+
"""Write a edges TSV file."""
|
|
1010
|
+
write_iterable_tsv(
|
|
1011
|
+
path=path,
|
|
1012
|
+
header=self.edges_header,
|
|
1013
|
+
it=self.iterate_edge_rows(),
|
|
1014
|
+
)
|
|
769
1015
|
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
return self._cache(name="relations.tsv")
|
|
1016
|
+
def _path(self, *parts: str, name: str | None = None) -> Path:
|
|
1017
|
+
return prefix_directory_join(self.ontology, *parts, name=name, version=self.data_version)
|
|
773
1018
|
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
return self._cache(name="properties.tsv")
|
|
1019
|
+
def _get_cache_path(self, name: CacheArtifact) -> Path:
|
|
1020
|
+
return get_cache_path(self.ontology, name=name, version=self.data_version)
|
|
777
1021
|
|
|
778
1022
|
@property
|
|
779
1023
|
def _root_metadata_path(self) -> Path:
|
|
780
1024
|
return prefix_directory_join(self.ontology, name="metadata.json")
|
|
781
1025
|
|
|
782
|
-
@property
|
|
783
|
-
def _versioned_metadata_path(self) -> Path:
|
|
784
|
-
return self._cache(name="metadata.json")
|
|
785
|
-
|
|
786
1026
|
@property
|
|
787
1027
|
def _obo_path(self) -> Path:
|
|
788
|
-
return
|
|
1028
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo")
|
|
789
1029
|
|
|
790
1030
|
@property
|
|
791
1031
|
def _obograph_path(self) -> Path:
|
|
792
|
-
return self._path(name=f"{self.ontology}.json")
|
|
1032
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json")
|
|
793
1033
|
|
|
794
1034
|
@property
|
|
795
1035
|
def _owl_path(self) -> Path:
|
|
796
|
-
return self._path(name=f"{self.ontology}.owl")
|
|
1036
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl")
|
|
797
1037
|
|
|
798
1038
|
@property
|
|
799
1039
|
def _obonet_gz_path(self) -> Path:
|
|
800
|
-
return self._path(name=f"{self.ontology}.obonet.json.gz")
|
|
1040
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obonet.json.gz")
|
|
801
1041
|
|
|
802
1042
|
@property
|
|
803
|
-
def
|
|
804
|
-
return self._path(name=f"{self.ontology}.
|
|
805
|
-
|
|
806
|
-
def write_default(
|
|
807
|
-
self,
|
|
808
|
-
use_tqdm: bool = False,
|
|
809
|
-
force: bool = False,
|
|
810
|
-
write_obo: bool = False,
|
|
811
|
-
write_obonet: bool = False,
|
|
812
|
-
write_obograph: bool = False,
|
|
813
|
-
write_owl: bool = False,
|
|
814
|
-
write_nodes: bool = False,
|
|
815
|
-
) -> None:
|
|
816
|
-
"""Write the OBO to the default path."""
|
|
817
|
-
metadata = self.get_metadata()
|
|
818
|
-
for path in (self._root_metadata_path, self._versioned_metadata_path):
|
|
819
|
-
logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
|
|
820
|
-
with path.open("w") as file:
|
|
821
|
-
json.dump(metadata, file, indent=2)
|
|
1043
|
+
def _ofn_path(self) -> Path:
|
|
1044
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn")
|
|
822
1045
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
)
|
|
826
|
-
typedef_df: pd.DataFrame = self.get_typedef_df()
|
|
827
|
-
typedef_df.sort_values(list(typedef_df.columns), inplace=True)
|
|
828
|
-
typedef_df.to_csv(self._typedefs_path, sep="\t", index=False)
|
|
1046
|
+
@property
|
|
1047
|
+
def _ttl_path(self) -> Path:
|
|
1048
|
+
return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ttl")
|
|
829
1049
|
|
|
830
|
-
|
|
831
|
-
|
|
1050
|
+
def _get_cache_config(self) -> list[tuple[CacheArtifact, Sequence[str], Callable]]:
|
|
1051
|
+
return [
|
|
1052
|
+
(CacheArtifact.names, [f"{self.ontology}_id", "name"], self.iterate_id_name),
|
|
832
1053
|
(
|
|
833
|
-
|
|
834
|
-
self._definitions_path,
|
|
1054
|
+
CacheArtifact.definitions,
|
|
835
1055
|
[f"{self.ontology}_id", "definition"],
|
|
836
1056
|
self.iterate_id_definition,
|
|
837
1057
|
),
|
|
838
1058
|
(
|
|
839
|
-
|
|
840
|
-
self._species_path,
|
|
1059
|
+
CacheArtifact.species,
|
|
841
1060
|
[f"{self.ontology}_id", "taxonomy_id"],
|
|
842
1061
|
self.iterate_id_species,
|
|
843
1062
|
),
|
|
844
1063
|
(
|
|
845
|
-
|
|
846
|
-
|
|
1064
|
+
# TODO deprecate this in favor of literal mappings output
|
|
1065
|
+
CacheArtifact.synonyms,
|
|
847
1066
|
[f"{self.ontology}_id", "synonym"],
|
|
848
1067
|
self.iterate_synonym_rows,
|
|
849
1068
|
),
|
|
850
|
-
(
|
|
851
|
-
(
|
|
852
|
-
(
|
|
853
|
-
(
|
|
854
|
-
|
|
1069
|
+
(CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
|
|
1070
|
+
(CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
|
|
1071
|
+
(CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
|
|
1072
|
+
(CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
|
|
1073
|
+
(
|
|
1074
|
+
# TODO deprecate this in favor of pair of literal and object properties
|
|
1075
|
+
CacheArtifact.properties,
|
|
1076
|
+
self.properties_header,
|
|
1077
|
+
self._iter_property_rows,
|
|
1078
|
+
),
|
|
1079
|
+
(
|
|
1080
|
+
CacheArtifact.object_properties,
|
|
1081
|
+
self.object_properties_header,
|
|
1082
|
+
self.iter_object_properties,
|
|
1083
|
+
),
|
|
1084
|
+
(
|
|
1085
|
+
CacheArtifact.literal_properties,
|
|
1086
|
+
self.literal_properties_header,
|
|
1087
|
+
self.iter_literal_properties,
|
|
1088
|
+
),
|
|
1089
|
+
(
|
|
1090
|
+
CacheArtifact.literal_mappings,
|
|
1091
|
+
ssslm.LiteralMappingTuple._fields,
|
|
1092
|
+
self.iterate_literal_mapping_rows,
|
|
1093
|
+
),
|
|
1094
|
+
]
|
|
1095
|
+
|
|
1096
|
+
def write_metadata(self) -> None:
|
|
1097
|
+
"""Write the metadata JSON file."""
|
|
1098
|
+
metadata = self.get_metadata()
|
|
1099
|
+
for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
|
|
1100
|
+
logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
|
|
1101
|
+
with path.open("w") as file:
|
|
1102
|
+
json.dump(metadata, file, indent=2)
|
|
1103
|
+
|
|
1104
|
+
def write_prefix_map(self) -> None:
|
|
1105
|
+
"""Write a prefix map file that includes all prefixes used in this ontology."""
|
|
1106
|
+
with self._get_cache_path(CacheArtifact.prefixes).open("w") as file:
|
|
1107
|
+
json.dump(self._get_clean_idspaces(), file, indent=2)
|
|
1108
|
+
|
|
1109
|
+
def write_cache(self, *, force: bool = False) -> None:
|
|
1110
|
+
"""Write cache parts."""
|
|
1111
|
+
typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
|
|
1112
|
+
logger.debug(
|
|
1113
|
+
"[%s v%s] caching typedefs to %s",
|
|
1114
|
+
self.ontology,
|
|
1115
|
+
self.data_version,
|
|
1116
|
+
typedefs_path,
|
|
1117
|
+
)
|
|
1118
|
+
typedef_df: pd.DataFrame = self.get_typedef_df()
|
|
1119
|
+
typedef_df.sort_values(list(typedef_df.columns), inplace=True)
|
|
1120
|
+
typedef_df.to_csv(typedefs_path, sep="\t", index=False)
|
|
1121
|
+
|
|
1122
|
+
for cache_artifact, header, fn in self._get_cache_config():
|
|
1123
|
+
path = self._get_cache_path(cache_artifact)
|
|
855
1124
|
if path.exists() and not force:
|
|
856
1125
|
continue
|
|
857
|
-
|
|
1126
|
+
tqdm.write(
|
|
1127
|
+
f"[{self.ontology} {self.data_version}] writing {cache_artifact.name} to {path}",
|
|
1128
|
+
)
|
|
858
1129
|
write_iterable_tsv(
|
|
859
1130
|
path=path,
|
|
860
1131
|
header=header,
|
|
861
1132
|
it=fn(), # type:ignore
|
|
862
1133
|
)
|
|
863
1134
|
|
|
864
|
-
|
|
865
|
-
|
|
1135
|
+
typedefs = self._index_typedefs()
|
|
1136
|
+
for relation in (v.is_a, v.has_part, v.part_of, v.from_species, v.orthologous):
|
|
1137
|
+
if relation is not v.is_a and relation.pair not in typedefs:
|
|
866
1138
|
continue
|
|
867
|
-
relations_path =
|
|
1139
|
+
relations_path = get_relation_cache_path(
|
|
1140
|
+
self.ontology, reference=relation, version=self.data_version
|
|
1141
|
+
)
|
|
868
1142
|
if relations_path.exists() and not force:
|
|
869
1143
|
continue
|
|
870
1144
|
logger.debug(
|
|
@@ -880,36 +1154,83 @@ class Obo:
|
|
|
880
1154
|
relation_df.sort_values(list(relation_df.columns), inplace=True)
|
|
881
1155
|
relation_df.to_csv(relations_path, sep="\t", index=False)
|
|
882
1156
|
|
|
883
|
-
|
|
1157
|
+
def write_default(
|
|
1158
|
+
self,
|
|
1159
|
+
use_tqdm: bool = False,
|
|
1160
|
+
force: bool = False,
|
|
1161
|
+
write_obo: bool = False,
|
|
1162
|
+
write_obonet: bool = False,
|
|
1163
|
+
write_obograph: bool = False,
|
|
1164
|
+
write_owl: bool = False,
|
|
1165
|
+
write_ofn: bool = False,
|
|
1166
|
+
write_ttl: bool = False,
|
|
1167
|
+
write_nodes: bool = True,
|
|
1168
|
+
write_edges: bool = True,
|
|
1169
|
+
obograph_use_internal: bool = False,
|
|
1170
|
+
write_cache: bool = True,
|
|
1171
|
+
) -> None:
|
|
1172
|
+
"""Write the OBO to the default path."""
|
|
1173
|
+
self.write_metadata()
|
|
1174
|
+
self.write_prefix_map()
|
|
1175
|
+
if write_cache:
|
|
1176
|
+
self.write_cache(force=force)
|
|
1177
|
+
if write_obo and (not self._obo_path.exists() or force):
|
|
1178
|
+
tqdm.write(f"[{self.ontology}] writing OBO to {self._obo_path}")
|
|
884
1179
|
self.write_obo(self._obo_path, use_tqdm=use_tqdm)
|
|
1180
|
+
if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.exists() or force):
|
|
1181
|
+
tqdm.write(f"[{self.ontology}] writing OFN to {self._ofn_path}")
|
|
1182
|
+
self.write_ofn(self._ofn_path)
|
|
885
1183
|
if write_obograph and (not self._obograph_path.exists() or force):
|
|
886
|
-
|
|
1184
|
+
if obograph_use_internal:
|
|
1185
|
+
tqdm.write(f"[{self.ontology}] writing OBO Graph to {self._obograph_path}")
|
|
1186
|
+
self.write_obograph(self._obograph_path)
|
|
1187
|
+
else:
|
|
1188
|
+
import bioontologies.robot
|
|
1189
|
+
|
|
1190
|
+
tqdm.write(
|
|
1191
|
+
f"[{self.ontology}] converting OFN to OBO Graph at {self._obograph_path}"
|
|
1192
|
+
)
|
|
1193
|
+
bioontologies.robot.convert(
|
|
1194
|
+
self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
|
|
1195
|
+
)
|
|
887
1196
|
if write_owl and (not self._owl_path.exists() or force):
|
|
888
|
-
|
|
1197
|
+
tqdm.write(f"[{self.ontology}] writing OWL to {self._owl_path}")
|
|
1198
|
+
import bioontologies.robot
|
|
1199
|
+
|
|
1200
|
+
bioontologies.robot.convert(
|
|
1201
|
+
self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
|
|
1202
|
+
)
|
|
1203
|
+
if write_ttl and (not self._ttl_path.exists() or force):
|
|
1204
|
+
tqdm.write(f"[{self.ontology}] writing Turtle to {self._ttl_path}")
|
|
1205
|
+
self.write_rdf(self._ttl_path)
|
|
889
1206
|
if write_obonet and (not self._obonet_gz_path.exists() or force):
|
|
890
|
-
|
|
1207
|
+
tqdm.write(f"[{self.ontology}] writing obonet to {self._obonet_gz_path}")
|
|
891
1208
|
self.write_obonet_gz(self._obonet_gz_path)
|
|
892
1209
|
if write_nodes:
|
|
893
|
-
self.
|
|
1210
|
+
nodes_path = self._get_cache_path(CacheArtifact.nodes)
|
|
1211
|
+
tqdm.write(f"[{self.ontology}] writing nodes TSV to {nodes_path}")
|
|
1212
|
+
self.write_nodes(nodes_path)
|
|
894
1213
|
|
|
895
1214
|
@property
|
|
896
|
-
def _items_accessor(self):
|
|
1215
|
+
def _items_accessor(self) -> list[Term]:
|
|
897
1216
|
if self._items is None:
|
|
898
|
-
key
|
|
899
|
-
self._items = sorted(
|
|
1217
|
+
# if the term sort key is None, then the terms get sorted by their reference
|
|
1218
|
+
self._items = sorted(
|
|
1219
|
+
self.iter_terms(force=self.force),
|
|
1220
|
+
)
|
|
900
1221
|
return self._items
|
|
901
1222
|
|
|
902
|
-
def __iter__(self) -> Iterator[
|
|
903
|
-
|
|
904
|
-
return iter(self.iter_terms(force=self.force))
|
|
905
|
-
return iter(self._items_accessor)
|
|
1223
|
+
def __iter__(self) -> Iterator[Term]:
|
|
1224
|
+
yield from self._iter_terms_safe()
|
|
906
1225
|
|
|
907
1226
|
def ancestors(self, identifier: str) -> set[str]:
|
|
908
1227
|
"""Return a set of identifiers for parents of the given identifier."""
|
|
1228
|
+
# FIXME switch to references
|
|
909
1229
|
return nx.descendants(self.hierarchy, identifier) # note this is backwards
|
|
910
1230
|
|
|
911
1231
|
def descendants(self, identifier: str) -> set[str]:
|
|
912
1232
|
"""Return a set of identifiers for the children of the given identifier."""
|
|
1233
|
+
# FIXME switch to references
|
|
913
1234
|
return nx.ancestors(self.hierarchy, identifier) # note this is backwards
|
|
914
1235
|
|
|
915
1236
|
def is_descendant(self, descendant: str, ancestor: str) -> bool:
|
|
@@ -917,9 +1238,9 @@ class Obo:
|
|
|
917
1238
|
|
|
918
1239
|
.. code-block:: python
|
|
919
1240
|
|
|
920
|
-
from pyobo import
|
|
1241
|
+
from pyobo import get_ontology
|
|
921
1242
|
|
|
922
|
-
obo =
|
|
1243
|
+
obo = get_ontology("go")
|
|
923
1244
|
|
|
924
1245
|
interleukin_10_complex = "1905571" # interleukin-10 receptor complex
|
|
925
1246
|
all_complexes = "0032991"
|
|
@@ -935,21 +1256,22 @@ class Obo:
|
|
|
935
1256
|
|
|
936
1257
|
.. code-block:: python
|
|
937
1258
|
|
|
938
|
-
from pyobo import
|
|
1259
|
+
from pyobo import get_ontology
|
|
939
1260
|
|
|
940
|
-
obo =
|
|
1261
|
+
obo = get_ontology("go")
|
|
941
1262
|
|
|
942
1263
|
identifier = "1905571" # interleukin-10 receptor complex
|
|
943
1264
|
is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier) # should be true
|
|
944
1265
|
"""
|
|
945
1266
|
if self._hierarchy is None:
|
|
946
1267
|
self._hierarchy = nx.DiGraph()
|
|
947
|
-
for
|
|
948
|
-
for parent in
|
|
949
|
-
|
|
1268
|
+
for stanza in self._iter_stanzas(desc=f"[{self.ontology}] getting hierarchy"):
|
|
1269
|
+
for parent in stanza.parents:
|
|
1270
|
+
# FIXME add referneces
|
|
1271
|
+
self._hierarchy.add_edge(stanza.identifier, parent.identifier)
|
|
950
1272
|
return self._hierarchy
|
|
951
1273
|
|
|
952
|
-
def to_obonet(self:
|
|
1274
|
+
def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
|
|
953
1275
|
"""Export as a :mod`obonet` style graph."""
|
|
954
1276
|
rv = nx.MultiDiGraph()
|
|
955
1277
|
rv.graph.update(
|
|
@@ -957,46 +1279,56 @@ class Obo:
|
|
|
957
1279
|
"name": self.name,
|
|
958
1280
|
"ontology": self.ontology,
|
|
959
1281
|
"auto-generated-by": self.auto_generated_by,
|
|
960
|
-
"
|
|
961
|
-
"format-version": self.format_version,
|
|
1282
|
+
"format-version": FORMAT_VERSION,
|
|
962
1283
|
"data-version": self.data_version,
|
|
963
|
-
"synonymtypedef": _convert_synonym_typedefs(self.synonym_typedefs),
|
|
964
1284
|
"date": self.date_formatted,
|
|
1285
|
+
"typedefs": [typedef.reference.model_dump() for typedef in self.typedefs or []],
|
|
1286
|
+
"synonymtypedef": [
|
|
1287
|
+
synonym_typedef.to_obo(ontology_prefix=self.ontology)
|
|
1288
|
+
for synonym_typedef in self.synonym_typedefs or []
|
|
1289
|
+
],
|
|
965
1290
|
}
|
|
966
1291
|
)
|
|
967
1292
|
|
|
968
1293
|
nodes = {}
|
|
1294
|
+
#: a list of 3-tuples u,v,k
|
|
969
1295
|
links = []
|
|
970
|
-
|
|
1296
|
+
typedefs = self._index_typedefs()
|
|
1297
|
+
synonym_typedefs = self._index_synonym_typedefs()
|
|
1298
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
971
1299
|
parents = []
|
|
972
|
-
for parent in
|
|
1300
|
+
for parent in stanza.parents:
|
|
973
1301
|
if parent is None:
|
|
974
1302
|
raise ValueError("parent should not be none!")
|
|
975
|
-
links.append((
|
|
1303
|
+
links.append((stanza.curie, "is_a", parent.curie))
|
|
976
1304
|
parents.append(parent.curie)
|
|
977
1305
|
|
|
978
1306
|
relations = []
|
|
979
|
-
for typedef, target in
|
|
980
|
-
if target is None:
|
|
981
|
-
raise ValueError("target should not be none!")
|
|
1307
|
+
for typedef, target in stanza.iterate_relations():
|
|
982
1308
|
relations.append(f"{typedef.curie} {target.curie}")
|
|
983
|
-
links.append((
|
|
1309
|
+
links.append((stanza.curie, typedef.curie, target.curie))
|
|
1310
|
+
|
|
1311
|
+
for typedef, targets in sorted(stanza.properties.items()):
|
|
1312
|
+
for target_or_literal in targets:
|
|
1313
|
+
if isinstance(target_or_literal, curies.Reference):
|
|
1314
|
+
links.append((stanza.curie, typedef.curie, target_or_literal.curie))
|
|
984
1315
|
|
|
985
1316
|
d = {
|
|
986
|
-
"id":
|
|
987
|
-
"name":
|
|
988
|
-
"def":
|
|
989
|
-
"xref": [xref.curie for xref in
|
|
1317
|
+
"id": stanza.curie,
|
|
1318
|
+
"name": stanza.name,
|
|
1319
|
+
"def": stanza.definition and stanza._definition_fp(),
|
|
1320
|
+
"xref": [xref.curie for xref in stanza.xrefs],
|
|
990
1321
|
"is_a": parents,
|
|
991
1322
|
"relationship": relations,
|
|
992
|
-
"synonym": [
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
for prop, values in term.properties.items()
|
|
996
|
-
for value in values
|
|
1323
|
+
"synonym": [
|
|
1324
|
+
synonym._fp(ontology_prefix=self.ontology, synonym_typedefs=synonym_typedefs)
|
|
1325
|
+
for synonym in stanza.synonyms
|
|
997
1326
|
],
|
|
1327
|
+
"property_value": list(
|
|
1328
|
+
stanza._iterate_obo_properties(ontology_prefix=self.ontology, typedefs=typedefs)
|
|
1329
|
+
),
|
|
998
1330
|
}
|
|
999
|
-
nodes[
|
|
1331
|
+
nodes[stanza.curie] = {k: v for k, v in d.items() if v}
|
|
1000
1332
|
|
|
1001
1333
|
rv.add_nodes_from(nodes.items())
|
|
1002
1334
|
for _source, _key, _target in links:
|
|
@@ -1017,11 +1349,21 @@ class Obo:
|
|
|
1017
1349
|
"date": self.date and self.date.isoformat(),
|
|
1018
1350
|
}
|
|
1019
1351
|
|
|
1352
|
+
def iterate_references(self, *, use_tqdm: bool = False) -> Iterable[Reference]:
|
|
1353
|
+
"""Iterate over identifiers."""
|
|
1354
|
+
for stanza in self._iter_stanzas(
|
|
1355
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
|
|
1356
|
+
):
|
|
1357
|
+
if self._in_ontology(stanza.reference):
|
|
1358
|
+
yield stanza.reference
|
|
1359
|
+
|
|
1020
1360
|
def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]:
|
|
1021
1361
|
"""Iterate over identifiers."""
|
|
1022
|
-
for
|
|
1023
|
-
|
|
1024
|
-
|
|
1362
|
+
for stanza in self._iter_stanzas(
|
|
1363
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
|
|
1364
|
+
):
|
|
1365
|
+
if self._in_ontology_strict(stanza.reference):
|
|
1366
|
+
yield stanza.identifier
|
|
1025
1367
|
|
|
1026
1368
|
def get_ids(self, *, use_tqdm: bool = False) -> set[str]:
|
|
1027
1369
|
"""Get the set of identifiers."""
|
|
@@ -1029,9 +1371,11 @@ class Obo:
|
|
|
1029
1371
|
|
|
1030
1372
|
def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1031
1373
|
"""Iterate identifier name pairs."""
|
|
1032
|
-
for
|
|
1033
|
-
|
|
1034
|
-
|
|
1374
|
+
for stanza in self._iter_stanzas(
|
|
1375
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
|
|
1376
|
+
):
|
|
1377
|
+
if self._in_ontology(stanza.reference) and stanza.name:
|
|
1378
|
+
yield stanza.identifier, stanza.name
|
|
1035
1379
|
|
|
1036
1380
|
def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
|
|
1037
1381
|
"""Get a mapping from identifiers to names."""
|
|
@@ -1039,11 +1383,13 @@ class Obo:
|
|
|
1039
1383
|
|
|
1040
1384
|
def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1041
1385
|
"""Iterate over pairs of terms' identifiers and their respective definitions."""
|
|
1042
|
-
for
|
|
1043
|
-
|
|
1386
|
+
for stanza in self._iter_stanzas(
|
|
1387
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
|
|
1388
|
+
):
|
|
1389
|
+
if stanza.identifier and stanza.definition:
|
|
1044
1390
|
yield (
|
|
1045
|
-
|
|
1046
|
-
|
|
1391
|
+
stanza.identifier,
|
|
1392
|
+
stanza.definition.strip('"')
|
|
1047
1393
|
.replace("\n", " ")
|
|
1048
1394
|
.replace("\t", " ")
|
|
1049
1395
|
.replace(" ", " "),
|
|
@@ -1056,11 +1402,11 @@ class Obo:
|
|
|
1056
1402
|
def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]:
|
|
1057
1403
|
"""Get the set of obsolete identifiers."""
|
|
1058
1404
|
return {
|
|
1059
|
-
|
|
1060
|
-
for
|
|
1405
|
+
stanza.identifier
|
|
1406
|
+
for stanza in self._iter_stanzas(
|
|
1061
1407
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting obsolete"
|
|
1062
1408
|
)
|
|
1063
|
-
if
|
|
1409
|
+
if stanza.identifier and stanza.is_obsolete
|
|
1064
1410
|
}
|
|
1065
1411
|
|
|
1066
1412
|
############
|
|
@@ -1068,18 +1414,19 @@ class Obo:
|
|
|
1068
1414
|
############
|
|
1069
1415
|
|
|
1070
1416
|
def iterate_id_species(
|
|
1071
|
-
self, *, prefix:
|
|
1417
|
+
self, *, prefix: str | None = None, use_tqdm: bool = False
|
|
1072
1418
|
) -> Iterable[tuple[str, str]]:
|
|
1073
1419
|
"""Iterate over terms' identifiers and respective species (if available)."""
|
|
1074
1420
|
if prefix is None:
|
|
1075
1421
|
prefix = NCBITAXON_PREFIX
|
|
1076
|
-
for
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1422
|
+
for stanza in self._iter_stanzas(
|
|
1423
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting species"
|
|
1424
|
+
):
|
|
1425
|
+
if isinstance(stanza, Term) and (species := stanza.get_species(prefix=prefix)):
|
|
1426
|
+
yield stanza.identifier, species.identifier
|
|
1080
1427
|
|
|
1081
1428
|
def get_id_species_mapping(
|
|
1082
|
-
self, *, prefix:
|
|
1429
|
+
self, *, prefix: str | None = None, use_tqdm: bool = False
|
|
1083
1430
|
) -> Mapping[str, str]:
|
|
1084
1431
|
"""Get a mapping from identifiers to species."""
|
|
1085
1432
|
return dict(self.iterate_id_species(prefix=prefix, use_tqdm=use_tqdm))
|
|
@@ -1109,42 +1456,103 @@ class Obo:
|
|
|
1109
1456
|
# PROPS #
|
|
1110
1457
|
#########
|
|
1111
1458
|
|
|
1112
|
-
def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1459
|
+
def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Annotation]]:
|
|
1113
1460
|
"""Iterate over tuples of terms, properties, and their values."""
|
|
1114
|
-
|
|
1115
|
-
for term in self._iter_terms(
|
|
1461
|
+
for stanza in self._iter_stanzas(
|
|
1116
1462
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting properties"
|
|
1117
1463
|
):
|
|
1118
|
-
for
|
|
1119
|
-
yield
|
|
1464
|
+
for property_tuple in stanza.get_property_annotations():
|
|
1465
|
+
yield stanza, property_tuple
|
|
1120
1466
|
|
|
1121
1467
|
@property
|
|
1122
1468
|
def properties_header(self):
|
|
1123
1469
|
"""Property dataframe header."""
|
|
1124
|
-
return [f"{self.ontology}_id", "property", "value"]
|
|
1470
|
+
return [f"{self.ontology}_id", "property", "value", "datatype", "language"]
|
|
1471
|
+
|
|
1472
|
+
@property
|
|
1473
|
+
def object_properties_header(self):
|
|
1474
|
+
"""Property dataframe header."""
|
|
1475
|
+
return ["source", "predicate", "target"]
|
|
1476
|
+
|
|
1477
|
+
@property
|
|
1478
|
+
def literal_properties_header(self):
|
|
1479
|
+
"""Property dataframe header."""
|
|
1480
|
+
return ["source", "predicate", "target", "datatype", "language"]
|
|
1125
1481
|
|
|
1126
|
-
def
|
|
1482
|
+
def _iter_property_rows(
|
|
1483
|
+
self, *, use_tqdm: bool = False
|
|
1484
|
+
) -> Iterable[tuple[str, str, str, str, str]]:
|
|
1127
1485
|
"""Iterate property rows."""
|
|
1128
|
-
for term,
|
|
1129
|
-
|
|
1486
|
+
for term, t in self.iterate_properties(use_tqdm=use_tqdm):
|
|
1487
|
+
pred = term._reference(t.predicate, ontology_prefix=self.ontology)
|
|
1488
|
+
match t.value:
|
|
1489
|
+
case OBOLiteral(value, datatype, language):
|
|
1490
|
+
yield (
|
|
1491
|
+
term.identifier,
|
|
1492
|
+
pred,
|
|
1493
|
+
value,
|
|
1494
|
+
get_preferred_curie(datatype),
|
|
1495
|
+
language or "",
|
|
1496
|
+
)
|
|
1497
|
+
case Reference() as obj:
|
|
1498
|
+
yield term.identifier, pred, get_preferred_curie(obj), "", ""
|
|
1499
|
+
case _:
|
|
1500
|
+
raise TypeError(f"got: {type(t)} - {t}")
|
|
1501
|
+
|
|
1502
|
+
def get_properties_df(self, *, use_tqdm: bool = False, drop_na: bool = True) -> pd.DataFrame:
|
|
1503
|
+
"""Get all properties as a dataframe."""
|
|
1504
|
+
df = pd.DataFrame(
|
|
1505
|
+
self._iter_property_rows(use_tqdm=use_tqdm),
|
|
1506
|
+
columns=self.properties_header,
|
|
1507
|
+
)
|
|
1508
|
+
if drop_na:
|
|
1509
|
+
df.dropna(inplace=True)
|
|
1510
|
+
return df
|
|
1511
|
+
|
|
1512
|
+
def iter_object_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
|
|
1513
|
+
"""Iterate over object property triples."""
|
|
1514
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1515
|
+
for predicate, target in stanza.iterate_object_properties():
|
|
1516
|
+
yield stanza.curie, predicate.curie, target.curie
|
|
1130
1517
|
|
|
1131
|
-
def
|
|
1518
|
+
def get_object_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1132
1519
|
"""Get all properties as a dataframe."""
|
|
1133
1520
|
return pd.DataFrame(
|
|
1134
|
-
|
|
1135
|
-
columns=self.properties_header,
|
|
1521
|
+
self.iter_object_properties(use_tqdm=use_tqdm), columns=self.object_properties_header
|
|
1136
1522
|
)
|
|
1137
1523
|
|
|
1524
|
+
def iter_literal_properties(
|
|
1525
|
+
self, *, use_tqdm: bool = False
|
|
1526
|
+
) -> Iterable[tuple[str, str, str, str, str]]:
|
|
1527
|
+
"""Iterate over literal properties quads."""
|
|
1528
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1529
|
+
for predicate, target in stanza.iterate_literal_properties():
|
|
1530
|
+
yield (
|
|
1531
|
+
stanza.curie,
|
|
1532
|
+
predicate.curie,
|
|
1533
|
+
target.value,
|
|
1534
|
+
target.datatype.curie,
|
|
1535
|
+
target.language or "",
|
|
1536
|
+
)
|
|
1537
|
+
|
|
1538
|
+
def get_literal_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1539
|
+
"""Get all properties as a dataframe."""
|
|
1540
|
+
return pd.DataFrame(self.iter_literal_properties(), columns=self.literal_properties_header)
|
|
1541
|
+
|
|
1138
1542
|
def iterate_filtered_properties(
|
|
1139
|
-
self, prop:
|
|
1140
|
-
) -> Iterable[tuple[
|
|
1543
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1544
|
+
) -> Iterable[tuple[Stanza, str]]:
|
|
1141
1545
|
"""Iterate over tuples of terms and the values for the given property."""
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1546
|
+
prop = _ensure_ref(prop)
|
|
1547
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1548
|
+
for t in stanza.get_property_annotations():
|
|
1549
|
+
if t.predicate != prop:
|
|
1550
|
+
continue
|
|
1551
|
+
yield stanza, reference_or_literal_to_str(t.value)
|
|
1552
|
+
|
|
1553
|
+
def get_filtered_properties_df(
|
|
1554
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1555
|
+
) -> pd.DataFrame:
|
|
1148
1556
|
"""Get a dataframe of terms' identifiers to the given property's values."""
|
|
1149
1557
|
return pd.DataFrame(
|
|
1150
1558
|
list(self.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm).items()),
|
|
@@ -1152,7 +1560,7 @@ class Obo:
|
|
|
1152
1560
|
)
|
|
1153
1561
|
|
|
1154
1562
|
def get_filtered_properties_mapping(
|
|
1155
|
-
self, prop:
|
|
1563
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1156
1564
|
) -> Mapping[str, str]:
|
|
1157
1565
|
"""Get a mapping from a term's identifier to the property.
|
|
1158
1566
|
|
|
@@ -1164,7 +1572,7 @@ class Obo:
|
|
|
1164
1572
|
}
|
|
1165
1573
|
|
|
1166
1574
|
def get_filtered_properties_multimapping(
|
|
1167
|
-
self, prop:
|
|
1575
|
+
self, prop: ReferenceHint, *, use_tqdm: bool = False
|
|
1168
1576
|
) -> Mapping[str, list[str]]:
|
|
1169
1577
|
"""Get a mapping from a term's identifier to the property values."""
|
|
1170
1578
|
return multidict(
|
|
@@ -1176,22 +1584,63 @@ class Obo:
|
|
|
1176
1584
|
# RELATIONS #
|
|
1177
1585
|
#############
|
|
1178
1586
|
|
|
1587
|
+
def iterate_edges(
|
|
1588
|
+
self, *, use_tqdm: bool = False
|
|
1589
|
+
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
|
|
1590
|
+
"""Iterate over triples of terms, relations, and their targets."""
|
|
1591
|
+
_warned: set[ReferenceTuple] = set()
|
|
1592
|
+
typedefs = self._index_typedefs()
|
|
1593
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] edge"):
|
|
1594
|
+
for predicate, reference in stanza._iter_edges():
|
|
1595
|
+
if td := self._get_typedef(stanza, predicate, _warned, typedefs):
|
|
1596
|
+
yield stanza, td, reference
|
|
1597
|
+
|
|
1598
|
+
@property
|
|
1599
|
+
def edges_header(self) -> Sequence[str]:
|
|
1600
|
+
"""Header for the edges dataframe."""
|
|
1601
|
+
return [":START_ID", ":TYPE", ":END_ID"]
|
|
1602
|
+
|
|
1179
1603
|
def iterate_relations(
|
|
1180
1604
|
self, *, use_tqdm: bool = False
|
|
1181
|
-
) -> Iterable[tuple[
|
|
1182
|
-
"""Iterate over tuples of terms, relations, and their targets.
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1605
|
+
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
|
|
1606
|
+
"""Iterate over tuples of terms, relations, and their targets.
|
|
1607
|
+
|
|
1608
|
+
This only outputs stuff from the `relationship:` tag, not
|
|
1609
|
+
all possible triples. For that, see :func:`iterate_edges`.
|
|
1610
|
+
"""
|
|
1611
|
+
_warned: set[ReferenceTuple] = set()
|
|
1612
|
+
typedefs = self._index_typedefs()
|
|
1613
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] relation"):
|
|
1614
|
+
for predicate, reference in stanza.iterate_relations():
|
|
1615
|
+
if td := self._get_typedef(stanza, predicate, _warned, typedefs):
|
|
1616
|
+
yield stanza, td, reference
|
|
1617
|
+
|
|
1618
|
+
def get_edges_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
|
|
1619
|
+
"""Get an edges dataframe."""
|
|
1620
|
+
return pd.DataFrame(self.iterate_edge_rows(use_tqdm=use_tqdm), columns=self.edges_header)
|
|
1621
|
+
|
|
1622
|
+
def iterate_edge_rows(self, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
|
|
1623
|
+
"""Iterate the edge rows."""
|
|
1624
|
+
for term, typedef, reference in self.iterate_edges(use_tqdm=use_tqdm):
|
|
1625
|
+
yield term.curie, typedef.curie, reference.curie
|
|
1626
|
+
|
|
1627
|
+
def _get_typedef(
|
|
1628
|
+
self,
|
|
1629
|
+
term: Stanza,
|
|
1630
|
+
predicate: Reference,
|
|
1631
|
+
_warned: set[ReferenceTuple],
|
|
1632
|
+
typedefs: Mapping[ReferenceTuple, TypeDef],
|
|
1633
|
+
) -> TypeDef | None:
|
|
1634
|
+
pp = predicate.pair
|
|
1635
|
+
if pp in typedefs:
|
|
1636
|
+
return typedefs[pp]
|
|
1637
|
+
if pp not in _warned:
|
|
1638
|
+
_warn_string = f"[{term.curie}] undefined typedef: {pp}"
|
|
1639
|
+
if predicate.name:
|
|
1640
|
+
_warn_string += f" ({predicate.name})"
|
|
1641
|
+
logger.warning(_warn_string)
|
|
1642
|
+
_warned.add(pp)
|
|
1643
|
+
return None
|
|
1195
1644
|
|
|
1196
1645
|
def iter_relation_rows(
|
|
1197
1646
|
self, use_tqdm: bool = False
|
|
@@ -1208,14 +1657,14 @@ class Obo:
|
|
|
1208
1657
|
|
|
1209
1658
|
def iterate_filtered_relations(
|
|
1210
1659
|
self,
|
|
1211
|
-
relation:
|
|
1660
|
+
relation: ReferenceHint,
|
|
1212
1661
|
*,
|
|
1213
1662
|
use_tqdm: bool = False,
|
|
1214
|
-
) -> Iterable[tuple[
|
|
1663
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1215
1664
|
"""Iterate over tuples of terms and ther targets for the given relation."""
|
|
1216
|
-
|
|
1217
|
-
for term,
|
|
1218
|
-
if
|
|
1665
|
+
_pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
|
|
1666
|
+
for term, predicate, reference in self.iterate_relations(use_tqdm=use_tqdm):
|
|
1667
|
+
if _pair == predicate.pair:
|
|
1219
1668
|
yield term, reference
|
|
1220
1669
|
|
|
1221
1670
|
@property
|
|
@@ -1232,7 +1681,7 @@ class Obo:
|
|
|
1232
1681
|
|
|
1233
1682
|
def get_filtered_relations_df(
|
|
1234
1683
|
self,
|
|
1235
|
-
relation:
|
|
1684
|
+
relation: ReferenceHint,
|
|
1236
1685
|
*,
|
|
1237
1686
|
use_tqdm: bool = False,
|
|
1238
1687
|
) -> pd.DataFrame:
|
|
@@ -1247,11 +1696,11 @@ class Obo:
|
|
|
1247
1696
|
|
|
1248
1697
|
def iterate_filtered_relations_filtered_targets(
|
|
1249
1698
|
self,
|
|
1250
|
-
relation:
|
|
1699
|
+
relation: ReferenceHint,
|
|
1251
1700
|
target_prefix: str,
|
|
1252
1701
|
*,
|
|
1253
1702
|
use_tqdm: bool = False,
|
|
1254
|
-
) -> Iterable[tuple[
|
|
1703
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1255
1704
|
"""Iterate over relationships between one identifier and another."""
|
|
1256
1705
|
for term, reference in self.iterate_filtered_relations(
|
|
1257
1706
|
relation=relation, use_tqdm=use_tqdm
|
|
@@ -1261,7 +1710,7 @@ class Obo:
|
|
|
1261
1710
|
|
|
1262
1711
|
def get_relation_mapping(
|
|
1263
1712
|
self,
|
|
1264
|
-
relation:
|
|
1713
|
+
relation: ReferenceHint,
|
|
1265
1714
|
target_prefix: str,
|
|
1266
1715
|
*,
|
|
1267
1716
|
use_tqdm: bool = False,
|
|
@@ -1272,8 +1721,8 @@ class Obo:
|
|
|
1272
1721
|
|
|
1273
1722
|
Example usage: get homology between HGNC and MGI:
|
|
1274
1723
|
|
|
1275
|
-
>>> from pyobo.sources.hgnc import
|
|
1276
|
-
>>> obo =
|
|
1724
|
+
>>> from pyobo.sources.hgnc import HGNCGetter
|
|
1725
|
+
>>> obo = HGNCGetter()
|
|
1277
1726
|
>>> human_mapt_hgnc_id = "6893"
|
|
1278
1727
|
>>> mouse_mapt_mgi_id = "97180"
|
|
1279
1728
|
>>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi")
|
|
@@ -1291,15 +1740,15 @@ class Obo:
|
|
|
1291
1740
|
def get_relation(
|
|
1292
1741
|
self,
|
|
1293
1742
|
source_identifier: str,
|
|
1294
|
-
relation:
|
|
1743
|
+
relation: ReferenceHint,
|
|
1295
1744
|
target_prefix: str,
|
|
1296
1745
|
*,
|
|
1297
1746
|
use_tqdm: bool = False,
|
|
1298
|
-
) ->
|
|
1747
|
+
) -> str | None:
|
|
1299
1748
|
"""Get the value for a bijective relation mapping between this resource and a target resource.
|
|
1300
1749
|
|
|
1301
|
-
>>> from pyobo.sources.hgnc import
|
|
1302
|
-
>>> obo =
|
|
1750
|
+
>>> from pyobo.sources.hgnc import HGNCGetter
|
|
1751
|
+
>>> obo = HGNCGetter()
|
|
1303
1752
|
>>> human_mapt_hgnc_id = "6893"
|
|
1304
1753
|
>>> mouse_mapt_mgi_id = "97180"
|
|
1305
1754
|
>>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi")
|
|
@@ -1311,7 +1760,7 @@ class Obo:
|
|
|
1311
1760
|
|
|
1312
1761
|
def get_relation_multimapping(
|
|
1313
1762
|
self,
|
|
1314
|
-
relation:
|
|
1763
|
+
relation: ReferenceHint,
|
|
1315
1764
|
target_prefix: str,
|
|
1316
1765
|
*,
|
|
1317
1766
|
use_tqdm: bool = False,
|
|
@@ -1334,22 +1783,24 @@ class Obo:
|
|
|
1334
1783
|
) -> Mapping[str, list[Reference]]:
|
|
1335
1784
|
"""Get a mapping from identifiers to a list of all references for the given relation."""
|
|
1336
1785
|
return multidict(
|
|
1337
|
-
(
|
|
1338
|
-
for
|
|
1786
|
+
(stanza.identifier, reference)
|
|
1787
|
+
for stanza in self._iter_stanzas(
|
|
1339
1788
|
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting {typedef.curie}"
|
|
1340
1789
|
)
|
|
1341
|
-
for reference in
|
|
1790
|
+
for reference in stanza.get_relationships(typedef)
|
|
1342
1791
|
)
|
|
1343
1792
|
|
|
1344
1793
|
############
|
|
1345
1794
|
# SYNONYMS #
|
|
1346
1795
|
############
|
|
1347
1796
|
|
|
1348
|
-
def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1797
|
+
def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Synonym]]:
|
|
1349
1798
|
"""Iterate over pairs of term and synonym object."""
|
|
1350
|
-
for
|
|
1351
|
-
|
|
1352
|
-
|
|
1799
|
+
for stanza in self._iter_stanzas(
|
|
1800
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"
|
|
1801
|
+
):
|
|
1802
|
+
for synonym in sorted(stanza.synonyms):
|
|
1803
|
+
yield stanza, synonym
|
|
1353
1804
|
|
|
1354
1805
|
def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
|
|
1355
1806
|
"""Iterate over pairs of identifier and synonym text."""
|
|
@@ -1360,40 +1811,95 @@ class Obo:
|
|
|
1360
1811
|
"""Get a mapping from identifiers to a list of sorted synonym strings."""
|
|
1361
1812
|
return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm))
|
|
1362
1813
|
|
|
1814
|
+
def get_literal_mappings(self) -> Iterable[ssslm.LiteralMapping]:
|
|
1815
|
+
"""Get literal mappings in a standard data model."""
|
|
1816
|
+
stanzas: Iterable[Stanza] = itt.chain(self, self.typedefs or [])
|
|
1817
|
+
yield from itt.chain.from_iterable(
|
|
1818
|
+
stanza.get_literal_mappings()
|
|
1819
|
+
for stanza in stanzas
|
|
1820
|
+
if self._in_ontology(stanza.reference)
|
|
1821
|
+
)
|
|
1822
|
+
|
|
1823
|
+
def _in_ontology(self, reference: Reference | Referenced) -> bool:
|
|
1824
|
+
return self._in_ontology_strict(reference) or self._in_ontology_aux(reference)
|
|
1825
|
+
|
|
1826
|
+
def _in_ontology_strict(self, reference: Reference | Referenced) -> bool:
|
|
1827
|
+
return reference.prefix == self.ontology
|
|
1828
|
+
|
|
1829
|
+
def _in_ontology_aux(self, reference: Reference | Referenced) -> bool:
|
|
1830
|
+
return reference.prefix == "obo" and reference.identifier.startswith(self.ontology + "#")
|
|
1831
|
+
|
|
1363
1832
|
#########
|
|
1364
1833
|
# XREFS #
|
|
1365
1834
|
#########
|
|
1366
1835
|
|
|
1367
|
-
def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[
|
|
1836
|
+
def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Reference]]:
|
|
1368
1837
|
"""Iterate over xrefs."""
|
|
1369
|
-
for
|
|
1370
|
-
|
|
1371
|
-
|
|
1838
|
+
for stanza in self._iter_stanzas(
|
|
1839
|
+
use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"
|
|
1840
|
+
):
|
|
1841
|
+
xrefs = {xref for _, xref in stanza.get_mappings(add_context=False)}
|
|
1842
|
+
for xref in sorted(xrefs):
|
|
1843
|
+
yield stanza, xref
|
|
1372
1844
|
|
|
1373
1845
|
def iterate_filtered_xrefs(
|
|
1374
1846
|
self, prefix: str, *, use_tqdm: bool = False
|
|
1375
|
-
) -> Iterable[tuple[
|
|
1847
|
+
) -> Iterable[tuple[Stanza, Reference]]:
|
|
1376
1848
|
"""Iterate over xrefs to a given prefix."""
|
|
1377
1849
|
for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
|
|
1378
1850
|
if xref.prefix == prefix:
|
|
1379
1851
|
yield term, xref
|
|
1380
1852
|
|
|
1381
|
-
def
|
|
1382
|
-
"""Iterate over
|
|
1383
|
-
for
|
|
1384
|
-
yield
|
|
1853
|
+
def iterate_literal_mapping_rows(self) -> Iterable[ssslm.LiteralMappingTuple]:
|
|
1854
|
+
"""Iterate over literal mapping rows."""
|
|
1855
|
+
for synonym in self.get_literal_mappings():
|
|
1856
|
+
yield synonym._as_row()
|
|
1385
1857
|
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID]
|
|
1858
|
+
def get_literal_mappings_df(self) -> pd.DataFrame:
|
|
1859
|
+
"""Get a literal mappings dataframe."""
|
|
1860
|
+
return ssslm.literal_mappings_to_df(self.get_literal_mappings())
|
|
1390
1861
|
|
|
1391
|
-
def
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1862
|
+
def iterate_mapping_rows(
|
|
1863
|
+
self, *, use_tqdm: bool = False
|
|
1864
|
+
) -> Iterable[tuple[str, str, str, str, str, float | None, str | None]]:
|
|
1865
|
+
"""Iterate over SSSOM rows for mappings."""
|
|
1866
|
+
for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
|
|
1867
|
+
for predicate, obj_ref, context in stanza.get_mappings(
|
|
1868
|
+
include_xrefs=True, add_context=True
|
|
1869
|
+
):
|
|
1870
|
+
yield (
|
|
1871
|
+
get_preferred_curie(stanza),
|
|
1872
|
+
stanza.name,
|
|
1873
|
+
get_preferred_curie(obj_ref),
|
|
1874
|
+
get_preferred_curie(predicate),
|
|
1875
|
+
get_preferred_curie(context.justification),
|
|
1876
|
+
context.confidence if context.confidence is not None else None,
|
|
1877
|
+
get_preferred_curie(context.contributor) if context.contributor else None,
|
|
1878
|
+
)
|
|
1879
|
+
|
|
1880
|
+
def get_mappings_df(
|
|
1881
|
+
self,
|
|
1882
|
+
*,
|
|
1883
|
+
use_tqdm: bool = False,
|
|
1884
|
+
include_subject_labels: bool = False,
|
|
1885
|
+
include_mapping_source_column: bool = False,
|
|
1886
|
+
) -> pd.DataFrame:
|
|
1887
|
+
"""Get a dataframe with SSSOM extracted from the OBO document."""
|
|
1888
|
+
df = pd.DataFrame(self.iterate_mapping_rows(use_tqdm=use_tqdm), columns=SSSOM_DF_COLUMNS)
|
|
1889
|
+
if not include_subject_labels:
|
|
1890
|
+
del df["subject_label"]
|
|
1891
|
+
|
|
1892
|
+
# if no confidences/contributor, remove that column
|
|
1893
|
+
for c in ["confidence", "contributor"]:
|
|
1894
|
+
if df[c].isna().all():
|
|
1895
|
+
del df[c]
|
|
1896
|
+
|
|
1897
|
+
# append on the mapping_source
|
|
1898
|
+
# (https://mapping-commons.github.io/sssom/mapping_source/)
|
|
1899
|
+
if include_mapping_source_column:
|
|
1900
|
+
df["mapping_source"] = self.ontology
|
|
1901
|
+
|
|
1902
|
+
return df
|
|
1397
1903
|
|
|
1398
1904
|
def get_filtered_xrefs_mapping(
|
|
1399
1905
|
self, prefix: str, *, use_tqdm: bool = False
|
|
@@ -1417,11 +1923,12 @@ class Obo:
|
|
|
1417
1923
|
# ALTS #
|
|
1418
1924
|
########
|
|
1419
1925
|
|
|
1420
|
-
def iterate_alts(self) -> Iterable[tuple[
|
|
1926
|
+
def iterate_alts(self) -> Iterable[tuple[Stanza, Reference]]:
|
|
1421
1927
|
"""Iterate over alternative identifiers."""
|
|
1422
|
-
for
|
|
1423
|
-
|
|
1424
|
-
|
|
1928
|
+
for stanza in self._iter_stanzas():
|
|
1929
|
+
if self._in_ontology(stanza):
|
|
1930
|
+
for alt in stanza.alt_ids:
|
|
1931
|
+
yield stanza, alt
|
|
1425
1932
|
|
|
1426
1933
|
def iterate_alt_rows(self) -> Iterable[tuple[str, str]]:
|
|
1427
1934
|
"""Iterate over pairs of terms' primary identifiers and alternate identifiers."""
|
|
@@ -1433,33 +1940,315 @@ class Obo:
|
|
|
1433
1940
|
return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts())
|
|
1434
1941
|
|
|
1435
1942
|
|
|
1943
|
+
@dataclass
|
|
1944
|
+
class TypeDef(Stanza):
|
|
1945
|
+
"""A type definition in OBO.
|
|
1946
|
+
|
|
1947
|
+
See the subsection of https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.
|
|
1948
|
+
"""
|
|
1949
|
+
|
|
1950
|
+
reference: Annotated[Reference, 1]
|
|
1951
|
+
is_anonymous: Annotated[bool | None, 2] = None
|
|
1952
|
+
# 3 - name is covered by reference
|
|
1953
|
+
namespace: Annotated[str | None, 4] = None
|
|
1954
|
+
# 5 alt_id is part of proerties
|
|
1955
|
+
definition: Annotated[str | None, 6] = None
|
|
1956
|
+
comment: Annotated[str | None, 7] = None
|
|
1957
|
+
subsets: Annotated[list[Reference], 8] = field(default_factory=list)
|
|
1958
|
+
synonyms: Annotated[list[Synonym], 9] = field(default_factory=list)
|
|
1959
|
+
xrefs: Annotated[list[Reference], 10] = field(default_factory=list)
|
|
1960
|
+
_axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
|
|
1961
|
+
properties: Annotated[PropertiesHint, 11] = field(default_factory=lambda: defaultdict(list))
|
|
1962
|
+
domain: Annotated[Reference | None, 12, "typedef-only"] = None
|
|
1963
|
+
range: Annotated[Reference | None, 13, "typedef-only"] = None
|
|
1964
|
+
builtin: Annotated[bool | None, 14] = None
|
|
1965
|
+
holds_over_chain: Annotated[list[list[Reference]], 15, "typedef-only"] = field(
|
|
1966
|
+
default_factory=list
|
|
1967
|
+
)
|
|
1968
|
+
is_anti_symmetric: Annotated[bool | None, 16, "typedef-only"] = None
|
|
1969
|
+
is_cyclic: Annotated[bool | None, 17, "typedef-only"] = None
|
|
1970
|
+
is_reflexive: Annotated[bool | None, 18, "typedef-only"] = None
|
|
1971
|
+
is_symmetric: Annotated[bool | None, 19, "typedef-only"] = None
|
|
1972
|
+
is_transitive: Annotated[bool | None, 20, "typedef-only"] = None
|
|
1973
|
+
is_functional: Annotated[bool | None, 21, "typedef-only"] = None
|
|
1974
|
+
is_inverse_functional: Annotated[bool | None, 22, "typedef-only"] = None
|
|
1975
|
+
parents: Annotated[list[Reference], 23] = field(default_factory=list)
|
|
1976
|
+
intersection_of: Annotated[IntersectionOfHint, 24] = field(default_factory=list)
|
|
1977
|
+
union_of: Annotated[list[Reference], 25] = field(default_factory=list)
|
|
1978
|
+
equivalent_to: Annotated[list[Reference], 26] = field(default_factory=list)
|
|
1979
|
+
disjoint_from: Annotated[list[Reference], 27] = field(default_factory=list)
|
|
1980
|
+
# TODO inverse should be inverse_of, cardinality any
|
|
1981
|
+
inverse: Annotated[Reference | None, 28, "typedef-only"] = None
|
|
1982
|
+
# TODO check if there are any examples of this being multiple
|
|
1983
|
+
transitive_over: Annotated[list[Reference], 29, "typedef-only"] = field(default_factory=list)
|
|
1984
|
+
equivalent_to_chain: Annotated[list[list[Reference]], 30, "typedef-only"] = field(
|
|
1985
|
+
default_factory=list
|
|
1986
|
+
)
|
|
1987
|
+
#: From the OBO spec:
|
|
1988
|
+
#:
|
|
1989
|
+
#: For example: spatially_disconnected_from is disjoint_over part_of, in that two
|
|
1990
|
+
#: disconnected entities have no parts in common. This can be translated to OWL as:
|
|
1991
|
+
#: ``disjoint_over(R S), R(A B) ==> (S some A) disjointFrom (S some B)``
|
|
1992
|
+
disjoint_over: Annotated[list[Reference], 31] = field(default_factory=list)
|
|
1993
|
+
relationships: Annotated[RelationsHint, 32] = field(default_factory=lambda: defaultdict(list))
|
|
1994
|
+
is_obsolete: Annotated[bool | None, 33] = None
|
|
1995
|
+
created_by: Annotated[str | None, 34] = None
|
|
1996
|
+
creation_date: Annotated[datetime.datetime | None, 35] = None
|
|
1997
|
+
# TODO expand_assertion_to
|
|
1998
|
+
# TODO expand_expression_to
|
|
1999
|
+
#: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are
|
|
2000
|
+
#: used to record object metadata. Object metadata is additional information about an object
|
|
2001
|
+
#: that is useful to track, but does not impact the definition of the object or how it should
|
|
2002
|
+
#: be treated by a reasoner. Metadata tags might be used to record special term synonyms or
|
|
2003
|
+
#: structured notes about a term, for example.
|
|
2004
|
+
is_metadata_tag: Annotated[bool | None, 40, "typedef-only"] = None
|
|
2005
|
+
is_class_level: Annotated[bool | None, 41] = None
|
|
2006
|
+
|
|
2007
|
+
type: StanzaType = "TypeDef"
|
|
2008
|
+
|
|
2009
|
+
def __hash__(self) -> int:
|
|
2010
|
+
# have to re-define hash because of the @dataclass
|
|
2011
|
+
return hash((self.__class__, self.prefix, self.identifier))
|
|
2012
|
+
|
|
2013
|
+
def _get_references(self) -> dict[str, set[Reference]]:
|
|
2014
|
+
rv = super()._get_references()
|
|
2015
|
+
|
|
2016
|
+
def _add(r: Reference) -> None:
|
|
2017
|
+
rv[r.prefix].add(r)
|
|
2018
|
+
|
|
2019
|
+
if self.domain:
|
|
2020
|
+
_add(self.domain)
|
|
2021
|
+
if self.range:
|
|
2022
|
+
_add(self.range)
|
|
2023
|
+
if self.inverse:
|
|
2024
|
+
_add(self.inverse)
|
|
2025
|
+
|
|
2026
|
+
# TODO all of the properties, which are from oboInOwl
|
|
2027
|
+
for rr in itt.chain(self.transitive_over, self.disjoint_over):
|
|
2028
|
+
_add(rr)
|
|
2029
|
+
for part in itt.chain(self.holds_over_chain, self.equivalent_to_chain):
|
|
2030
|
+
for rr in part:
|
|
2031
|
+
_add(rr)
|
|
2032
|
+
return dict(rv)
|
|
2033
|
+
|
|
2034
|
+
def iterate_obo_lines(
|
|
2035
|
+
self,
|
|
2036
|
+
ontology_prefix: str,
|
|
2037
|
+
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
|
|
2038
|
+
typedefs: Mapping[ReferenceTuple, TypeDef] | None = None,
|
|
2039
|
+
) -> Iterable[str]:
|
|
2040
|
+
"""Iterate over the lines to write in an OBO file.
|
|
2041
|
+
|
|
2042
|
+
:param ontology_prefix:
|
|
2043
|
+
The prefix of the ontology into which the type definition is being written.
|
|
2044
|
+
This is used for compressing builtin identifiers
|
|
2045
|
+
:yield:
|
|
2046
|
+
The lines to write to an OBO file
|
|
2047
|
+
|
|
2048
|
+
`S.3.5.5 <https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.3.5.5>`_
|
|
2049
|
+
of the OBO Flat File Specification v1.4 says tags should appear in the following order:
|
|
2050
|
+
|
|
2051
|
+
1. id
|
|
2052
|
+
2. is_anonymous
|
|
2053
|
+
3. name
|
|
2054
|
+
4. namespace
|
|
2055
|
+
5. alt_id
|
|
2056
|
+
6. def
|
|
2057
|
+
7. comment
|
|
2058
|
+
8. subset
|
|
2059
|
+
9. synonym
|
|
2060
|
+
10. xref
|
|
2061
|
+
11. property_value
|
|
2062
|
+
12. domain
|
|
2063
|
+
13. range
|
|
2064
|
+
14. builtin
|
|
2065
|
+
15. holds_over_chain
|
|
2066
|
+
16. is_anti_symmetric
|
|
2067
|
+
17. is_cyclic
|
|
2068
|
+
18. is_reflexive
|
|
2069
|
+
19. is_symmetric
|
|
2070
|
+
20. is_transitive
|
|
2071
|
+
21. is_functional
|
|
2072
|
+
22. is_inverse_functional
|
|
2073
|
+
23. is_a
|
|
2074
|
+
24. intersection_of
|
|
2075
|
+
25. union_of
|
|
2076
|
+
26. equivalent_to
|
|
2077
|
+
27. disjoint_from
|
|
2078
|
+
28. inverse_of
|
|
2079
|
+
29. transitive_over
|
|
2080
|
+
30. equivalent_to_chain
|
|
2081
|
+
31. disjoint_over
|
|
2082
|
+
32. relationship
|
|
2083
|
+
33. is_obsolete
|
|
2084
|
+
34. created_by
|
|
2085
|
+
35. creation_date
|
|
2086
|
+
36. replaced_by
|
|
2087
|
+
37. consider
|
|
2088
|
+
38. expand_assertion_to
|
|
2089
|
+
39. expand_expression_to
|
|
2090
|
+
40. is_metadata_tag
|
|
2091
|
+
41. is_class_level
|
|
2092
|
+
"""
|
|
2093
|
+
if synonym_typedefs is None:
|
|
2094
|
+
synonym_typedefs = {}
|
|
2095
|
+
if typedefs is None:
|
|
2096
|
+
typedefs = {}
|
|
2097
|
+
|
|
2098
|
+
yield "\n[Typedef]"
|
|
2099
|
+
# 1
|
|
2100
|
+
yield f"id: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
|
|
2101
|
+
# 2
|
|
2102
|
+
yield from _boolean_tag("is_anonymous", self.is_anonymous)
|
|
2103
|
+
# 3
|
|
2104
|
+
if self.name:
|
|
2105
|
+
yield f"name: {self.name}"
|
|
2106
|
+
# 4
|
|
2107
|
+
if self.namespace:
|
|
2108
|
+
yield f"namespace: {self.namespace}"
|
|
2109
|
+
# 5
|
|
2110
|
+
yield from _reference_list_tag("alt_id", self.alt_ids, ontology_prefix)
|
|
2111
|
+
# 6
|
|
2112
|
+
if self.definition:
|
|
2113
|
+
yield f"def: {self._definition_fp()}"
|
|
2114
|
+
# 7
|
|
2115
|
+
if self.comment:
|
|
2116
|
+
yield f"comment: {self.comment}"
|
|
2117
|
+
# 8
|
|
2118
|
+
yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
|
|
2119
|
+
# 9
|
|
2120
|
+
for synonym in self.synonyms:
|
|
2121
|
+
yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
|
|
2122
|
+
# 10
|
|
2123
|
+
yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
|
|
2124
|
+
# 11
|
|
2125
|
+
yield from self._iterate_obo_properties(
|
|
2126
|
+
ontology_prefix=ontology_prefix,
|
|
2127
|
+
skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
|
|
2128
|
+
skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
|
|
2129
|
+
typedefs=typedefs,
|
|
2130
|
+
)
|
|
2131
|
+
# 12
|
|
2132
|
+
if self.domain:
|
|
2133
|
+
yield f"domain: {reference_escape(self.domain, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2134
|
+
# 13
|
|
2135
|
+
if self.range:
|
|
2136
|
+
yield f"range: {reference_escape(self.range, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2137
|
+
# 14
|
|
2138
|
+
yield from _boolean_tag("builtin", self.builtin)
|
|
2139
|
+
# 15
|
|
2140
|
+
yield from _chain_tag("holds_over_chain", self.holds_over_chain, ontology_prefix)
|
|
2141
|
+
# 16
|
|
2142
|
+
yield from _boolean_tag("is_anti_symmetric", self.is_anti_symmetric)
|
|
2143
|
+
# 17
|
|
2144
|
+
yield from _boolean_tag("is_cyclic", self.is_cyclic)
|
|
2145
|
+
# 18
|
|
2146
|
+
yield from _boolean_tag("is_reflexive", self.is_reflexive)
|
|
2147
|
+
# 19
|
|
2148
|
+
yield from _boolean_tag("is_symmetric", self.is_symmetric)
|
|
2149
|
+
# 20
|
|
2150
|
+
yield from _boolean_tag("is_transitive", self.is_transitive)
|
|
2151
|
+
# 21
|
|
2152
|
+
yield from _boolean_tag("is_functional", self.is_functional)
|
|
2153
|
+
# 22
|
|
2154
|
+
yield from _boolean_tag("is_inverse_functional", self.is_inverse_functional)
|
|
2155
|
+
# 23
|
|
2156
|
+
yield from _reference_list_tag("is_a", self.parents, ontology_prefix)
|
|
2157
|
+
# 24
|
|
2158
|
+
yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
|
|
2159
|
+
# 25
|
|
2160
|
+
yield from _reference_list_tag("union_of", self.union_of, ontology_prefix)
|
|
2161
|
+
# 26
|
|
2162
|
+
yield from _reference_list_tag("equivalent_to", self.equivalent_to, ontology_prefix)
|
|
2163
|
+
# 27
|
|
2164
|
+
yield from _reference_list_tag("disjoint_from", self.disjoint_from, ontology_prefix)
|
|
2165
|
+
# 28
|
|
2166
|
+
if self.inverse:
|
|
2167
|
+
yield f"inverse_of: {reference_escape(self.inverse, ontology_prefix=ontology_prefix, add_name_comment=True)}"
|
|
2168
|
+
# 29
|
|
2169
|
+
yield from _reference_list_tag("transitive_over", self.transitive_over, ontology_prefix)
|
|
2170
|
+
# 30
|
|
2171
|
+
yield from _chain_tag("equivalent_to_chain", self.equivalent_to_chain, ontology_prefix)
|
|
2172
|
+
# 31 disjoint_over, see https://github.com/search?q=%22disjoint_over%3A%22+path%3A*.obo&type=code
|
|
2173
|
+
yield from _reference_list_tag(
|
|
2174
|
+
"disjoint_over", self.disjoint_over, ontology_prefix=ontology_prefix
|
|
2175
|
+
)
|
|
2176
|
+
# 32
|
|
2177
|
+
yield from self._iterate_obo_relations(ontology_prefix=ontology_prefix, typedefs=typedefs)
|
|
2178
|
+
# 33
|
|
2179
|
+
yield from _boolean_tag("is_obsolete", self.is_obsolete)
|
|
2180
|
+
# 34
|
|
2181
|
+
if self.created_by:
|
|
2182
|
+
yield f"created_by: {self.created_by}"
|
|
2183
|
+
# 35
|
|
2184
|
+
if self.creation_date is not None:
|
|
2185
|
+
yield f"creation_date: {self.creation_date.isoformat()}"
|
|
2186
|
+
# 36
|
|
2187
|
+
yield from _tag_property_targets(
|
|
2188
|
+
"replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
|
|
2189
|
+
)
|
|
2190
|
+
# 37
|
|
2191
|
+
yield from _tag_property_targets(
|
|
2192
|
+
"consider", self, v.see_also, ontology_prefix=ontology_prefix
|
|
2193
|
+
)
|
|
2194
|
+
# 38 TODO expand_assertion_to
|
|
2195
|
+
# 39 TODO expand_expression_to
|
|
2196
|
+
# 40
|
|
2197
|
+
yield from _boolean_tag("is_metadata_tag", self.is_metadata_tag)
|
|
2198
|
+
# 41
|
|
2199
|
+
yield from _boolean_tag("is_class_level", self.is_class_level)
|
|
2200
|
+
|
|
2201
|
+
@classmethod
|
|
2202
|
+
def from_triple(cls, prefix: str, identifier: str, name: str | None = None) -> TypeDef:
|
|
2203
|
+
"""Create a typedef from a reference."""
|
|
2204
|
+
return cls(reference=Reference(prefix=prefix, identifier=identifier, name=name))
|
|
2205
|
+
|
|
2206
|
+
@classmethod
|
|
2207
|
+
def default(
|
|
2208
|
+
cls, prefix: str, identifier: str, *, name: str | None = None, is_metadata_tag: bool
|
|
2209
|
+
) -> Self:
|
|
2210
|
+
"""Construct a default type definition from within the OBO namespace."""
|
|
2211
|
+
return cls(
|
|
2212
|
+
reference=default_reference(prefix, identifier, name=name),
|
|
2213
|
+
is_metadata_tag=is_metadata_tag,
|
|
2214
|
+
)
|
|
2215
|
+
|
|
2216
|
+
|
|
2217
|
+
class AdHocOntologyBase(Obo):
|
|
2218
|
+
"""A base class for ad-hoc ontologies."""
|
|
2219
|
+
|
|
2220
|
+
|
|
1436
2221
|
def make_ad_hoc_ontology(
|
|
1437
2222
|
_ontology: str,
|
|
1438
|
-
_name: str,
|
|
1439
|
-
_auto_generated_by:
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
2223
|
+
_name: str | None = None,
|
|
2224
|
+
_auto_generated_by: str | None = None,
|
|
2225
|
+
_typedefs: list[TypeDef] | None = None,
|
|
2226
|
+
_synonym_typedefs: list[SynonymTypeDef] | None = None,
|
|
2227
|
+
_date: datetime.datetime | None = None,
|
|
2228
|
+
_data_version: str | None = None,
|
|
2229
|
+
_idspaces: Mapping[str, str] | None = None,
|
|
2230
|
+
_root_terms: list[Reference] | None = None,
|
|
2231
|
+
_subsetdefs: list[tuple[Reference, str]] | None = None,
|
|
2232
|
+
_property_values: list[Annotation] | None = None,
|
|
2233
|
+
_imports: list[str] | None = None,
|
|
1447
2234
|
*,
|
|
1448
|
-
terms: list[Term],
|
|
1449
|
-
) ->
|
|
2235
|
+
terms: list[Term] | None = None,
|
|
2236
|
+
) -> Obo:
|
|
1450
2237
|
"""Make an ad-hoc ontology."""
|
|
1451
2238
|
|
|
1452
|
-
class AdHocOntology(
|
|
2239
|
+
class AdHocOntology(AdHocOntologyBase):
|
|
1453
2240
|
"""An ad hoc ontology created from an OBO file."""
|
|
1454
2241
|
|
|
1455
2242
|
ontology = _ontology
|
|
1456
2243
|
name = _name
|
|
1457
2244
|
auto_generated_by = _auto_generated_by
|
|
1458
|
-
format_version = _format_version
|
|
1459
2245
|
typedefs = _typedefs
|
|
1460
2246
|
synonym_typedefs = _synonym_typedefs
|
|
1461
2247
|
idspaces = _idspaces
|
|
1462
2248
|
root_terms = _root_terms
|
|
2249
|
+
subsetdefs = _subsetdefs
|
|
2250
|
+
property_values = _property_values
|
|
2251
|
+
imports = _imports
|
|
1463
2252
|
|
|
1464
2253
|
def __post_init__(self):
|
|
1465
2254
|
self.date = _date
|
|
@@ -1467,30 +2256,11 @@ def make_ad_hoc_ontology(
|
|
|
1467
2256
|
|
|
1468
2257
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
1469
2258
|
"""Iterate over terms in the ad hoc ontology."""
|
|
1470
|
-
return terms
|
|
2259
|
+
return terms or []
|
|
1471
2260
|
|
|
1472
2261
|
return AdHocOntology()
|
|
1473
2262
|
|
|
1474
2263
|
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
return []
|
|
1479
|
-
return [_convert_typedef(typedef) for typedef in typedefs]
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
|
|
1483
|
-
"""Convert a type def."""
|
|
1484
|
-
# TODO add more later
|
|
1485
|
-
return typedef.reference.model_dump()
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]:
|
|
1489
|
-
"""Convert the synonym type defs."""
|
|
1490
|
-
if not synonym_typedefs:
|
|
1491
|
-
return []
|
|
1492
|
-
return [_convert_synonym_typedef(synonym_typedef) for synonym_typedef in synonym_typedefs]
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
def _convert_synonym_typedef(synonym_typedef: SynonymTypeDef) -> str:
|
|
1496
|
-
return f'{synonym_typedef.preferred_curie} "{synonym_typedef.name}"'
|
|
2264
|
+
HUMAN_TERM = Term(reference=v.HUMAN)
|
|
2265
|
+
CHARLIE_TERM = Term(reference=v.CHARLIE, type="Instance").append_parent(HUMAN_TERM)
|
|
2266
|
+
PYOBO_INJECTED = "Injected by PyOBO"
|