pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/struct/struct.py CHANGED
@@ -1,111 +1,209 @@
1
1
  """Data structures for OBO."""
2
2
 
3
- import gzip
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import itertools as itt
4
7
  import json
5
8
  import logging
6
9
  import os
7
10
  import sys
8
- from collections import defaultdict
9
- from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
11
+ import warnings
12
+ from collections import ChainMap, defaultdict
13
+ from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
10
14
  from dataclasses import dataclass, field
11
- from datetime import datetime
12
- from operator import attrgetter
13
15
  from pathlib import Path
14
16
  from textwrap import dedent
15
- from typing import (
16
- Any,
17
- Callable,
18
- ClassVar,
19
- Optional,
20
- TextIO,
21
- Union,
22
- )
17
+ from typing import Annotated, Any, ClassVar, TextIO
23
18
 
24
19
  import bioregistry
25
20
  import click
21
+ import curies
26
22
  import networkx as nx
27
23
  import pandas as pd
24
+ import ssslm
25
+ from curies import ReferenceTuple
26
+ from curies import vocabulary as _cv
28
27
  from more_click import force_option, verbose_option
29
28
  from tqdm.auto import tqdm
30
- from typing_extensions import Literal
31
-
32
- from .reference import Reference, Referenced
33
- from .typedef import (
34
- RelationHint,
35
- TypeDef,
36
- comment,
37
- default_typedefs,
38
- exact_match,
39
- from_species,
40
- get_reference_tuple,
41
- has_ontology_root_term,
42
- has_part,
43
- is_a,
44
- orthologous,
45
- part_of,
46
- see_also,
47
- term_replaced_by,
29
+ from typing_extensions import Self
30
+
31
+ from . import vocabulary as v
32
+ from .reference import (
33
+ OBOLiteral,
34
+ Reference,
35
+ Referenced,
36
+ _reference_list_tag,
37
+ comma_separate_references,
38
+ default_reference,
39
+ get_preferred_curie,
40
+ reference_escape,
41
+ reference_or_literal_to_str,
42
+ )
43
+ from .struct_utils import (
44
+ Annotation,
45
+ AnnotationsDict,
46
+ HasReferencesMixin,
47
+ IntersectionOfHint,
48
+ PropertiesHint,
49
+ ReferenceHint,
50
+ RelationsHint,
51
+ Stanza,
52
+ StanzaType,
53
+ UnionOfHint,
54
+ _chain_tag,
55
+ _ensure_ref,
56
+ _get_prefixes_from_annotations,
57
+ _get_references_from_annotations,
58
+ _tag_property_targets,
48
59
  )
49
- from .utils import comma_separate, obo_escape_slim
60
+ from .utils import _boolean_tag, obo_escape_slim
50
61
  from ..api.utils import get_version
51
62
  from ..constants import (
63
+ BUILD_SUBDIRECTORY_NAME,
52
64
  DATE_FORMAT,
65
+ DEFAULT_PREFIX_MAP,
53
66
  NCBITAXON_PREFIX,
54
67
  RELATION_ID,
55
68
  RELATION_PREFIX,
56
69
  TARGET_ID,
57
70
  TARGET_PREFIX,
58
71
  )
59
- from ..identifier_utils import normalize_curie
60
- from ..utils.io import multidict, write_iterable_tsv
61
- from ..utils.misc import obo_to_owl
62
- from ..utils.path import get_prefix_obo_path, prefix_directory_join
72
+ from ..utils.cache import write_gzipped_graph
73
+ from ..utils.io import multidict, safe_open, write_iterable_tsv
74
+ from ..utils.path import (
75
+ CacheArtifact,
76
+ get_cache_path,
77
+ get_relation_cache_path,
78
+ prefix_directory_join,
79
+ )
80
+ from ..version import get_version as get_pyobo_version
63
81
 
64
82
  __all__ = [
83
+ "Obo",
65
84
  "Synonym",
66
85
  "SynonymTypeDef",
67
- "SynonymSpecificity",
68
- "SynonymSpecificities",
69
86
  "Term",
70
- "Obo",
71
- "make_ad_hoc_ontology",
72
87
  "abbreviation",
73
88
  "acronym",
89
+ "make_ad_hoc_ontology",
74
90
  ]
75
91
 
76
92
  logger = logging.getLogger(__name__)
77
93
 
78
- SynonymSpecificity = Literal["EXACT", "NARROW", "BROAD", "RELATED"]
79
- SynonymSpecificities: Sequence[SynonymSpecificity] = ("EXACT", "NARROW", "BROAD", "RELATED")
94
+ #: This is what happens if no specificity is given
95
+ DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
96
+
97
+ #: Columns in the SSSOM dataframe
98
+ SSSOM_DF_COLUMNS = [
99
+ "subject_id",
100
+ "subject_label",
101
+ "object_id",
102
+ "predicate_id",
103
+ "mapping_justification",
104
+ "confidence",
105
+ "contributor",
106
+ ]
107
+ UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
108
+ FORMAT_VERSION = "1.4"
80
109
 
81
110
 
82
111
  @dataclass
83
- class Synonym:
112
+ class Synonym(HasReferencesMixin):
84
113
  """A synonym with optional specificity and references."""
85
114
 
86
115
  #: The string representing the synonym
87
116
  name: str
88
117
 
89
118
  #: The specificity of the synonym
90
- specificity: SynonymSpecificity = "EXACT"
119
+ specificity: _cv.SynonymScope | None = None
91
120
 
92
121
  #: The type of synonym. Must be defined in OBO document!
93
- type: "SynonymTypeDef" = field(
94
- default_factory=lambda: DEFAULT_SYNONYM_TYPE # type:ignore
95
- )
122
+ type: Reference | None = None
96
123
 
97
124
  #: References to articles where the synonym appears
98
- provenance: list[Reference] = field(default_factory=list)
125
+ provenance: Sequence[Reference | OBOLiteral] = field(default_factory=list)
126
+
127
+ #: Extra annotations
128
+ annotations: list[Annotation] = field(default_factory=list)
129
+
130
+ #: Language tag for the synonym
131
+ language: str | None = None
132
+
133
+ def __lt__(self, other: Synonym) -> bool:
134
+ """Sort lexically by name."""
135
+ return self._sort_key() < other._sort_key()
136
+
137
+ def _get_references(self) -> defaultdict[str, set[Reference]]:
138
+ """Get all prefixes used by the typedef."""
139
+ rv: defaultdict[str, set[Reference]] = defaultdict(set)
140
+ rv[v.has_dbxref.prefix].add(v.has_dbxref)
141
+ if self.type is not None:
142
+ rv[self.type.prefix].add(self.type)
143
+ for provenance in self.provenance:
144
+ match provenance:
145
+ case Reference():
146
+ rv[provenance.prefix].add(provenance)
147
+ case OBOLiteral(_, datatype, _language):
148
+ rv[datatype.prefix].add(v._c(datatype))
149
+ for prefix, references in _get_references_from_annotations(self.annotations).items():
150
+ rv[prefix].update(references)
151
+ return rv
152
+
153
+ def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
154
+ return (
155
+ self.name,
156
+ self.specificity or DEFAULT_SPECIFICITY,
157
+ self.type.curie if self.type else "",
158
+ )
159
+
160
+ @property
161
+ def predicate(self) -> curies.NamedReference:
162
+ """Get the specificity reference."""
163
+ return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
99
164
 
100
- def to_obo(self) -> str:
165
+ def to_obo(
166
+ self,
167
+ ontology_prefix: str,
168
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
169
+ ) -> str:
101
170
  """Write this synonym as an OBO line to appear in a [Term] stanza."""
102
- return f"synonym: {self._fp()}"
171
+ return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}"
103
172
 
104
- def _fp(self) -> str:
105
- x = f'"{self._escape(self.name)}" {self.specificity}'
106
- if self.type and self.type.pair != DEFAULT_SYNONYM_TYPE.pair:
107
- x = f"{x} {self.type.preferred_curie}"
108
- return f"{x} [{comma_separate(self.provenance)}]"
173
+ def _fp(
174
+ self,
175
+ ontology_prefix: str,
176
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
177
+ ) -> str:
178
+ if synonym_typedefs is None:
179
+ synonym_typedefs = {}
180
+
181
+ x = f'"{self._escape(self.name)}"'
182
+
183
+ # Add on the specificity, e.g., EXACT
184
+ synonym_typedef = _synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs)
185
+ if synonym_typedef is not None and synonym_typedef.specificity is not None:
186
+ x = f"{x} {synonym_typedef.specificity}"
187
+ elif self.specificity is not None:
188
+ x = f"{x} {self.specificity}"
189
+ elif self.type is not None:
190
+ # it's not valid to have a synonym type without a specificity,
191
+ # so automatically assign one if we'll need it
192
+ x = f"{x} {DEFAULT_SPECIFICITY}"
193
+
194
+ # Add on the synonym type, if exists
195
+ if self.type is not None:
196
+ x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
197
+
198
+ # the provenance list is required, even if it's empty :/
199
+ x = f"{x} [{comma_separate_references(self.provenance)}]"
200
+
201
+ # OBO flat file format does not support language,
202
+ # but at least we can mention it here as a comment
203
+ if self.language:
204
+ x += f" ! language: {self.language}"
205
+
206
+ return x
109
207
 
110
208
  @staticmethod
111
209
  def _escape(s: str) -> str:
@@ -113,113 +211,100 @@ class Synonym:
113
211
 
114
212
 
115
213
  @dataclass
116
- class SynonymTypeDef(Referenced):
214
+ class SynonymTypeDef(Referenced, HasReferencesMixin):
117
215
  """A type definition for synonyms in OBO."""
118
216
 
119
217
  reference: Reference
120
- specificity: Optional[SynonymSpecificity] = None
218
+ specificity: _cv.SynonymScope | None = None
121
219
 
122
- def to_obo(self) -> str:
220
+ def __hash__(self) -> int:
221
+ # have to re-define hash because of the @dataclass
222
+ return hash((self.__class__, self.prefix, self.identifier))
223
+
224
+ def to_obo(self, ontology_prefix: str) -> str:
123
225
  """Serialize to OBO."""
124
- rv = f'synonymtypedef: {self.preferred_curie} "{self.name}"'
226
+ rv = f"synonymtypedef: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
227
+ name = self.name or ""
228
+ rv = f'{rv} "{name}"'
125
229
  if self.specificity:
126
230
  rv = f"{rv} {self.specificity}"
127
231
  return rv
128
232
 
129
- @classmethod
130
- def from_text(
131
- cls,
132
- text: str,
133
- specificity: Optional[SynonymSpecificity] = None,
134
- *,
135
- lower: bool = True,
136
- ) -> "SynonymTypeDef":
137
- """Get a type definition from text that's normalized."""
138
- identifier = (
139
- text.replace("-", "_")
140
- .replace(" ", "_")
141
- .replace('"', "")
142
- .replace(")", "")
143
- .replace("(", "")
144
- )
145
- if lower:
146
- identifier = identifier.lower()
147
- return cls(
148
- reference=Reference(prefix="obo", identifier=identifier, name=text.replace('"', "")),
149
- specificity=specificity,
150
- )
233
+ def _get_references(self) -> dict[str, set[Reference]]:
234
+ """Get all references used by the typedef."""
235
+ rv: defaultdict[str, set[Reference]] = defaultdict(set)
236
+ rv[self.reference.prefix].add(self.reference)
237
+ if self.specificity is not None:
238
+ # weird syntax, but this just gets the synonym scope
239
+ # predicate as a pyobo reference
240
+ r = v._c(_cv.synonym_scopes[self.specificity])
241
+ rv[r.prefix].add(r)
242
+ return dict(rv)
151
243
 
152
244
 
153
245
  DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
154
- reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="Synonym"),
246
+ reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
155
247
  )
156
248
  abbreviation = SynonymTypeDef(
157
249
  reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
158
250
  )
159
251
  acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
160
-
161
-
162
- ReferenceHint = Union[Reference, "Term", tuple[str, str], str]
163
-
164
-
165
- def _ensure_ref(reference: ReferenceHint) -> Reference:
166
- if reference is None:
167
- raise ValueError("can not append null reference")
168
- if isinstance(reference, Term):
169
- return reference.reference
170
- if isinstance(reference, str):
171
- _rv = Reference.from_curie(reference)
172
- if _rv is None:
173
- raise ValueError(f"could not parse CURIE from {reference}")
174
- return _rv
175
- if isinstance(reference, tuple):
176
- return Reference(prefix=reference[0], identifier=reference[1])
177
- if isinstance(reference, Reference):
178
- return reference
179
- raise TypeError(f"invalid type given for a reference ({type(reference)}): {reference}")
252
+ uk_spelling = SynonymTypeDef(
253
+ reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
254
+ )
255
+ default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
256
+ abbreviation.pair: abbreviation,
257
+ acronym.pair: acronym,
258
+ uk_spelling.pair: uk_spelling,
259
+ }
180
260
 
181
261
 
182
262
  @dataclass
183
- class Term(Referenced):
263
+ class Term(Stanza):
184
264
  """A term in OBO."""
185
265
 
186
266
  #: The primary reference for the entity
187
267
  reference: Reference
188
268
 
189
269
  #: A description of the entity
190
- definition: Optional[str] = None
270
+ definition: str | None = None
191
271
 
192
- #: References to articles in which the term appears
193
- provenance: list[Reference] = field(default_factory=list)
272
+ #: Object properties
273
+ relationships: RelationsHint = field(default_factory=lambda: defaultdict(list))
194
274
 
195
- #: Relationships defined by [Typedef] stanzas
196
- relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list))
275
+ _axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
197
276
 
198
- #: Properties, which are not defined with Typedef and have scalar values instead of references.
199
- properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
277
+ properties: PropertiesHint = field(default_factory=lambda: defaultdict(list))
200
278
 
201
279
  #: Relationships with the default "is_a"
202
280
  parents: list[Reference] = field(default_factory=list)
203
281
 
282
+ intersection_of: IntersectionOfHint = field(default_factory=list)
283
+ union_of: UnionOfHint = field(default_factory=list)
284
+ equivalent_to: list[Reference] = field(default_factory=list)
285
+ disjoint_from: list[Reference] = field(default_factory=list)
286
+
204
287
  #: Synonyms of this term
205
288
  synonyms: list[Synonym] = field(default_factory=list)
206
289
 
207
- #: Equivalent references
290
+ #: Database cross-references, see :func:`get_mappings` for
291
+ #: access to all mappings in an SSSOM-like interface
208
292
  xrefs: list[Reference] = field(default_factory=list)
209
- xref_types: list[Reference] = field(default_factory=list)
210
-
211
- #: Alternate Identifiers
212
- alt_ids: list[Reference] = field(default_factory=list)
213
293
 
214
294
  #: The sub-namespace within the ontology
215
- namespace: Optional[str] = None
295
+ namespace: str | None = None
216
296
 
217
297
  #: An annotation for obsolescence. By default, is None, but this means that it is not obsolete.
218
- is_obsolete: Optional[bool] = None
298
+ is_obsolete: bool | None = None
299
+
300
+ type: StanzaType = "Term"
219
301
 
220
- type: Literal["Term", "Instance"] = "Term"
302
+ builtin: bool | None = None
303
+ is_anonymous: bool | None = None
304
+ subsets: list[Reference] = field(default_factory=list)
221
305
 
222
- def __hash__(self):
306
+ def __hash__(self) -> int:
307
+ # have to re-define hash because of the @dataclass
223
308
  return hash((self.__class__, self.prefix, self.identifier))
224
309
 
225
310
  @classmethod
@@ -227,10 +312,10 @@ class Term(Referenced):
227
312
  cls,
228
313
  prefix: str,
229
314
  identifier: str,
230
- name: Optional[str] = None,
231
- definition: Optional[str] = None,
315
+ name: str | None = None,
316
+ definition: str | None = None,
232
317
  **kwargs,
233
- ) -> "Term":
318
+ ) -> Term:
234
319
  """Create a term from a reference."""
235
320
  return cls(
236
321
  reference=Reference(prefix=prefix, identifier=identifier, name=name),
@@ -239,245 +324,198 @@ class Term(Referenced):
239
324
  )
240
325
 
241
326
  @classmethod
242
- def auto(
243
- cls,
244
- prefix: str,
245
- identifier: str,
246
- ) -> "Term":
247
- """Create a term from a reference."""
248
- from ..api import get_definition
249
-
250
- return cls(
251
- reference=Reference.auto(prefix=prefix, identifier=identifier),
252
- definition=get_definition(prefix, identifier),
253
- )
254
-
255
- @classmethod
256
- def from_curie(cls, curie: str, name: Optional[str] = None) -> "Term":
257
- """Create a term directly from a CURIE and optional name."""
258
- prefix, identifier = normalize_curie(curie)
259
- if prefix is None or identifier is None:
260
- raise ValueError
261
- return cls.from_triple(prefix=prefix, identifier=identifier, name=name)
262
-
263
- def append_provenance(self, reference: ReferenceHint) -> None:
264
- """Add a provenance reference."""
265
- self.provenance.append(_ensure_ref(reference))
266
-
267
- def append_synonym(
268
- self,
269
- synonym: Union[str, Synonym],
270
- *,
271
- type: Optional[SynonymTypeDef] = None,
272
- specificity: Optional[SynonymSpecificity] = None,
273
- ) -> None:
274
- """Add a synonym."""
275
- if isinstance(synonym, str):
276
- synonym = Synonym(
277
- synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
278
- )
279
- self.synonyms.append(synonym)
280
-
281
- def append_alt(self, alt: Union[str, Reference]) -> None:
282
- """Add an alternative identifier."""
283
- if isinstance(alt, str):
284
- alt = Reference(prefix=self.prefix, identifier=alt)
285
- self.alt_ids.append(alt)
327
+ def default(cls, prefix, identifier, name=None) -> Self:
328
+ """Create a default term."""
329
+ return cls(reference=default_reference(prefix=prefix, identifier=identifier, name=name))
286
330
 
287
- def append_see_also(self, reference: ReferenceHint) -> "Term":
288
- """Add a see also relationship."""
289
- self.relationships[see_also].append(_ensure_ref(reference))
290
- return self
291
-
292
- def append_comment(self, value: str) -> "Term":
293
- """Add a comment relationship."""
294
- self.append_property(comment.curie, value)
295
- return self
296
-
297
- def append_replaced_by(self, reference: ReferenceHint) -> "Term":
298
- """Add a replaced by relationship."""
299
- self.append_relationship(term_replaced_by, reference)
300
- return self
301
-
302
- def append_parent(self, reference: ReferenceHint) -> "Term":
303
- """Add a parent to this entity."""
304
- reference = _ensure_ref(reference)
305
- if reference not in self.parents:
306
- self.parents.append(reference)
307
- return self
331
+ def append_see_also_uri(self, uri: str) -> Self:
332
+ """Add a see also property."""
333
+ return self.annotate_uri(v.see_also, uri)
308
334
 
309
335
  def extend_parents(self, references: Collection[Reference]) -> None:
310
336
  """Add a collection of parents to this entity."""
337
+ warnings.warn("use append_parent", DeprecationWarning, stacklevel=2)
311
338
  if any(x is None for x in references):
312
339
  raise ValueError("can not append a collection of parents containing a null parent")
313
340
  self.parents.extend(references)
314
341
 
315
- def get_properties(self, prop) -> list[str]:
342
+ def get_property_literals(self, prop: ReferenceHint) -> list[str]:
316
343
  """Get properties from the given key."""
317
- return self.properties[prop]
344
+ return [reference_or_literal_to_str(t) for t in self.properties.get(_ensure_ref(prop), [])]
318
345
 
319
- def get_property(self, prop) -> Optional[str]:
346
+ def get_property(self, prop: ReferenceHint) -> str | None:
320
347
  """Get a single property of the given key."""
321
- r = self.get_properties(prop)
322
- if not r:
323
- return None
324
- if len(r) != 1:
325
- raise ValueError
326
- return r[0]
327
-
328
- def get_relationship(self, typedef: TypeDef) -> Optional[Reference]:
329
- """Get a single relationship of the given type."""
330
- r = self.get_relationships(typedef)
348
+ r = self.get_property_literals(prop)
331
349
  if not r:
332
350
  return None
333
351
  if len(r) != 1:
334
352
  raise ValueError
335
353
  return r[0]
336
354
 
337
- def get_relationships(self, typedef: TypeDef) -> list[Reference]:
338
- """Get relationships from the given type."""
339
- return self.relationships[typedef]
340
-
341
- def append_exact_match(self, reference: ReferenceHint):
355
+ def append_exact_match(
356
+ self,
357
+ reference: ReferenceHint,
358
+ *,
359
+ mapping_justification: Reference | None = None,
360
+ confidence: float | None = None,
361
+ contributor: Reference | None = None,
362
+ ) -> Self:
342
363
  """Append an exact match, also adding an xref."""
343
364
  reference = _ensure_ref(reference)
344
- self.append_relationship(exact_match, reference)
345
- self.append_xref(reference)
365
+ axioms = self._prepare_mapping_annotations(
366
+ mapping_justification=mapping_justification,
367
+ confidence=confidence,
368
+ contributor=contributor,
369
+ )
370
+ self.annotate_object(v.exact_match, reference, annotations=axioms)
346
371
  return self
347
372
 
348
- def append_xref(self, reference: ReferenceHint) -> None:
349
- """Append an xref."""
350
- self.xrefs.append(_ensure_ref(reference))
351
-
352
- def append_relationship(self, typedef: TypeDef, reference: ReferenceHint) -> None:
353
- """Append a relationship."""
354
- self.relationships[typedef].append(_ensure_ref(reference))
355
-
356
- def set_species(self, identifier: str, name: Optional[str] = None):
373
+ def set_species(self, identifier: str, name: str | None = None) -> Self:
357
374
  """Append the from_species relation."""
358
375
  if name is None:
359
376
  from pyobo.resources.ncbitaxon import get_ncbitaxon_name
360
377
 
361
378
  name = get_ncbitaxon_name(identifier)
362
- self.append_relationship(
363
- from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
379
+ return self.append_relationship(
380
+ v.from_species, Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
364
381
  )
365
382
 
366
- def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Optional[Reference]:
383
+ def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Reference | None:
367
384
  """Get the species if it exists.
368
385
 
369
386
  :param prefix: The prefix to use in case the term has several species annotations.
370
387
  """
371
- for species in self.relationships.get(from_species, []):
388
+ for species in self.get_relationships(v.from_species):
372
389
  if species.prefix == prefix:
373
390
  return species
374
391
  return None
375
392
 
376
- def extend_relationship(self, typedef: TypeDef, references: Iterable[Reference]) -> None:
393
+ def extend_relationship(self, typedef: ReferenceHint, references: Iterable[Reference]) -> None:
377
394
  """Append several relationships."""
395
+ warnings.warn("use append_relationship", DeprecationWarning, stacklevel=2)
378
396
  if any(x is None for x in references):
379
397
  raise ValueError("can not extend a collection that includes a null reference")
398
+ typedef = _ensure_ref(typedef)
380
399
  self.relationships[typedef].extend(references)
381
400
 
382
- def append_property(
383
- self, prop: Union[str, Reference, Referenced], value: Union[str, Reference, Referenced]
384
- ) -> None:
385
- """Append a property."""
386
- if isinstance(prop, (Reference, Referenced)):
387
- prop = prop.preferred_curie
388
- if isinstance(value, (Reference, Referenced)):
389
- value = value.preferred_curie
390
- self.properties[prop].append(value)
391
-
392
- def _definition_fp(self) -> str:
393
- if self.definition is None:
394
- raise AssertionError
395
- return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'
396
-
397
- def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]:
398
- """Iterate over pairs of typedefs and targets."""
399
- for typedef, targets in sorted(self.relationships.items(), key=_sort_relations):
400
- for target in sorted(targets, key=lambda ref: ref.preferred_curie):
401
- yield typedef, target
402
-
403
- def iterate_properties(self) -> Iterable[tuple[str, str]]:
404
- """Iterate over pairs of property and values."""
405
- for prop, values in sorted(self.properties.items()):
406
- for value in sorted(values):
407
- yield prop, value
408
-
409
- def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
401
+ def iterate_obo_lines(
402
+ self,
403
+ *,
404
+ ontology_prefix: str,
405
+ typedefs: Mapping[ReferenceTuple, TypeDef],
406
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
407
+ emit_object_properties: bool = True,
408
+ emit_annotation_properties: bool = True,
409
+ ) -> Iterable[str]:
410
410
  """Iterate over the lines to write in an OBO file."""
411
411
  yield f"\n[{self.type}]"
412
- yield f"id: {self.preferred_curie}"
413
- if self.is_obsolete:
414
- yield "is_obsolete: true"
412
+ # 1
413
+ yield f"id: {self._reference(self.reference, ontology_prefix)}"
414
+ # 2
415
+ yield from _boolean_tag("is_anonymous", self.is_anonymous)
416
+ # 3
415
417
  if self.name:
416
418
  yield f"name: {obo_escape_slim(self.name)}"
419
+ # 4
417
420
  if self.namespace and self.namespace != "?":
418
421
  namespace_normalized = (
419
422
  self.namespace.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "")
420
423
  )
421
424
  yield f"namespace: {namespace_normalized}"
422
-
425
+ # 5
426
+ for alt in sorted(self.alt_ids):
427
+ yield f"alt_id: {self._reference(alt, ontology_prefix, add_name_comment=True)}"
428
+ # 6
423
429
  if self.definition:
424
430
  yield f"def: {self._definition_fp()}"
425
-
426
- for xref in sorted(self.xrefs, key=attrgetter("prefix", "identifier")):
427
- yield f"xref: {xref}" # __str__ bakes in the ! name
428
-
431
+ # 7
432
+ for x in self.get_property_values(v.comment):
433
+ if isinstance(x, OBOLiteral):
434
+ yield f'comment: "{x.value}"'
435
+ # 8
436
+ yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
437
+ # 9
438
+ for synonym in sorted(self.synonyms):
439
+ yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
440
+ # 10
441
+ yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
442
+ # 11
443
+ yield from _boolean_tag("builtin", self.builtin)
444
+ # 12
445
+ if emit_annotation_properties:
446
+ yield from self._iterate_obo_properties(
447
+ ontology_prefix=ontology_prefix,
448
+ skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
449
+ skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
450
+ typedefs=typedefs,
451
+ )
452
+ # 13
429
453
  parent_tag = "is_a" if self.type == "Term" else "instance_of"
430
- for parent in sorted(self.parents, key=attrgetter("prefix", "identifier")):
431
- yield f"{parent_tag}: {parent}" # __str__ bakes in the ! name
432
-
433
- for typedef, references in sorted(self.relationships.items(), key=_sort_relations):
434
- if (not typedefs or typedef not in typedefs) and (
435
- ontology,
436
- typedef.curie,
437
- ) not in _TYPEDEF_WARNINGS:
438
- logger.warning(f"[{ontology}] typedef not defined in OBO: {typedef.curie}")
439
- _TYPEDEF_WARNINGS.add((ontology, typedef.curie))
440
-
441
- typedef_preferred_curie = typedef.preferred_curie
442
- for reference in sorted(references, key=attrgetter("prefix", "identifier")):
443
- s = f"relationship: {typedef_preferred_curie} {reference.preferred_curie}"
444
- if typedef.name or reference.name:
445
- s += " !"
446
- if typedef.name:
447
- s += f" {typedef.name}"
448
- if reference.name:
449
- s += f" {reference.name}"
450
- yield s
451
-
452
- for prop, value in sorted(self.iterate_properties(), key=_sort_properties):
453
- # TODO deal with typedefs for properties
454
- yield f'property_value: {prop} "{value}" xsd:string' # TODO deal with types later
455
-
456
- for synonym in sorted(self.synonyms, key=attrgetter("name")):
457
- yield synonym.to_obo()
458
-
459
- @staticmethod
460
- def _escape(s) -> str:
461
- return s.replace("\n", "\\n").replace('"', '\\"')
454
+ yield from _reference_list_tag(parent_tag, self.parents, ontology_prefix)
455
+ # 14
456
+ yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
457
+ # 15
458
+ yield from _reference_list_tag("union_of", self.union_of, ontology_prefix=ontology_prefix)
459
+ # 16
460
+ yield from _reference_list_tag(
461
+ "equivalent_to", self.equivalent_to, ontology_prefix=ontology_prefix
462
+ )
463
+ # 17
464
+ yield from _reference_list_tag(
465
+ "disjoint_from", self.disjoint_from, ontology_prefix=ontology_prefix
466
+ )
467
+ # 18
468
+ if emit_object_properties:
469
+ yield from self._iterate_obo_relations(
470
+ ontology_prefix=ontology_prefix, typedefs=typedefs
471
+ )
472
+ # 19 TODO created_by
473
+ # 20
474
+ for x in self.get_property_values(v.obo_creation_date):
475
+ if isinstance(x, OBOLiteral):
476
+ yield f"creation_date: {x.value}"
477
+ # 21
478
+ yield from _boolean_tag("is_obsolete", self.is_obsolete)
479
+ # 22
480
+ yield from _tag_property_targets(
481
+ "replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
482
+ )
483
+ # 23
484
+ yield from _tag_property_targets(
485
+ "consider", self, v.see_also, ontology_prefix=ontology_prefix
486
+ )
462
487
 
463
488
 
464
489
  #: A set of warnings, used to make sure we don't show the same one over and over
465
- _TYPEDEF_WARNINGS: set[tuple[str, str]] = set()
466
-
490
+ _SYNONYM_TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set()
467
491
 
468
- def _sort_relations(r):
469
- typedef, _references = r
470
- return typedef.preferred_curie
471
492
 
472
-
473
- def _sort_properties(r):
474
- o = r[1]
475
- if isinstance(o, str):
476
- return o
477
- elif isinstance(o, Term):
478
- return o.curie
479
- else:
480
- raise TypeError(f"What {type(r)}: {r}")
493
+ def _synonym_typedef_warn(
494
+ prefix: str,
495
+ predicate: Reference | None,
496
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
497
+ ) -> SynonymTypeDef | None:
498
+ if predicate is None or predicate.pair == DEFAULT_SYNONYM_TYPE.pair:
499
+ return None
500
+ if predicate.pair in default_synonym_typedefs:
501
+ return default_synonym_typedefs[predicate.pair]
502
+ if predicate.pair in synonym_typedefs:
503
+ return synonym_typedefs[predicate.pair]
504
+ key = prefix, predicate
505
+ if key not in _SYNONYM_TYPEDEF_WARNINGS:
506
+ _SYNONYM_TYPEDEF_WARNINGS.add(key)
507
+ predicate_preferred_curie = get_preferred_curie(predicate)
508
+ if predicate.prefix == "obo":
509
+ # Throw our hands up in the air. By using `obo` as the prefix,
510
+ # we already threw using "real" definitions out the window
511
+ logger.warning(
512
+ f"[{prefix}] synonym typedef with OBO prefix not defined: {predicate_preferred_curie}."
513
+ f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
514
+ f"which automatically gets an OBO prefix."
515
+ )
516
+ else:
517
+ logger.warning(f"[{prefix}] synonym typedef not defined: {predicate_preferred_curie}")
518
+ return None
481
519
 
482
520
 
483
521
  class BioregistryError(ValueError):
@@ -495,6 +533,9 @@ class BioregistryError(ValueError):
495
533
  )
496
534
 
497
535
 
536
+ LOGGED_MISSING_URI: set[tuple[str, str]] = set()
537
+
538
+
498
539
  @dataclass
499
540
  class Obo:
500
541
  """An OBO document."""
@@ -506,22 +547,19 @@ class Obo:
506
547
  check_bioregistry_prefix: ClassVar[bool] = True
507
548
 
508
549
  #: The name of the ontology. If not given, tries looking up with the Bioregistry.
509
- name: ClassVar[Optional[str]] = None
510
-
511
- #: The OBO format
512
- format_version: ClassVar[str] = "1.2"
550
+ name: ClassVar[str | None] = None
513
551
 
514
552
  #: Type definitions
515
- typedefs: ClassVar[Optional[list[TypeDef]]] = None
553
+ typedefs: ClassVar[list[TypeDef] | None] = None
516
554
 
517
555
  #: Synonym type definitions
518
- synonym_typedefs: ClassVar[Optional[list[SynonymTypeDef]]] = None
556
+ synonym_typedefs: ClassVar[list[SynonymTypeDef] | None] = None
519
557
 
520
558
  #: An annotation about how an ontology was generated
521
- auto_generated_by: ClassVar[Optional[str]] = None
559
+ auto_generated_by: ClassVar[str | None] = None
522
560
 
523
561
  #: The idspaces used in the document
524
- idspaces: ClassVar[Optional[Mapping[str, str]]] = None
562
+ idspaces: ClassVar[Mapping[str, str] | None] = None
525
563
 
526
564
  #: For super-sized datasets that shouldn't be read into memory
527
565
  iter_only: ClassVar[bool] = False
@@ -530,28 +568,32 @@ class Obo:
530
568
  dynamic_version: ClassVar[bool] = False
531
569
 
532
570
  #: Set to a static version for the resource (i.e., the resource is not itself versioned)
533
- static_version: ClassVar[Optional[str]] = None
571
+ static_version: ClassVar[str | None] = None
534
572
 
535
- bioversions_key: ClassVar[Optional[str]] = None
573
+ bioversions_key: ClassVar[str | None] = None
536
574
 
537
575
  #: Root terms to use for the ontology
538
- root_terms: ClassVar[Optional[list[Reference]]] = None
576
+ root_terms: ClassVar[list[Reference] | None] = None
539
577
 
540
578
  #: The date the ontology was generated
541
- date: Optional[datetime] = field(default_factory=datetime.today)
579
+ date: datetime.datetime | None = field(default_factory=datetime.datetime.today)
542
580
 
543
581
  #: The ontology version
544
- data_version: Optional[str] = None
582
+ data_version: str | None = None
545
583
 
546
584
  #: Should this ontology be reloaded?
547
585
  force: bool = False
548
586
 
549
587
  #: The hierarchy of terms
550
- _hierarchy: Optional[nx.DiGraph] = field(init=False, default=None, repr=False)
588
+ _hierarchy: nx.DiGraph | None = field(init=False, default=None, repr=False)
551
589
  #: A cache of terms
552
- _items: Optional[list[Term]] = field(init=False, default=None, repr=False)
590
+ _items: list[Term] | None = field(init=False, default=None, repr=False)
591
+
592
+ subsetdefs: ClassVar[list[tuple[Reference, str]] | None] = None
553
593
 
554
- term_sort_key: ClassVar[Optional[Callable[["Obo", Term], int]]] = None
594
+ property_values: ClassVar[list[Annotation] | None] = None
595
+
596
+ imports: ClassVar[list[str] | None] = None
555
597
 
556
598
  def __post_init__(self):
557
599
  """Run post-init checks."""
@@ -576,9 +618,85 @@ class Obo:
576
618
  elif "/" in self.data_version:
577
619
  raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
578
620
  if self.auto_generated_by is None:
579
- self.auto_generated_by = f"bio2obo:{self.ontology}" # type:ignore
621
+ self.auto_generated_by = f"PyOBO v{get_pyobo_version(with_git_hash=True)} on {datetime.datetime.now().isoformat()}" # type:ignore
622
+
623
+ def _get_clean_idspaces(self) -> dict[str, str]:
624
+ """Get normalized idspace dictionary."""
625
+ rv = dict(
626
+ ChainMap(
627
+ # Add reasonable defaults, most of which are
628
+ # mandated by the OWL spec anyway (except skos?)
629
+ DEFAULT_PREFIX_MAP,
630
+ dict(self.idspaces or {}),
631
+ # automatically detect all prefixes in reference in the ontology,
632
+ # then look up Bioregistry-approved URI prefixes
633
+ self._infer_prefix_map(),
634
+ )
635
+ )
636
+ return rv
580
637
 
581
- def _get_version(self) -> Optional[str]:
638
+ def _infer_prefix_map(self) -> dict[str, str]:
639
+ """Get a prefix map including all prefixes used in the ontology."""
640
+ rv = {}
641
+ for prefix in sorted(self._get_prefixes(), key=str.casefold):
642
+ resource = bioregistry.get_resource(prefix)
643
+ if resource is None:
644
+ raise ValueError
645
+ uri_prefix = resource.get_rdf_uri_prefix()
646
+ if uri_prefix is None:
647
+ uri_prefix = resource.get_uri_prefix()
648
+ if uri_prefix is None:
649
+ # This allows us an escape hatch, since some
650
+ # prefixes don't have an associated URI prefix
651
+ uri_prefix = f"https://bioregistry.io/{prefix}:"
652
+ if (self.ontology, prefix) not in LOGGED_MISSING_URI:
653
+ LOGGED_MISSING_URI.add((self.ontology, prefix))
654
+ logger.warning(
655
+ "[%s] uses prefix with no URI format: %s. Auto-generating Bioregistry link: %s",
656
+ self.ontology,
657
+ prefix,
658
+ uri_prefix,
659
+ )
660
+
661
+ pp = bioregistry.get_preferred_prefix(prefix) or str(prefix)
662
+ rv[pp] = uri_prefix
663
+ return rv
664
+
665
+ def _get_prefixes(self) -> set[str]:
666
+ """Get all prefixes used by the ontology."""
667
+ prefixes: set[str] = set(DEFAULT_PREFIX_MAP)
668
+ for stanza in self._iter_stanzas():
669
+ prefixes.update(stanza._get_prefixes())
670
+ for synonym_typedef in self.synonym_typedefs or []:
671
+ prefixes.update(synonym_typedef._get_prefixes())
672
+ prefixes.update(subset.prefix for subset, _ in self.subsetdefs or [])
673
+ # _iterate_property_pairs covers metadata, root terms,
674
+ # and properties in self.property_values
675
+ prefixes.update(_get_prefixes_from_annotations(self._iterate_property_pairs()))
676
+ if self.auto_generated_by:
677
+ prefixes.add("oboInOwl")
678
+ return prefixes
679
+
680
+ def _get_references(self) -> dict[str, set[Reference]]:
681
+ """Get all references used by the ontology."""
682
+ rv: defaultdict[str, set[Reference]] = defaultdict(set)
683
+
684
+ for rr in itt.chain(self, self.typedefs or [], self.synonym_typedefs or []):
685
+ for prefix, references in rr._get_references().items():
686
+ rv[prefix].update(references)
687
+ for subset, _ in self.subsetdefs or []:
688
+ rv[subset.prefix].add(subset)
689
+ # _iterate_property_pairs covers metadata, root terms,
690
+ # and properties in self.property_values
691
+ for prefix, references in _get_references_from_annotations(
692
+ self._iterate_property_pairs()
693
+ ).items():
694
+ rv[prefix].update(references)
695
+ if self.auto_generated_by:
696
+ rv[v.obo_autogenerated_by.prefix].add(v.obo_autogenerated_by)
697
+ return dict(rv)
698
+
699
+ def _get_version(self) -> str | None:
582
700
  if self.bioversions_key:
583
701
  try:
584
702
  return get_version(self.bioversions_key)
@@ -594,6 +712,13 @@ class Obo:
594
712
  raise ValueError(f"There is no version available for {self.ontology}")
595
713
  return self.data_version
596
714
 
715
+ @property
716
+ def _prefix_version(self) -> str:
717
+ """Get the prefix and version (for logging)."""
718
+ if self.data_version:
719
+ return f"{self.ontology} {self.data_version}"
720
+ return self.ontology
721
+
597
722
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
598
723
  """Iterate over terms in this ontology."""
599
724
  raise NotImplementedError
@@ -604,273 +729,481 @@ class Obo:
604
729
 
605
730
  return graph_from_obo(self)
606
731
 
607
- def write_obograph(self, path: Path) -> None:
732
+ def write_obograph(self, path: str | Path) -> None:
608
733
  """Write OBO Graph json."""
609
734
  graph = self.get_graph()
610
- path.write_text(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
735
+ with safe_open(path, read=False) as file:
736
+ file.write(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
611
737
 
612
738
  @classmethod
613
- def cli(cls) -> None:
739
+ def cli(cls, *args, default_rewrite: bool = False) -> Any:
614
740
  """Run the CLI for this class."""
615
- cli = cls.get_cls_cli()
616
- cli()
741
+ cli = cls.get_cls_cli(default_rewrite=default_rewrite)
742
+ return cli(*args)
617
743
 
618
744
  @classmethod
619
- def get_cls_cli(cls) -> click.Command:
745
+ def get_cls_cli(cls, *, default_rewrite: bool = False) -> click.Command:
620
746
  """Get the CLI for this class."""
621
747
 
622
748
  @click.command()
623
749
  @verbose_option
624
750
  @force_option
625
- @click.option("--rewrite", "-r", is_flag=True)
751
+ @click.option(
752
+ "--rewrite/--no-rewrite",
753
+ "-r",
754
+ default=False,
755
+ is_flag=True,
756
+ help="Re-process the data, but don't download it again.",
757
+ )
626
758
  @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
627
- @click.option("--nodes", is_flag=True, help="Write nodes file")
759
+ @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
760
+ @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
628
761
  @click.option(
629
762
  "--version", help="Specify data version to get. Use this if bioversions is acting up."
630
763
  )
631
- def _main(force: bool, owl: bool, nodes: bool, version: Optional[str], rewrite: bool):
764
+ def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
765
+ rewrite = True
632
766
  try:
633
767
  inst = cls(force=force, data_version=version)
634
768
  except Exception as e:
635
769
  click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
636
770
  sys.exit(1)
637
-
638
- try:
639
- inst.write_default(
640
- write_obograph=True,
641
- write_obo=True,
642
- write_owl=owl,
643
- write_nodes=nodes,
644
- force=force or rewrite,
645
- use_tqdm=True,
646
- )
647
- except Exception as e:
648
- click.secho(f"[{cls.ontology}] Got an exception during OBO writing {type(e)}")
649
- sys.exit(1)
771
+ inst.write_default(
772
+ write_obograph=False,
773
+ write_obo=False,
774
+ write_owl=owl,
775
+ write_ofn=ofn,
776
+ write_ttl=ttl,
777
+ write_nodes=True,
778
+ force=force or rewrite,
779
+ use_tqdm=True,
780
+ )
650
781
 
651
782
  return _main
652
783
 
653
784
  @property
654
785
  def date_formatted(self) -> str:
655
786
  """Get the date as a formatted string."""
656
- return (self.date if self.date else datetime.now()).strftime(DATE_FORMAT)
787
+ return (self.date if self.date else datetime.datetime.now()).strftime(DATE_FORMAT)
788
+
789
+ def _iter_terms_safe(self) -> Iterator[Term]:
790
+ if self.iter_only:
791
+ return iter(self.iter_terms(force=self.force))
792
+ return iter(self._items_accessor)
657
793
 
658
794
  def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Term]:
795
+ yv = self._iter_terms_safe()
659
796
  if use_tqdm:
660
- total: Optional[int]
797
+ total: int | None
661
798
  try:
662
799
  total = len(self._items_accessor)
663
800
  except TypeError:
664
801
  total = None
665
- yield from tqdm(self, desc=desc, unit_scale=True, unit="term", total=total)
666
- else:
667
- yield from self
668
-
669
- def iterate_obo_lines(self) -> Iterable[str]:
670
- """Iterate over the lines to write in an OBO file."""
671
- yield f"format-version: {self.format_version}"
802
+ yv = tqdm(yv, desc=desc, unit_scale=True, unit="term", total=total)
803
+ yield from yv
672
804
 
673
- if self.auto_generated_by is not None:
674
- yield f"auto-generated-by: {self.auto_generated_by}"
805
+ def _iter_stanzas(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Stanza]:
806
+ yield from self._iter_terms(use_tqdm=use_tqdm, desc=desc)
807
+ yield from self.typedefs or []
675
808
 
676
- if self.data_version is not None:
809
+ def iterate_obo_lines(
810
+ self,
811
+ emit_object_properties: bool = True,
812
+ emit_annotation_properties: bool = True,
813
+ ) -> Iterable[str]:
814
+ """Iterate over the lines to write in an OBO file.
815
+
816
+ Here's the order:
817
+
818
+ 1. format-version (technically, this is the only required field)
819
+ 2. data-version
820
+ 3. date
821
+ 4. saved-by
822
+ 5. auto-generated-by
823
+ 6. import
824
+ 7. subsetdef
825
+ 8. synonymtypedef
826
+ 9. default-namespace
827
+ 10. namespace-id-rule
828
+ 11. idspace
829
+ 12. treat-xrefs-as-equivalent
830
+ 13. treat-xrefs-as-genus-differentia
831
+ 14. treat-xrefs-as-relationship
832
+ 15. treat-xrefs-as-is_a
833
+ 16. remark
834
+ 17. ontology
835
+ """
836
+ # 1
837
+ yield f"format-version: {FORMAT_VERSION}"
838
+ # 2
839
+ if self.data_version:
677
840
  yield f"data-version: {self.data_version}"
678
- else:
679
- yield f"date: {self.date_formatted}"
841
+ # 3
842
+ if self.date:
843
+ f"date: {self.date_formatted}"
844
+ # 4 TODO saved-by
845
+ # 5
846
+ if self.auto_generated_by:
847
+ yield f"auto-generated-by: {self.auto_generated_by}"
848
+ # 6
849
+ for imp in self.imports or []:
850
+ yield f"import: {imp}"
851
+ # 7
852
+ for subset, subset_remark in self.subsetdefs or []:
853
+ yield f'subsetdef: {reference_escape(subset, ontology_prefix=self.ontology)} "{subset_remark}"'
854
+ # 8
855
+ for synonym_typedef in sorted(self.synonym_typedefs or []):
856
+ if synonym_typedef.curie == DEFAULT_SYNONYM_TYPE.curie:
857
+ continue
858
+ yield synonym_typedef.to_obo(ontology_prefix=self.ontology)
859
+ # 9 TODO default-namespace
860
+ # 10 TODO namespace-id-rule
861
+ # 11
862
+ for prefix, url in sorted(self._get_clean_idspaces().items()):
863
+ if prefix in DEFAULT_PREFIX_MAP:
864
+ # we don't need to write out the 4 default prefixes from
865
+ # table 2 in https://www.w3.org/TR/owl2-syntax/#IRIs since
866
+ # they're considered to always be builtin
867
+ continue
680
868
 
681
- for prefix, url in sorted((self.idspaces or {}).items()):
682
- yield f"idspace: {prefix} {url}"
869
+ # additional assumptions about built in
870
+ if prefix in {"obo", "oboInOwl"}:
871
+ continue
683
872
 
684
- for synonym_typedef in sorted((self.synonym_typedefs or []), key=attrgetter("curie")):
685
- if synonym_typedef.curie == DEFAULT_SYNONYM_TYPE.curie:
873
+ # ROBOT assumes that all OBO foundry prefixes are builtin,
874
+ # so don't re-declare them
875
+ if bioregistry.is_obo_foundry(prefix):
686
876
  continue
687
- yield synonym_typedef.to_obo()
688
877
 
878
+ yv = f"idspace: {prefix} {url}"
879
+ if _yv_name := bioregistry.get_name(prefix):
880
+ yv += f' "{_yv_name}"'
881
+ yield yv
882
+ # 12-15 are handled only during reading, and
883
+ # PyOBO unmacros things before outputting
884
+ # 12 treat-xrefs-as-equivalent
885
+ # 13 treat-xrefs-as-genus-differentia
886
+ # 14 treat-xrefs-as-relationship
887
+ # 15 treat-xrefs-as-is_a
888
+ # 16 TODO remark
889
+ # 17
689
890
  yield f"ontology: {self.ontology}"
891
+ # 18 (secret)
892
+ yield from self._iterate_properties()
893
+
894
+ typedefs = self._index_typedefs()
895
+ synonym_typedefs = self._index_synonym_typedefs()
896
+
897
+ # PROPERTIES
898
+ for typedef in sorted(self.typedefs or []):
899
+ yield from typedef.iterate_obo_lines(
900
+ ontology_prefix=self.ontology,
901
+ typedefs=typedefs,
902
+ synonym_typedefs=synonym_typedefs,
903
+ )
690
904
 
691
- if self.name is None:
692
- raise ValueError("ontology is missing name")
693
- yield f'property_value: http://purl.org/dc/elements/1.1/title "{self.name}" xsd:string'
694
- license_spdx_id = bioregistry.get_license(self.ontology)
695
- if license_spdx_id:
696
- # TODO add SPDX to idspaces and use as a CURIE?
697
- yield f'property_value: http://purl.org/dc/terms/license "{license_spdx_id}" xsd:string'
698
- description = bioregistry.get_description(self.ontology)
699
- if description:
905
+ # TERMS AND INSTANCES
906
+ for term in self._iter_terms():
907
+ yield from term.iterate_obo_lines(
908
+ ontology_prefix=self.ontology,
909
+ typedefs=typedefs,
910
+ synonym_typedefs=synonym_typedefs,
911
+ emit_object_properties=emit_object_properties,
912
+ emit_annotation_properties=emit_annotation_properties,
913
+ )
914
+
915
+ def _iterate_properties(self) -> Iterable[str]:
916
+ for predicate, value in self._iterate_property_pairs():
917
+ match value:
918
+ case OBOLiteral():
919
+ end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
920
+ case Reference():
921
+ end = reference_escape(value, ontology_prefix=self.ontology)
922
+ yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
923
+
924
+ def _iterate_property_pairs(self) -> Iterable[Annotation]:
925
+ # Title
926
+ if self.name:
927
+ yield Annotation(v.has_title, OBOLiteral.string(self.name))
928
+
929
+ # License
930
+ # TODO add SPDX to idspaces and use as a CURIE?
931
+ if license_spdx_id := bioregistry.get_license(self.ontology):
932
+ if license_spdx_id.startswith("http"):
933
+ license_literal = OBOLiteral.uri(license_spdx_id)
934
+ else:
935
+ license_literal = OBOLiteral.string(license_spdx_id)
936
+ yield Annotation(v.has_license, license_literal)
937
+
938
+ # Description
939
+ if description := bioregistry.get_description(self.ontology):
700
940
  description = obo_escape_slim(description.strip())
701
- yield f'property_value: http://purl.org/dc/elements/1.1/description "{description}" xsd:string'
941
+ yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
702
942
 
943
+ # Root terms
703
944
  for root_term in self.root_terms or []:
704
- yield f"property_value: {has_ontology_root_term.preferred_curie} {root_term.preferred_curie}"
945
+ yield Annotation(v.has_ontology_root_term, root_term)
946
+
947
+ # Extras
948
+ if self.property_values:
949
+ yield from self.property_values
705
950
 
706
- for typedef in sorted(self.typedefs or [], key=attrgetter("curie")):
707
- yield from typedef.iterate_obo_lines()
951
+ def _index_typedefs(self) -> Mapping[ReferenceTuple, TypeDef]:
952
+ from .typedef import default_typedefs
708
953
 
709
- for term in self:
710
- yield from term.iterate_obo_lines(ontology=self.ontology, typedefs=self.typedefs)
954
+ return ChainMap(
955
+ {t.pair: t for t in self.typedefs or []},
956
+ default_typedefs,
957
+ )
958
+
959
+ def _index_synonym_typedefs(self) -> Mapping[ReferenceTuple, SynonymTypeDef]:
960
+ return ChainMap(
961
+ {t.pair: t for t in self.synonym_typedefs or []},
962
+ default_synonym_typedefs,
963
+ )
711
964
 
712
965
  def write_obo(
713
- self, file: Union[None, str, TextIO, Path] = None, use_tqdm: bool = False
966
+ self,
967
+ file: None | str | TextIO | Path = None,
968
+ *,
969
+ use_tqdm: bool = False,
970
+ emit_object_properties: bool = True,
971
+ emit_annotation_properties: bool = True,
714
972
  ) -> None:
715
973
  """Write the OBO to a file."""
716
- it = self.iterate_obo_lines()
974
+ it = self.iterate_obo_lines(
975
+ emit_object_properties=emit_object_properties,
976
+ emit_annotation_properties=emit_annotation_properties,
977
+ )
717
978
  if use_tqdm:
718
- it = tqdm(it, desc=f"Writing {self.ontology}", unit_scale=True, unit="line")
719
- if isinstance(file, (str, Path, os.PathLike)):
720
- with open(file, "w") as fh:
979
+ it = tqdm(
980
+ it,
981
+ desc=f"[{self._prefix_version}] writing OBO",
982
+ unit_scale=True,
983
+ unit="line",
984
+ )
985
+ if isinstance(file, str | Path | os.PathLike):
986
+ with safe_open(file, read=False) as fh:
721
987
  self._write_lines(it, fh)
722
988
  else:
723
989
  self._write_lines(it, file)
724
990
 
725
991
  @staticmethod
726
- def _write_lines(it, file: Optional[TextIO]):
992
+ def _write_lines(it, file: TextIO | None):
727
993
  for line in it:
728
994
  print(line, file=file)
729
995
 
730
- def write_obonet_gz(self, path: Union[str, Path]) -> None:
996
+ def write_obonet_gz(self, path: str | Path) -> None:
731
997
  """Write the OBO to a gzipped dump in Obonet JSON."""
732
998
  graph = self.to_obonet()
733
- with gzip.open(path, "wt") as file:
734
- json.dump(nx.node_link_data(graph), file)
999
+ write_gzipped_graph(path=path, graph=graph)
735
1000
 
736
- def _path(self, *parts: str, name: Optional[str] = None) -> Path:
737
- return prefix_directory_join(self.ontology, *parts, name=name, version=self.data_version)
1001
+ def write_ofn(self, path: str | Path) -> None:
1002
+ """Write as Functional OWL (OFN)."""
1003
+ from .functional.obo_to_functional import get_ofn_from_obo
738
1004
 
739
- def _cache(self, *parts: str, name: Optional[str] = None) -> Path:
740
- return self._path("cache", *parts, name=name)
1005
+ ofn = get_ofn_from_obo(self)
1006
+ ofn.write_funowl(path)
741
1007
 
742
- @property
743
- def _names_path(self) -> Path:
744
- return self._cache(name="names.tsv")
1008
+ def write_rdf(self, path: str | Path) -> None:
1009
+ """Write as Turtle RDF."""
1010
+ from .functional.obo_to_functional import get_ofn_from_obo
745
1011
 
746
- @property
747
- def _definitions_path(self) -> Path:
748
- return self._cache(name="definitions.tsv")
1012
+ ofn = get_ofn_from_obo(self)
1013
+ ofn.write_rdf(path)
749
1014
 
750
- @property
751
- def _species_path(self) -> Path:
752
- return self._cache(name="species.tsv")
1015
+ def write_nodes(self, path: str | Path) -> None:
1016
+ """Write a nodes TSV file."""
1017
+ write_iterable_tsv(
1018
+ path=path,
1019
+ header=self.nodes_header,
1020
+ it=self.iterate_edge_rows(),
1021
+ )
753
1022
 
754
1023
  @property
755
- def _synonyms_path(self) -> Path:
756
- return self._cache(name="synonyms.tsv")
1024
+ def nodes_header(self) -> Sequence[str]:
1025
+ """Get the header for nodes."""
1026
+ return [
1027
+ "curie:ID",
1028
+ "name:string",
1029
+ "synonyms:string[]",
1030
+ "synonym_predicates:string[]",
1031
+ "synonym_types:string[]",
1032
+ "definition:string",
1033
+ "deprecated:boolean",
1034
+ "type:string",
1035
+ "provenance:string[]",
1036
+ "alts:string[]",
1037
+ "replaced_by:string[]",
1038
+ "mapping_objects:string[]",
1039
+ "mapping_predicates:string[]",
1040
+ "version:string",
1041
+ ]
757
1042
 
758
- @property
759
- def _alts_path(self):
760
- return self._cache(name="alt_ids.tsv")
1043
+ def _get_node_row(self, node: Term, sep: str, version: str) -> Sequence[str]:
1044
+ synonym_predicate_curies, synonym_type_curies, synonyms = [], [], []
1045
+ for synonym in node.synonyms:
1046
+ synonym_predicate_curies.append(synonym.predicate.curie)
1047
+ synonym_type_curies.append(synonym.type.curie if synonym.type else "")
1048
+ synonyms.append(synonym.name)
1049
+ mapping_predicate_curies, mapping_target_curies = [], []
1050
+ for predicate, obj in node.get_mappings(include_xrefs=True, add_context=False):
1051
+ mapping_predicate_curies.append(predicate.curie)
1052
+ mapping_target_curies.append(obj.curie)
1053
+ return (
1054
+ node.curie,
1055
+ node.name or "",
1056
+ sep.join(synonyms),
1057
+ sep.join(synonym_predicate_curies),
1058
+ sep.join(synonym_type_curies),
1059
+ node.definition or "",
1060
+ "true" if node.is_obsolete else "false",
1061
+ node.type,
1062
+ sep.join(
1063
+ reference.curie for reference in node.provenance if isinstance(reference, Reference)
1064
+ ),
1065
+ sep.join(alt_reference.curie for alt_reference in node.alt_ids),
1066
+ sep.join(ref.curie for ref in node.get_replaced_by()),
1067
+ sep.join(mapping_target_curies),
1068
+ sep.join(mapping_predicate_curies),
1069
+ version,
1070
+ )
761
1071
 
762
- @property
763
- def _typedefs_path(self) -> Path:
764
- return self._cache(name="typedefs.tsv")
1072
+ def iterate_node_rows(self, sep: str = ";") -> Iterable[Sequence[str]]:
1073
+ """Get a nodes iterator appropriate for serialization."""
1074
+ version = self.data_version or ""
1075
+ for node in self.iter_terms():
1076
+ if node.prefix != self.ontology:
1077
+ continue
1078
+ yield self._get_node_row(node, sep=sep, version=version)
1079
+
1080
+ def write_edges(self, path: str | Path) -> None:
1081
+ """Write a edges TSV file."""
1082
+ # node, this is actually taken care of as part of the cache configuration
1083
+ write_iterable_tsv(
1084
+ path=path,
1085
+ header=self.edges_header,
1086
+ it=self.iterate_edge_rows(),
1087
+ )
765
1088
 
766
- @property
767
- def _xrefs_path(self) -> Path:
768
- return self._cache(name="xrefs.tsv")
1089
+ def _path(self, *parts: str, name: str | None = None) -> Path:
1090
+ return prefix_directory_join(self.ontology, *parts, name=name, version=self.data_version)
769
1091
 
770
- @property
771
- def _relations_path(self) -> Path:
772
- return self._cache(name="relations.tsv")
773
-
774
- @property
775
- def _properties_path(self) -> Path:
776
- return self._cache(name="properties.tsv")
1092
+ def _get_cache_path(self, name: CacheArtifact) -> Path:
1093
+ return get_cache_path(self.ontology, name=name, version=self.data_version)
777
1094
 
778
1095
  @property
779
1096
  def _root_metadata_path(self) -> Path:
780
1097
  return prefix_directory_join(self.ontology, name="metadata.json")
781
1098
 
782
- @property
783
- def _versioned_metadata_path(self) -> Path:
784
- return self._cache(name="metadata.json")
785
-
786
1099
  @property
787
1100
  def _obo_path(self) -> Path:
788
- return get_prefix_obo_path(self.ontology, version=self.data_version)
1101
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo.gz")
789
1102
 
790
1103
  @property
791
1104
  def _obograph_path(self) -> Path:
792
- return self._path(name=f"{self.ontology}.json")
1105
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json.gz")
793
1106
 
794
1107
  @property
795
1108
  def _owl_path(self) -> Path:
796
- return self._path(name=f"{self.ontology}.owl")
1109
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl.gz")
797
1110
 
798
1111
  @property
799
1112
  def _obonet_gz_path(self) -> Path:
800
- return self._path(name=f"{self.ontology}.obonet.json.gz")
1113
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obonet.json.gz")
801
1114
 
802
1115
  @property
803
- def _nodes_path(self) -> Path:
804
- return self._path(name=f"{self.ontology}.nodes.tsv")
805
-
806
- def write_default(
807
- self,
808
- use_tqdm: bool = False,
809
- force: bool = False,
810
- write_obo: bool = False,
811
- write_obonet: bool = False,
812
- write_obograph: bool = False,
813
- write_owl: bool = False,
814
- write_nodes: bool = False,
815
- ) -> None:
816
- """Write the OBO to the default path."""
817
- metadata = self.get_metadata()
818
- for path in (self._root_metadata_path, self._versioned_metadata_path):
819
- logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
820
- with path.open("w") as file:
821
- json.dump(metadata, file, indent=2)
1116
+ def _ofn_path(self) -> Path:
1117
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn.gz")
822
1118
 
823
- logger.debug(
824
- "[%s v%s] caching typedefs to %s", self.ontology, self.data_version, self._typedefs_path
825
- )
826
- typedef_df: pd.DataFrame = self.get_typedef_df()
827
- typedef_df.sort_values(list(typedef_df.columns), inplace=True)
828
- typedef_df.to_csv(self._typedefs_path, sep="\t", index=False)
1119
+ @property
1120
+ def _ttl_path(self) -> Path:
1121
+ return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ttl")
829
1122
 
830
- for label, path, header, fn in [
831
- ("names", self._names_path, [f"{self.ontology}_id", "name"], self.iterate_id_name),
1123
+ def _get_cache_config(self) -> list[tuple[CacheArtifact, Sequence[str], Callable]]:
1124
+ return [
1125
+ (CacheArtifact.names, [f"{self.ontology}_id", "name"], self.iterate_id_name),
832
1126
  (
833
- "definitions",
834
- self._definitions_path,
1127
+ CacheArtifact.definitions,
835
1128
  [f"{self.ontology}_id", "definition"],
836
1129
  self.iterate_id_definition,
837
1130
  ),
838
1131
  (
839
- "species",
840
- self._species_path,
1132
+ CacheArtifact.species,
841
1133
  [f"{self.ontology}_id", "taxonomy_id"],
842
1134
  self.iterate_id_species,
843
1135
  ),
1136
+ (CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
1137
+ (CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
1138
+ (CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
1139
+ (CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
1140
+ (
1141
+ CacheArtifact.object_properties,
1142
+ self.object_properties_header,
1143
+ self.iter_object_properties,
1144
+ ),
1145
+ (
1146
+ CacheArtifact.literal_properties,
1147
+ self.literal_properties_header,
1148
+ self.iter_literal_properties,
1149
+ ),
844
1150
  (
845
- "synonyms",
846
- self._synonyms_path,
847
- [f"{self.ontology}_id", "synonym"],
848
- self.iterate_synonym_rows,
1151
+ CacheArtifact.literal_mappings,
1152
+ ssslm.LiteralMappingTuple._fields,
1153
+ self.iterate_literal_mapping_rows,
849
1154
  ),
850
- ("alts", self._alts_path, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
851
- ("xrefs", self._xrefs_path, self.xrefs_header, self.iterate_xref_rows),
852
- ("relations", self._relations_path, self.relations_header, self.iter_relation_rows),
853
- ("properties", self._properties_path, self.properties_header, self.iter_property_rows),
854
- ]:
855
- if path.exists() and not force:
1155
+ ]
1156
+
1157
+ def write_metadata(self) -> None:
1158
+ """Write the metadata JSON file."""
1159
+ metadata = self.get_metadata()
1160
+ for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
1161
+ logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
1162
+ with safe_open(path, read=False) as file:
1163
+ json.dump(metadata, file, indent=2)
1164
+
1165
+ def write_prefix_map(self) -> None:
1166
+ """Write a prefix map file that includes all prefixes used in this ontology."""
1167
+ with self._get_cache_path(CacheArtifact.prefixes).open("w") as file:
1168
+ json.dump(self._get_clean_idspaces(), file, indent=2)
1169
+
1170
+ def write_cache(self, *, force: bool = False) -> None:
1171
+ """Write cache parts."""
1172
+ typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
1173
+ logger.debug(
1174
+ "[%s] caching typedefs to %s",
1175
+ self._prefix_version,
1176
+ typedefs_path,
1177
+ )
1178
+ typedef_df: pd.DataFrame = self.get_typedef_df()
1179
+ typedef_df.sort_values(list(typedef_df.columns), inplace=True)
1180
+ typedef_df.to_csv(typedefs_path, sep="\t", index=False)
1181
+
1182
+ for cache_artifact, header, fn in self._get_cache_config():
1183
+ path = self._get_cache_path(cache_artifact)
1184
+ if path.is_file() and not force:
856
1185
  continue
857
- logger.debug("[%s v%s] caching %s to %s", self.ontology, self.data_version, label, path)
1186
+ tqdm.write(
1187
+ f"[{self._prefix_version}] writing {cache_artifact.name} to {path}",
1188
+ )
858
1189
  write_iterable_tsv(
859
1190
  path=path,
860
1191
  header=header,
861
1192
  it=fn(), # type:ignore
862
1193
  )
863
1194
 
864
- for relation in (is_a, has_part, part_of, from_species, orthologous):
865
- if relation is not is_a and self.typedefs is not None and relation not in self.typedefs:
1195
+ typedefs = self._index_typedefs()
1196
+ for relation in (v.is_a, v.has_part, v.part_of, v.from_species, v.orthologous):
1197
+ if relation is not v.is_a and relation.pair not in typedefs:
866
1198
  continue
867
- relations_path = self._cache("relations", name=f"{relation.curie}.tsv")
868
- if relations_path.exists() and not force:
1199
+ relations_path = get_relation_cache_path(
1200
+ self.ontology, reference=relation, version=self.data_version
1201
+ )
1202
+ if relations_path.is_file() and not force:
869
1203
  continue
870
1204
  logger.debug(
871
- "[%s v%s] caching relation %s ! %s",
872
- self.ontology,
873
- self.data_version,
1205
+ "[%s] caching relation %s ! %s",
1206
+ self._prefix_version,
874
1207
  relation.curie,
875
1208
  relation.name,
876
1209
  )
@@ -880,36 +1213,82 @@ class Obo:
880
1213
  relation_df.sort_values(list(relation_df.columns), inplace=True)
881
1214
  relation_df.to_csv(relations_path, sep="\t", index=False)
882
1215
 
883
- if (write_obo or write_owl) and (not self._obo_path.exists() or force):
1216
+ def write_default(
1217
+ self,
1218
+ use_tqdm: bool = False,
1219
+ force: bool = False,
1220
+ write_obo: bool = False,
1221
+ write_obonet: bool = False,
1222
+ write_obograph: bool = False,
1223
+ write_owl: bool = False,
1224
+ write_ofn: bool = False,
1225
+ write_ttl: bool = False,
1226
+ write_nodes: bool = False,
1227
+ obograph_use_internal: bool = False,
1228
+ write_cache: bool = True,
1229
+ ) -> None:
1230
+ """Write the OBO to the default path."""
1231
+ self.write_metadata()
1232
+ self.write_prefix_map()
1233
+ if write_cache:
1234
+ self.write_cache(force=force)
1235
+ if write_obo and (not self._obo_path.is_file() or force):
1236
+ tqdm.write(f"[{self._prefix_version}] writing OBO to {self._obo_path}")
884
1237
  self.write_obo(self._obo_path, use_tqdm=use_tqdm)
885
- if write_obograph and (not self._obograph_path.exists() or force):
886
- self.write_obograph(self._obograph_path)
887
- if write_owl and (not self._owl_path.exists() or force):
888
- obo_to_owl(self._obo_path, self._owl_path)
889
- if write_obonet and (not self._obonet_gz_path.exists() or force):
890
- logger.debug("writing obonet to %s", self._obonet_gz_path)
1238
+ if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.is_file() or force):
1239
+ tqdm.write(f"[{self._prefix_version}] writing OFN to {self._ofn_path}")
1240
+ self.write_ofn(self._ofn_path)
1241
+ if write_obograph and (not self._obograph_path.is_file() or force):
1242
+ if obograph_use_internal:
1243
+ tqdm.write(f"[{self._prefix_version}] writing OBO Graph to {self._obograph_path}")
1244
+ self.write_obograph(self._obograph_path)
1245
+ else:
1246
+ import bioontologies.robot
1247
+
1248
+ tqdm.write(
1249
+ f"[{self.ontology}] converting OFN to OBO Graph at {self._obograph_path}"
1250
+ )
1251
+ bioontologies.robot.convert(
1252
+ self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
1253
+ )
1254
+ if write_owl and (not self._owl_path.is_file() or force):
1255
+ tqdm.write(f"[{self._prefix_version}] writing OWL to {self._owl_path}")
1256
+ import bioontologies.robot
1257
+
1258
+ bioontologies.robot.convert(
1259
+ self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
1260
+ )
1261
+ if write_ttl and (not self._ttl_path.is_file() or force):
1262
+ tqdm.write(f"[{self._prefix_version}] writing Turtle to {self._ttl_path}")
1263
+ self.write_rdf(self._ttl_path)
1264
+ if write_obonet and (not self._obonet_gz_path.is_file() or force):
1265
+ tqdm.write(f"[{self._prefix_version}] writing obonet to {self._obonet_gz_path}")
891
1266
  self.write_obonet_gz(self._obonet_gz_path)
892
1267
  if write_nodes:
893
- self.get_graph().get_nodes_df().to_csv(self._nodes_path, sep="\t", index=False)
1268
+ nodes_path = self._get_cache_path(CacheArtifact.nodes)
1269
+ tqdm.write(f"[{self._prefix_version}] writing nodes TSV to {nodes_path}")
1270
+ self.write_nodes(nodes_path)
894
1271
 
895
1272
  @property
896
- def _items_accessor(self):
1273
+ def _items_accessor(self) -> list[Term]:
897
1274
  if self._items is None:
898
- key = self.term_sort_key or attrgetter("curie")
899
- self._items = sorted(self.iter_terms(force=self.force), key=key)
1275
+ # if the term sort key is None, then the terms get sorted by their reference
1276
+ self._items = sorted(
1277
+ self.iter_terms(force=self.force),
1278
+ )
900
1279
  return self._items
901
1280
 
902
- def __iter__(self) -> Iterator["Term"]:
903
- if self.iter_only:
904
- return iter(self.iter_terms(force=self.force))
905
- return iter(self._items_accessor)
1281
+ def __iter__(self) -> Iterator[Term]:
1282
+ yield from self._iter_terms_safe()
906
1283
 
907
1284
  def ancestors(self, identifier: str) -> set[str]:
908
1285
  """Return a set of identifiers for parents of the given identifier."""
1286
+ # FIXME switch to references
909
1287
  return nx.descendants(self.hierarchy, identifier) # note this is backwards
910
1288
 
911
1289
  def descendants(self, identifier: str) -> set[str]:
912
1290
  """Return a set of identifiers for the children of the given identifier."""
1291
+ # FIXME switch to references
913
1292
  return nx.ancestors(self.hierarchy, identifier) # note this is backwards
914
1293
 
915
1294
  def is_descendant(self, descendant: str, ancestor: str) -> bool:
@@ -917,9 +1296,9 @@ class Obo:
917
1296
 
918
1297
  .. code-block:: python
919
1298
 
920
- from pyobo import get_obo
1299
+ from pyobo import get_ontology
921
1300
 
922
- obo = get_obo("go")
1301
+ obo = get_ontology("go")
923
1302
 
924
1303
  interleukin_10_complex = "1905571" # interleukin-10 receptor complex
925
1304
  all_complexes = "0032991"
@@ -935,21 +1314,22 @@ class Obo:
935
1314
 
936
1315
  .. code-block:: python
937
1316
 
938
- from pyobo import get_obo
1317
+ from pyobo import get_ontology
939
1318
 
940
- obo = get_obo("go")
1319
+ obo = get_ontology("go")
941
1320
 
942
1321
  identifier = "1905571" # interleukin-10 receptor complex
943
1322
  is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier) # should be true
944
1323
  """
945
1324
  if self._hierarchy is None:
946
1325
  self._hierarchy = nx.DiGraph()
947
- for term in self._iter_terms(desc=f"[{self.ontology}] getting hierarchy"):
948
- for parent in term.parents:
949
- self._hierarchy.add_edge(term.identifier, parent.identifier)
1326
+ for stanza in self._iter_stanzas(desc=f"[{self.ontology}] getting hierarchy"):
1327
+ for parent in stanza.parents:
1328
+ # FIXME add referneces
1329
+ self._hierarchy.add_edge(stanza.identifier, parent.identifier)
950
1330
  return self._hierarchy
951
1331
 
952
- def to_obonet(self: "Obo", *, use_tqdm: bool = False) -> nx.MultiDiGraph:
1332
+ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
953
1333
  """Export as a :mod`obonet` style graph."""
954
1334
  rv = nx.MultiDiGraph()
955
1335
  rv.graph.update(
@@ -957,55 +1337,64 @@ class Obo:
957
1337
  "name": self.name,
958
1338
  "ontology": self.ontology,
959
1339
  "auto-generated-by": self.auto_generated_by,
960
- "typedefs": _convert_typedefs(self.typedefs),
961
- "format-version": self.format_version,
1340
+ "format-version": FORMAT_VERSION,
962
1341
  "data-version": self.data_version,
963
- "synonymtypedef": _convert_synonym_typedefs(self.synonym_typedefs),
964
1342
  "date": self.date_formatted,
1343
+ "typedefs": [typedef.reference.model_dump() for typedef in self.typedefs or []],
1344
+ "synonymtypedef": [
1345
+ synonym_typedef.to_obo(ontology_prefix=self.ontology)
1346
+ for synonym_typedef in self.synonym_typedefs or []
1347
+ ],
965
1348
  }
966
1349
  )
967
1350
 
968
1351
  nodes = {}
1352
+ #: a list of 3-tuples u,v,k
969
1353
  links = []
970
- for term in self._iter_terms(use_tqdm=use_tqdm):
1354
+ typedefs = self._index_typedefs()
1355
+ synonym_typedefs = self._index_synonym_typedefs()
1356
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
971
1357
  parents = []
972
- for parent in term.parents:
1358
+ for parent in stanza.parents:
973
1359
  if parent is None:
974
1360
  raise ValueError("parent should not be none!")
975
- links.append((term.curie, "is_a", parent.curie))
1361
+ links.append((stanza.curie, "is_a", parent.curie))
976
1362
  parents.append(parent.curie)
977
1363
 
978
1364
  relations = []
979
- for typedef, target in term.iterate_relations():
980
- if target is None:
981
- raise ValueError("target should not be none!")
1365
+ for typedef, target in stanza.iterate_relations():
982
1366
  relations.append(f"{typedef.curie} {target.curie}")
983
- links.append((term.curie, typedef.curie, target.curie))
1367
+ links.append((stanza.curie, typedef.curie, target.curie))
1368
+
1369
+ for typedef, targets in sorted(stanza.properties.items()):
1370
+ for target_or_literal in targets:
1371
+ if isinstance(target_or_literal, curies.Reference):
1372
+ links.append((stanza.curie, typedef.curie, target_or_literal.curie))
984
1373
 
985
1374
  d = {
986
- "id": term.curie,
987
- "name": term.name,
988
- "def": term.definition and term._definition_fp(),
989
- "xref": [xref.curie for xref in term.xrefs],
1375
+ "id": stanza.curie,
1376
+ "name": stanza.name,
1377
+ "def": stanza.definition and stanza._definition_fp(),
1378
+ "xref": [xref.curie for xref in stanza.xrefs],
990
1379
  "is_a": parents,
991
1380
  "relationship": relations,
992
- "synonym": [synonym._fp() for synonym in term.synonyms],
993
- "property_value": [
994
- f"{prop} {value}"
995
- for prop, values in term.properties.items()
996
- for value in values
1381
+ "synonym": [
1382
+ synonym._fp(ontology_prefix=self.ontology, synonym_typedefs=synonym_typedefs)
1383
+ for synonym in stanza.synonyms
997
1384
  ],
1385
+ "property_value": list(
1386
+ stanza._iterate_obo_properties(ontology_prefix=self.ontology, typedefs=typedefs)
1387
+ ),
998
1388
  }
999
- nodes[term.curie] = {k: v for k, v in d.items() if v}
1389
+ nodes[stanza.curie] = {k: v for k, v in d.items() if v}
1000
1390
 
1001
1391
  rv.add_nodes_from(nodes.items())
1002
1392
  for _source, _key, _target in links:
1003
1393
  rv.add_edge(_source, _target, key=_key)
1004
1394
 
1005
1395
  logger.info(
1006
- "[%s v%s] exported graph with %d nodes",
1007
- self.ontology,
1008
- self.data_version,
1396
+ "[%s] exported graph with %d nodes",
1397
+ self._prefix_version,
1009
1398
  rv.number_of_nodes(),
1010
1399
  )
1011
1400
  return rv
@@ -1017,11 +1406,21 @@ class Obo:
1017
1406
  "date": self.date and self.date.isoformat(),
1018
1407
  }
1019
1408
 
1409
+ def iterate_references(self, *, use_tqdm: bool = False) -> Iterable[Reference]:
1410
+ """Iterate over identifiers."""
1411
+ for stanza in self._iter_stanzas(
1412
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
1413
+ ):
1414
+ if self._in_ontology(stanza.reference):
1415
+ yield stanza.reference
1416
+
1020
1417
  def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]:
1021
1418
  """Iterate over identifiers."""
1022
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
1023
- if term.prefix == self.ontology:
1024
- yield term.identifier
1419
+ for stanza in self._iter_stanzas(
1420
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
1421
+ ):
1422
+ if self._in_ontology_strict(stanza.reference):
1423
+ yield stanza.identifier
1025
1424
 
1026
1425
  def get_ids(self, *, use_tqdm: bool = False) -> set[str]:
1027
1426
  """Get the set of identifiers."""
@@ -1029,9 +1428,11 @@ class Obo:
1029
1428
 
1030
1429
  def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
1031
1430
  """Iterate identifier name pairs."""
1032
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
1033
- if term.prefix == self.ontology and term.name:
1034
- yield term.identifier, term.name
1431
+ for stanza in self._iter_stanzas(
1432
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
1433
+ ):
1434
+ if self._in_ontology(stanza.reference) and stanza.name:
1435
+ yield stanza.identifier, stanza.name
1035
1436
 
1036
1437
  def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
1037
1438
  """Get a mapping from identifiers to names."""
@@ -1039,11 +1440,13 @@ class Obo:
1039
1440
 
1040
1441
  def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
1041
1442
  """Iterate over pairs of terms' identifiers and their respective definitions."""
1042
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
1043
- if term.identifier and term.definition:
1443
+ for stanza in self._iter_stanzas(
1444
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
1445
+ ):
1446
+ if stanza.identifier and stanza.definition:
1044
1447
  yield (
1045
- term.identifier,
1046
- term.definition.strip('"')
1448
+ stanza.identifier,
1449
+ stanza.definition.strip('"')
1047
1450
  .replace("\n", " ")
1048
1451
  .replace("\t", " ")
1049
1452
  .replace(" ", " "),
@@ -1056,11 +1459,11 @@ class Obo:
1056
1459
  def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]:
1057
1460
  """Get the set of obsolete identifiers."""
1058
1461
  return {
1059
- term.identifier
1060
- for term in self._iter_terms(
1462
+ stanza.identifier
1463
+ for stanza in self._iter_stanzas(
1061
1464
  use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting obsolete"
1062
1465
  )
1063
- if term.identifier and term.is_obsolete
1466
+ if stanza.identifier and stanza.is_obsolete
1064
1467
  }
1065
1468
 
1066
1469
  ############
@@ -1068,18 +1471,19 @@ class Obo:
1068
1471
  ############
1069
1472
 
1070
1473
  def iterate_id_species(
1071
- self, *, prefix: Optional[str] = None, use_tqdm: bool = False
1474
+ self, *, prefix: str | None = None, use_tqdm: bool = False
1072
1475
  ) -> Iterable[tuple[str, str]]:
1073
1476
  """Iterate over terms' identifiers and respective species (if available)."""
1074
1477
  if prefix is None:
1075
1478
  prefix = NCBITAXON_PREFIX
1076
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting species"):
1077
- species = term.get_species(prefix=prefix)
1078
- if species:
1079
- yield term.identifier, species.identifier
1479
+ for stanza in self._iter_stanzas(
1480
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting species"
1481
+ ):
1482
+ if isinstance(stanza, Term) and (species := stanza.get_species(prefix=prefix)):
1483
+ yield stanza.identifier, species.identifier
1080
1484
 
1081
1485
  def get_id_species_mapping(
1082
- self, *, prefix: Optional[str] = None, use_tqdm: bool = False
1486
+ self, *, prefix: str | None = None, use_tqdm: bool = False
1083
1487
  ) -> Mapping[str, str]:
1084
1488
  """Get a mapping from identifiers to species."""
1085
1489
  return dict(self.iterate_id_species(prefix=prefix, use_tqdm=use_tqdm))
@@ -1109,42 +1513,103 @@ class Obo:
1109
1513
  # PROPS #
1110
1514
  #########
1111
1515
 
1112
- def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, str, str]]:
1516
+ def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Annotation]]:
1113
1517
  """Iterate over tuples of terms, properties, and their values."""
1114
- # TODO if property_prefix is set, try removing that as a prefix from all prop strings.
1115
- for term in self._iter_terms(
1518
+ for stanza in self._iter_stanzas(
1116
1519
  use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting properties"
1117
1520
  ):
1118
- for prop, value in term.iterate_properties():
1119
- yield term, prop, value
1521
+ for property_tuple in stanza.get_property_annotations():
1522
+ yield stanza, property_tuple
1120
1523
 
1121
1524
  @property
1122
1525
  def properties_header(self):
1123
1526
  """Property dataframe header."""
1124
- return [f"{self.ontology}_id", "property", "value"]
1527
+ return [f"{self.ontology}_id", "property", "value", "datatype", "language"]
1125
1528
 
1126
- def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1529
+ @property
1530
+ def object_properties_header(self):
1531
+ """Property dataframe header."""
1532
+ return ["source", "predicate", "target"]
1533
+
1534
+ @property
1535
+ def literal_properties_header(self):
1536
+ """Property dataframe header."""
1537
+ return ["source", "predicate", "target", "datatype", "language"]
1538
+
1539
+ def _iter_property_rows(
1540
+ self, *, use_tqdm: bool = False
1541
+ ) -> Iterable[tuple[str, str, str, str, str]]:
1127
1542
  """Iterate property rows."""
1128
- for term, prop, value in self.iterate_properties(use_tqdm=use_tqdm):
1129
- yield term.identifier, prop, value
1543
+ for term, t in self.iterate_properties(use_tqdm=use_tqdm):
1544
+ pred = term._reference(t.predicate, ontology_prefix=self.ontology)
1545
+ match t.value:
1546
+ case OBOLiteral(value, datatype, language):
1547
+ yield (
1548
+ term.identifier,
1549
+ pred,
1550
+ value,
1551
+ get_preferred_curie(datatype),
1552
+ language or "",
1553
+ )
1554
+ case Reference() as obj:
1555
+ yield term.identifier, pred, get_preferred_curie(obj), "", ""
1556
+ case _:
1557
+ raise TypeError(f"got: {type(t)} - {t}")
1558
+
1559
+ def get_properties_df(self, *, use_tqdm: bool = False, drop_na: bool = True) -> pd.DataFrame:
1560
+ """Get all properties as a dataframe."""
1561
+ df = pd.DataFrame(
1562
+ self._iter_property_rows(use_tqdm=use_tqdm),
1563
+ columns=self.properties_header,
1564
+ )
1565
+ if drop_na:
1566
+ df.dropna(inplace=True)
1567
+ return df
1568
+
1569
+ def iter_object_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1570
+ """Iterate over object property triples."""
1571
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
1572
+ for predicate, target in stanza.iterate_object_properties():
1573
+ yield stanza.curie, predicate.curie, target.curie
1130
1574
 
1131
- def get_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1575
+ def get_object_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1132
1576
  """Get all properties as a dataframe."""
1133
1577
  return pd.DataFrame(
1134
- list(self.iter_property_rows(use_tqdm=use_tqdm)),
1135
- columns=self.properties_header,
1578
+ self.iter_object_properties(use_tqdm=use_tqdm), columns=self.object_properties_header
1136
1579
  )
1137
1580
 
1581
+ def iter_literal_properties(
1582
+ self, *, use_tqdm: bool = False
1583
+ ) -> Iterable[tuple[str, str, str, str, str]]:
1584
+ """Iterate over literal properties quads."""
1585
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
1586
+ for predicate, target in stanza.iterate_literal_properties():
1587
+ yield (
1588
+ stanza.curie,
1589
+ predicate.curie,
1590
+ target.value,
1591
+ target.datatype.curie,
1592
+ target.language or "",
1593
+ )
1594
+
1595
+ def get_literal_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1596
+ """Get all properties as a dataframe."""
1597
+ return pd.DataFrame(self.iter_literal_properties(), columns=self.literal_properties_header)
1598
+
1138
1599
  def iterate_filtered_properties(
1139
- self, prop: str, *, use_tqdm: bool = False
1140
- ) -> Iterable[tuple[Term, str]]:
1600
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1601
+ ) -> Iterable[tuple[Stanza, str]]:
1141
1602
  """Iterate over tuples of terms and the values for the given property."""
1142
- for term in self._iter_terms(use_tqdm=use_tqdm):
1143
- for _prop, value in term.iterate_properties():
1144
- if _prop == prop:
1145
- yield term, value
1146
-
1147
- def get_filtered_properties_df(self, prop: str, *, use_tqdm: bool = False) -> pd.DataFrame:
1603
+ prop = _ensure_ref(prop)
1604
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
1605
+ for t in stanza.get_property_annotations():
1606
+ if t.predicate != prop:
1607
+ continue
1608
+ yield stanza, reference_or_literal_to_str(t.value)
1609
+
1610
+ def get_filtered_properties_df(
1611
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1612
+ ) -> pd.DataFrame:
1148
1613
  """Get a dataframe of terms' identifiers to the given property's values."""
1149
1614
  return pd.DataFrame(
1150
1615
  list(self.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm).items()),
@@ -1152,7 +1617,7 @@ class Obo:
1152
1617
  )
1153
1618
 
1154
1619
  def get_filtered_properties_mapping(
1155
- self, prop: str, *, use_tqdm: bool = False
1620
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1156
1621
  ) -> Mapping[str, str]:
1157
1622
  """Get a mapping from a term's identifier to the property.
1158
1623
 
@@ -1164,7 +1629,7 @@ class Obo:
1164
1629
  }
1165
1630
 
1166
1631
  def get_filtered_properties_multimapping(
1167
- self, prop: str, *, use_tqdm: bool = False
1632
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1168
1633
  ) -> Mapping[str, list[str]]:
1169
1634
  """Get a mapping from a term's identifier to the property values."""
1170
1635
  return multidict(
@@ -1176,22 +1641,63 @@ class Obo:
1176
1641
  # RELATIONS #
1177
1642
  #############
1178
1643
 
1644
def iterate_edges(
    self, *, use_tqdm: bool = False
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
    """Yield ``(stanza, typedef, target)`` triples for every edge.

    Edges whose predicate cannot be resolved by :meth:`_get_typedef`
    are skipped.

    :param use_tqdm: Forwarded to the stanza iterator.
    """
    typedef_index = self._index_typedefs()
    # shared across all stanzas so each unknown predicate is reported once
    already_warned: set[ReferenceTuple] = set()
    progress_label = f"[{self.ontology}] edge"
    for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=progress_label):
        for predicate, target in stanza._iter_edges():
            typedef = self._get_typedef(stanza, predicate, already_warned, typedef_index)
            if not typedef:
                continue
            yield stanza, typedef, target
1654
+
1655
@property
def edges_header(self) -> Sequence[str]:
    """Column names used for the edges dataframe."""
    columns = [":START_ID", ":TYPE", ":END_ID"]
    return columns
1659
+
1179
1660
def iterate_relations(
    self, *, use_tqdm: bool = False
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
    """Yield ``(stanza, typedef, target)`` triples for relationships.

    Only content from the `relationship:` tag is produced here, not
    every possible triple; use :func:`iterate_edges` for those.

    :param use_tqdm: Forwarded to the stanza iterator.
    """
    typedef_index = self._index_typedefs()
    # shared across all stanzas so each unknown predicate is reported once
    already_warned: set[ReferenceTuple] = set()
    progress_label = f"[{self.ontology}] relation"
    for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=progress_label):
        for predicate, target in stanza.iterate_relations():
            typedef = self._get_typedef(stanza, predicate, already_warned, typedef_index)
            if not typedef:
                continue
            yield stanza, typedef, target
1674
+
1675
def get_edges_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
    """Build a dataframe of edges.

    :param use_tqdm: Forwarded to :meth:`iterate_edge_rows`.
    :returns: One row per edge, with ``edges_header`` as the column names.
    """
    rows = self.iterate_edge_rows(use_tqdm=use_tqdm)
    return pd.DataFrame(rows, columns=self.edges_header)
1678
+
1679
def iterate_edge_rows(self, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
    """Yield each edge as a ``(subject, predicate, object)`` triple of CURIEs.

    :param use_tqdm: Forwarded to :meth:`iterate_edges`.
    """
    yield from (
        (subject.curie, predicate.curie, target.curie)
        for subject, predicate, target in self.iterate_edges(use_tqdm=use_tqdm)
    )
1683
+
1684
+ def _get_typedef(
1685
+ self,
1686
+ term: Stanza,
1687
+ predicate: Reference,
1688
+ _warned: set[ReferenceTuple],
1689
+ typedefs: Mapping[ReferenceTuple, TypeDef],
1690
+ ) -> TypeDef | None:
1691
+ pp = predicate.pair
1692
+ if pp in typedefs:
1693
+ return typedefs[pp]
1694
+ if pp not in _warned:
1695
+ _warn_string = f"[{term.curie}] undefined typedef: {pp}"
1696
+ if predicate.name:
1697
+ _warn_string += f" ({predicate.name})"
1698
+ logger.warning(_warn_string)
1699
+ _warned.add(pp)
1700
+ return None
1195
1701
 
1196
1702
  def iter_relation_rows(
1197
1703
  self, use_tqdm: bool = False
@@ -1208,14 +1714,14 @@ class Obo:
1208
1714
 
1209
1715
def iterate_filtered_relations(
    self,
    relation: ReferenceHint,
    *,
    use_tqdm: bool = False,
) -> Iterable[tuple[Stanza, Reference]]:
    """Yield ``(stanza, target)`` pairs for relationships that use the given relation.

    :param relation: The relation to keep; it is normalized with ``_ensure_ref``
        using this ontology's prefix and compared by its ``pair``.
    :param use_tqdm: Forwarded to :meth:`iterate_relations`.
    """
    wanted_pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
    yield from (
        (stanza, target)
        for stanza, typedef, target in self.iterate_relations(use_tqdm=use_tqdm)
        if wanted_pair == typedef.pair
    )
1220
1726
 
1221
1727
  @property
@@ -1232,7 +1738,7 @@ class Obo:
1232
1738
 
1233
1739
  def get_filtered_relations_df(
1234
1740
  self,
1235
- relation: RelationHint,
1741
+ relation: ReferenceHint,
1236
1742
  *,
1237
1743
  use_tqdm: bool = False,
1238
1744
  ) -> pd.DataFrame:
@@ -1247,11 +1753,11 @@ class Obo:
1247
1753
 
1248
1754
  def iterate_filtered_relations_filtered_targets(
1249
1755
  self,
1250
- relation: RelationHint,
1756
+ relation: ReferenceHint,
1251
1757
  target_prefix: str,
1252
1758
  *,
1253
1759
  use_tqdm: bool = False,
1254
- ) -> Iterable[tuple[Term, Reference]]:
1760
+ ) -> Iterable[tuple[Stanza, Reference]]:
1255
1761
  """Iterate over relationships between one identifier and another."""
1256
1762
  for term, reference in self.iterate_filtered_relations(
1257
1763
  relation=relation, use_tqdm=use_tqdm
@@ -1261,7 +1767,7 @@ class Obo:
1261
1767
 
1262
1768
  def get_relation_mapping(
1263
1769
  self,
1264
- relation: RelationHint,
1770
+ relation: ReferenceHint,
1265
1771
  target_prefix: str,
1266
1772
  *,
1267
1773
  use_tqdm: bool = False,
@@ -1272,8 +1778,8 @@ class Obo:
1272
1778
 
1273
1779
  Example usage: get homology between HGNC and MGI:
1274
1780
 
1275
- >>> from pyobo.sources.hgnc import get_obo
1276
- >>> obo = get_obo()
1781
+ >>> from pyobo.sources.hgnc import HGNCGetter
1782
+ >>> obo = HGNCGetter()
1277
1783
  >>> human_mapt_hgnc_id = "6893"
1278
1784
  >>> mouse_mapt_mgi_id = "97180"
1279
1785
  >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi")
@@ -1291,15 +1797,15 @@ class Obo:
1291
1797
  def get_relation(
1292
1798
  self,
1293
1799
  source_identifier: str,
1294
- relation: RelationHint,
1800
+ relation: ReferenceHint,
1295
1801
  target_prefix: str,
1296
1802
  *,
1297
1803
  use_tqdm: bool = False,
1298
- ) -> Optional[str]:
1804
+ ) -> str | None:
1299
1805
  """Get the value for a bijective relation mapping between this resource and a target resource.
1300
1806
 
1301
- >>> from pyobo.sources.hgnc import get_obo
1302
- >>> obo = get_obo()
1807
+ >>> from pyobo.sources.hgnc import HGNCGetter
1808
+ >>> obo = HGNCGetter()
1303
1809
  >>> human_mapt_hgnc_id = "6893"
1304
1810
  >>> mouse_mapt_mgi_id = "97180"
1305
1811
  >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi")
@@ -1311,7 +1817,7 @@ class Obo:
1311
1817
 
1312
1818
  def get_relation_multimapping(
1313
1819
  self,
1314
- relation: RelationHint,
1820
+ relation: ReferenceHint,
1315
1821
  target_prefix: str,
1316
1822
  *,
1317
1823
  use_tqdm: bool = False,
@@ -1334,22 +1840,24 @@ class Obo:
1334
1840
  ) -> Mapping[str, list[Reference]]:
1335
1841
  """Get a mapping from identifiers to a list of all references for the given relation."""
1336
1842
  return multidict(
1337
- (term.identifier, reference)
1338
- for term in self._iter_terms(
1843
+ (stanza.identifier, reference)
1844
+ for stanza in self._iter_stanzas(
1339
1845
  use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting {typedef.curie}"
1340
1846
  )
1341
- for reference in term.get_relationships(typedef)
1847
+ for reference in stanza.get_relationships(typedef)
1342
1848
  )
1343
1849
 
1344
1850
  ############
1345
1851
  # SYNONYMS #
1346
1852
  ############
1347
1853
 
1348
def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Synonym]]:
    """Yield ``(stanza, synonym)`` pairs, with each stanza's synonyms in sorted order.

    :param use_tqdm: Forwarded to the stanza iterator.
    """
    progress_label = f"[{self.ontology}] getting synonyms"
    for entry in self._iter_stanzas(use_tqdm=use_tqdm, desc=progress_label):
        yield from ((entry, syn) for syn in sorted(entry.synonyms))
1353
1861
 
1354
1862
  def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
1355
1863
  """Iterate over pairs of identifier and synonym text."""
@@ -1360,40 +1868,95 @@ class Obo:
1360
1868
  """Get a mapping from identifiers to a list of sorted synonym strings."""
1361
1869
  return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm))
1362
1870
 
1871
def get_literal_mappings(self) -> Iterable[ssslm.LiteralMapping]:
    """Yield literal mappings in a standard data model.

    Iterates over this object followed by its typedefs (if any), and only
    includes stanzas whose reference passes :meth:`_in_ontology`.
    """
    candidates: Iterable[Stanza] = itt.chain(self, self.typedefs or [])
    for candidate in candidates:
        if self._in_ontology(candidate.reference):
            yield from candidate.get_literal_mappings()
1879
+
1880
+ def _in_ontology(self, reference: Reference | Referenced) -> bool:
1881
+ return self._in_ontology_strict(reference) or self._in_ontology_aux(reference)
1882
+
1883
+ def _in_ontology_strict(self, reference: Reference | Referenced) -> bool:
1884
+ return reference.prefix == self.ontology
1885
+
1886
+ def _in_ontology_aux(self, reference: Reference | Referenced) -> bool:
1887
+ return reference.prefix == "obo" and reference.identifier.startswith(self.ontology + "#")
1888
+
1363
1889
  #########
1364
1890
  # XREFS #
1365
1891
  #########
1366
1892
 
1367
def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Reference]]:
    """Yield ``(stanza, xref)`` pairs.

    For each stanza, the targets returned by ``get_mappings(add_context=False)``
    are de-duplicated and emitted in sorted order.

    :param use_tqdm: Forwarded to the stanza iterator.
    """
    progress_label = f"[{self.ontology}] getting xrefs"
    for entry in self._iter_stanzas(use_tqdm=use_tqdm, desc=progress_label):
        unique_targets = set()
        for _predicate, target in entry.get_mappings(add_context=False):
            unique_targets.add(target)
        yield from ((entry, target) for target in sorted(unique_targets))
1372
1901
 
1373
1902
def iterate_filtered_xrefs(
    self, prefix: str, *, use_tqdm: bool = False
) -> Iterable[tuple[Stanza, Reference]]:
    """Yield only the ``(stanza, xref)`` pairs whose xref has the given prefix.

    :param prefix: The xref prefix to keep.
    :param use_tqdm: Forwarded to :meth:`iterate_xrefs`.
    """
    yield from (
        (stanza, target)
        for stanza, target in self.iterate_xrefs(use_tqdm=use_tqdm)
        if target.prefix == prefix
    )
1380
1909
 
1381
- def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1382
- """Iterate over terms' identifiers, xref prefixes, and xref identifiers."""
1383
- for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
1384
- yield term.identifier, xref.prefix, xref.identifier
1910
def iterate_literal_mapping_rows(self) -> Iterable[ssslm.LiteralMappingTuple]:
    """Yield the row form (``_as_row()``) of every literal mapping."""
    yield from (mapping._as_row() for mapping in self.get_literal_mappings())
1385
1914
 
1386
- @property
1387
- def xrefs_header(self):
1388
- """The header for the xref dataframe."""
1389
- return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID]
1915
def get_literal_mappings_df(self) -> pd.DataFrame:
    """Build a dataframe from this ontology's literal mappings.

    The conversion is delegated to ``ssslm.literal_mappings_to_df``.
    """
    mappings = self.get_literal_mappings()
    return ssslm.literal_mappings_to_df(mappings)
1390
1918
 
1391
- def get_xrefs_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1392
- """Get a dataframe of all xrefs extracted from the OBO document."""
1393
- return pd.DataFrame(
1394
- list(self.iterate_xref_rows(use_tqdm=use_tqdm)),
1395
- columns=[f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID],
1396
- ).drop_duplicates()
1919
def iterate_mapping_rows(
    self, *, use_tqdm: bool = False
) -> Iterable[tuple[str, str, str, str, str, float | None, str | None]]:
    """Yield SSSOM rows for the mappings of every stanza.

    Each row holds, in order: the subject's preferred CURIE, the subject's
    name, the object's preferred CURIE, the predicate's preferred CURIE, the
    justification's preferred CURIE, the confidence (may be ``None``), and
    the contributor's preferred CURIE (``None`` when there is no contributor).

    :param use_tqdm: Forwarded to the stanza iterator.
    """
    for subject in self._iter_stanzas(use_tqdm=use_tqdm):
        mappings = subject.get_mappings(include_xrefs=True, add_context=True)
        for predicate, target, ctx in mappings:
            subject_curie = get_preferred_curie(subject)
            subject_label = subject.name
            object_curie = get_preferred_curie(target)
            predicate_curie = get_preferred_curie(predicate)
            justification = get_preferred_curie(ctx.justification)
            confidence = ctx.confidence
            if ctx.contributor:
                contributor = get_preferred_curie(ctx.contributor)
            else:
                contributor = None
            yield (
                subject_curie,
                subject_label,
                object_curie,
                predicate_curie,
                justification,
                confidence,
                contributor,
            )
1936
+
1937
def get_mappings_df(
    self,
    *,
    use_tqdm: bool = False,
    include_subject_labels: bool = False,
    include_mapping_source_column: bool = False,
) -> pd.DataFrame:
    """Build an SSSOM dataframe from the mappings in this ontology.

    :param use_tqdm: Forwarded to :meth:`iterate_mapping_rows`.
    :param include_subject_labels: Keep the ``subject_label`` column when true.
    :param include_mapping_source_column: Add a ``mapping_source`` column
        filled with this ontology's prefix when true.
    :returns: The dataframe; the ``confidence`` and ``contributor`` columns
        are removed when they contain no values at all.
    """
    frame = pd.DataFrame(self.iterate_mapping_rows(use_tqdm=use_tqdm), columns=SSSOM_DF_COLUMNS)

    # decide which columns to discard before touching the frame
    discard: list[str] = []
    if not include_subject_labels:
        discard.append("subject_label")
    # confidence/contributor are only worth keeping if at least one row has a value
    discard.extend(
        column for column in ("confidence", "contributor") if frame[column].isna().all()
    )
    for column in discard:
        del frame[column]

    # optionally record where the mappings came from
    # (https://mapping-commons.github.io/sssom/mapping_source/)
    if include_mapping_source_column:
        frame["mapping_source"] = self.ontology

    return frame
1397
1960
 
1398
1961
  def get_filtered_xrefs_mapping(
1399
1962
  self, prefix: str, *, use_tqdm: bool = False
@@ -1417,11 +1980,12 @@ class Obo:
1417
1980
  # ALTS #
1418
1981
  ########
1419
1982
 
1420
def iterate_alts(self) -> Iterable[tuple[Stanza, Reference]]:
    """Yield ``(stanza, alternative identifier)`` pairs.

    Stanzas that do not pass :meth:`_in_ontology` are skipped.
    """
    for entry in self._iter_stanzas():
        if not self._in_ontology(entry):
            continue
        yield from ((entry, alt_id) for alt_id in entry.alt_ids)
1425
1989
 
1426
1990
  def iterate_alt_rows(self) -> Iterable[tuple[str, str]]:
1427
1991
  """Iterate over pairs of terms' primary identifiers and alternate identifiers."""
@@ -1433,33 +1997,315 @@ class Obo:
1433
1997
  return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts())
1434
1998
 
1435
1999
 
2000
@dataclass
class TypeDef(Stanza):
    """A type definition in OBO.

    See the subsection of https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.

    The integer in each field's ``Annotated`` metadata matches the position of
    the corresponding tag in the ordering list reproduced in the docstring of
    :meth:`iterate_obo_lines`, which is also the order in which that method
    emits the tags.
    """

    # NOTE(review): the "typedef-only" marker presumably flags tags that only
    # apply to [Typedef] stanzas -- confirm against whatever consumes the metadata.
    reference: Annotated[Reference, 1]
    is_anonymous: Annotated[bool | None, 2] = None
    # 3 - name is covered by reference
    namespace: Annotated[str | None, 4] = None
    # 5 alt_id is part of properties
    definition: Annotated[str | None, 6] = None
    comment: Annotated[str | None, 7] = None
    subsets: Annotated[list[Reference], 8] = field(default_factory=list)
    synonyms: Annotated[list[Synonym], 9] = field(default_factory=list)
    xrefs: Annotated[list[Reference], 10] = field(default_factory=list)
    _axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
    properties: Annotated[PropertiesHint, 11] = field(default_factory=lambda: defaultdict(list))
    domain: Annotated[Reference | None, 12, "typedef-only"] = None
    range: Annotated[Reference | None, 13, "typedef-only"] = None
    builtin: Annotated[bool | None, 14] = None
    holds_over_chain: Annotated[list[list[Reference]], 15, "typedef-only"] = field(
        default_factory=list
    )
    is_anti_symmetric: Annotated[bool | None, 16, "typedef-only"] = None
    is_cyclic: Annotated[bool | None, 17, "typedef-only"] = None
    is_reflexive: Annotated[bool | None, 18, "typedef-only"] = None
    is_symmetric: Annotated[bool | None, 19, "typedef-only"] = None
    is_transitive: Annotated[bool | None, 20, "typedef-only"] = None
    is_functional: Annotated[bool | None, 21, "typedef-only"] = None
    is_inverse_functional: Annotated[bool | None, 22, "typedef-only"] = None
    parents: Annotated[list[Reference], 23] = field(default_factory=list)
    intersection_of: Annotated[IntersectionOfHint, 24] = field(default_factory=list)
    union_of: Annotated[list[Reference], 25] = field(default_factory=list)
    equivalent_to: Annotated[list[Reference], 26] = field(default_factory=list)
    disjoint_from: Annotated[list[Reference], 27] = field(default_factory=list)
    # TODO inverse should be inverse_of, cardinality any
    inverse: Annotated[Reference | None, 28, "typedef-only"] = None
    # TODO check if there are any examples of this being multiple
    transitive_over: Annotated[list[Reference], 29, "typedef-only"] = field(default_factory=list)
    equivalent_to_chain: Annotated[list[list[Reference]], 30, "typedef-only"] = field(
        default_factory=list
    )
    #: From the OBO spec:
    #:
    #: For example: spatially_disconnected_from is disjoint_over part_of, in that two
    #: disconnected entities have no parts in common. This can be translated to OWL as:
    #: ``disjoint_over(R S), R(A B) ==> (S some A) disjointFrom (S some B)``
    disjoint_over: Annotated[list[Reference], 31] = field(default_factory=list)
    relationships: Annotated[RelationsHint, 32] = field(default_factory=lambda: defaultdict(list))
    is_obsolete: Annotated[bool | None, 33] = None
    created_by: Annotated[str | None, 34] = None
    creation_date: Annotated[datetime.datetime | None, 35] = None
    # TODO expand_assertion_to
    # TODO expand_expression_to
    #: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are
    #: used to record object metadata. Object metadata is additional information about an object
    #: that is useful to track, but does not impact the definition of the object or how it should
    #: be treated by a reasoner. Metadata tags might be used to record special term synonyms or
    #: structured notes about a term, for example.
    is_metadata_tag: Annotated[bool | None, 40, "typedef-only"] = None
    is_class_level: Annotated[bool | None, 41] = None

    type: StanzaType = "TypeDef"

    def __hash__(self) -> int:
        """Hash on the class together with the prefix and identifier."""
        # have to re-define hash because of the @dataclass
        # (a dataclass that generates __eq__ sets __hash__ to None unless one
        # is defined explicitly in the class body)
        return hash((self.__class__, self.prefix, self.identifier))

    def _get_references(self) -> dict[str, set[Reference]]:
        """Collect references used by this typedef, grouped by prefix.

        Starts from the parent class's result and adds the domain, range,
        inverse, ``transitive_over``, ``disjoint_over``, and every member of
        the ``holds_over_chain`` and ``equivalent_to_chain`` chains.
        """
        rv = super()._get_references()

        def _add(r: Reference) -> None:
            # NOTE(review): assumes ``rv`` creates a set on first access
            # (e.g. a defaultdict(set)) -- confirm in the parent class.
            rv[r.prefix].add(r)

        if self.domain:
            _add(self.domain)
        if self.range:
            _add(self.range)
        if self.inverse:
            _add(self.inverse)

        # TODO all of the properties, which are from oboInOwl
        for rr in itt.chain(self.transitive_over, self.disjoint_over):
            _add(rr)
        for part in itt.chain(self.holds_over_chain, self.equivalent_to_chain):
            for rr in part:
                _add(rr)
        return dict(rv)

    def iterate_obo_lines(
        self,
        ontology_prefix: str,
        synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
        typedefs: Mapping[ReferenceTuple, TypeDef] | None = None,
    ) -> Iterable[str]:
        """Iterate over the lines to write in an OBO file.

        :param ontology_prefix:
            The prefix of the ontology into which the type definition is being written.
            This is used for compressing builtin identifiers
        :param synonym_typedefs:
            Index of synonym type definitions, passed to each synonym's ``to_obo``.
            An empty mapping is used when this is ``None``.
        :param typedefs:
            Index of type definitions, passed to the property and relationship
            writers. An empty mapping is used when this is ``None``.
        :yield:
            The lines to write to an OBO file

        `S.3.5.5 <https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.3.5.5>`_
        of the OBO Flat File Specification v1.4 says tags should appear in the following order:

        1. id
        2. is_anonymous
        3. name
        4. namespace
        5. alt_id
        6. def
        7. comment
        8. subset
        9. synonym
        10. xref
        11. property_value
        12. domain
        13. range
        14. builtin
        15. holds_over_chain
        16. is_anti_symmetric
        17. is_cyclic
        18. is_reflexive
        19. is_symmetric
        20. is_transitive
        21. is_functional
        22. is_inverse_functional
        23. is_a
        24. intersection_of
        25. union_of
        26. equivalent_to
        27. disjoint_from
        28. inverse_of
        29. transitive_over
        30. equivalent_to_chain
        31. disjoint_over
        32. relationship
        33. is_obsolete
        34. created_by
        35. creation_date
        36. replaced_by
        37. consider
        38. expand_assertion_to
        39. expand_expression_to
        40. is_metadata_tag
        41. is_class_level
        """
        if synonym_typedefs is None:
            synonym_typedefs = {}
        if typedefs is None:
            typedefs = {}

        # The numbered comments below refer to the tag positions listed in the
        # docstring; statements must stay in this order to keep the output ordered.
        yield "\n[Typedef]"
        # 1
        yield f"id: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
        # 2
        yield from _boolean_tag("is_anonymous", self.is_anonymous)
        # 3
        if self.name:
            yield f"name: {self.name}"
        # 4
        if self.namespace:
            yield f"namespace: {self.namespace}"
        # 5
        yield from _reference_list_tag("alt_id", self.alt_ids, ontology_prefix)
        # 6
        if self.definition:
            yield f"def: {self._definition_fp()}"
        # 7
        if self.comment:
            yield f"comment: {self.comment}"
        # 8
        yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
        # 9
        for synonym in self.synonyms:
            yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
        # 10
        yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
        # 11
        yield from self._iterate_obo_properties(
            ontology_prefix=ontology_prefix,
            skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
            skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
            typedefs=typedefs,
        )
        # 12
        if self.domain:
            yield f"domain: {reference_escape(self.domain, ontology_prefix=ontology_prefix, add_name_comment=True)}"
        # 13
        if self.range:
            yield f"range: {reference_escape(self.range, ontology_prefix=ontology_prefix, add_name_comment=True)}"
        # 14
        yield from _boolean_tag("builtin", self.builtin)
        # 15
        yield from _chain_tag("holds_over_chain", self.holds_over_chain, ontology_prefix)
        # 16
        yield from _boolean_tag("is_anti_symmetric", self.is_anti_symmetric)
        # 17
        yield from _boolean_tag("is_cyclic", self.is_cyclic)
        # 18
        yield from _boolean_tag("is_reflexive", self.is_reflexive)
        # 19
        yield from _boolean_tag("is_symmetric", self.is_symmetric)
        # 20
        yield from _boolean_tag("is_transitive", self.is_transitive)
        # 21
        yield from _boolean_tag("is_functional", self.is_functional)
        # 22
        yield from _boolean_tag("is_inverse_functional", self.is_inverse_functional)
        # 23
        yield from _reference_list_tag("is_a", self.parents, ontology_prefix)
        # 24
        yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
        # 25
        yield from _reference_list_tag("union_of", self.union_of, ontology_prefix)
        # 26
        yield from _reference_list_tag("equivalent_to", self.equivalent_to, ontology_prefix)
        # 27
        yield from _reference_list_tag("disjoint_from", self.disjoint_from, ontology_prefix)
        # 28 (the attribute is called ``inverse`` but the tag written is ``inverse_of``)
        if self.inverse:
            yield f"inverse_of: {reference_escape(self.inverse, ontology_prefix=ontology_prefix, add_name_comment=True)}"
        # 29
        yield from _reference_list_tag("transitive_over", self.transitive_over, ontology_prefix)
        # 30
        yield from _chain_tag("equivalent_to_chain", self.equivalent_to_chain, ontology_prefix)
        # 31 disjoint_over, see https://github.com/search?q=%22disjoint_over%3A%22+path%3A*.obo&type=code
        yield from _reference_list_tag(
            "disjoint_over", self.disjoint_over, ontology_prefix=ontology_prefix
        )
        # 32
        yield from self._iterate_obo_relations(ontology_prefix=ontology_prefix, typedefs=typedefs)
        # 33
        yield from _boolean_tag("is_obsolete", self.is_obsolete)
        # 34
        if self.created_by:
            yield f"created_by: {self.created_by}"
        # 35
        if self.creation_date is not None:
            yield f"creation_date: {self.creation_date.isoformat()}"
        # 36 (taken from the ``v.term_replaced_by`` property)
        yield from _tag_property_targets(
            "replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
        )
        # 37 (taken from the ``v.see_also`` property)
        yield from _tag_property_targets(
            "consider", self, v.see_also, ontology_prefix=ontology_prefix
        )
        # 38 TODO expand_assertion_to
        # 39 TODO expand_expression_to
        # 40
        yield from _boolean_tag("is_metadata_tag", self.is_metadata_tag)
        # 41
        yield from _boolean_tag("is_class_level", self.is_class_level)

    @classmethod
    def from_triple(cls, prefix: str, identifier: str, name: str | None = None) -> Self:
        """Create a typedef from a prefix, an identifier, and an optional name.

        :param prefix: The prefix of the typedef's reference.
        :param identifier: The identifier of the typedef's reference.
        :param name: The optional name of the typedef's reference.
        :returns: A typedef whose reference is built from the three values.
        """
        return cls(reference=Reference(prefix=prefix, identifier=identifier, name=name))

    @classmethod
    def default(
        cls, prefix: str, identifier: str, *, name: str | None = None, is_metadata_tag: bool
    ) -> Self:
        """Construct a default type definition from within the OBO namespace.

        :param prefix: Passed to ``default_reference``.
        :param identifier: Passed to ``default_reference``.
        :param name: The optional name, passed to ``default_reference``.
        :param is_metadata_tag: Stored on the new typedef's ``is_metadata_tag`` field.
        :returns: A typedef whose reference comes from ``default_reference``.
        """
        return cls(
            reference=default_reference(prefix, identifier, name=name),
            is_metadata_tag=is_metadata_tag,
        )
2272
+
2273
+
2274
class AdHocOntologyBase(Obo):
    """A base class for ad-hoc ontologies.

    It adds nothing to :class:`Obo`; the ``AdHocOntology`` class created inside
    :func:`make_ad_hoc_ontology` derives from it.
    """
2276
+
2277
+
1436
2278
  def make_ad_hoc_ontology(
1437
2279
  _ontology: str,
1438
- _name: str,
1439
- _auto_generated_by: Optional[str] = None,
1440
- _format_version: str = "1.2",
1441
- _typedefs: Optional[list[TypeDef]] = None,
1442
- _synonym_typedefs: Optional[list[SynonymTypeDef]] = None,
1443
- _date: Optional[datetime] = None,
1444
- _data_version: Optional[str] = None,
1445
- _idspaces: Optional[Mapping[str, str]] = None,
1446
- _root_terms: Optional[list[Reference]] = None,
2280
+ _name: str | None = None,
2281
+ _auto_generated_by: str | None = None,
2282
+ _typedefs: list[TypeDef] | None = None,
2283
+ _synonym_typedefs: list[SynonymTypeDef] | None = None,
2284
+ _date: datetime.datetime | None = None,
2285
+ _data_version: str | None = None,
2286
+ _idspaces: Mapping[str, str] | None = None,
2287
+ _root_terms: list[Reference] | None = None,
2288
+ _subsetdefs: list[tuple[Reference, str]] | None = None,
2289
+ _property_values: list[Annotation] | None = None,
2290
+ _imports: list[str] | None = None,
1447
2291
  *,
1448
- terms: list[Term],
1449
- ) -> "Obo":
2292
+ terms: list[Term] | None = None,
2293
+ ) -> Obo:
1450
2294
  """Make an ad-hoc ontology."""
1451
2295
 
1452
- class AdHocOntology(Obo):
2296
+ class AdHocOntology(AdHocOntologyBase):
1453
2297
  """An ad hoc ontology created from an OBO file."""
1454
2298
 
1455
2299
  ontology = _ontology
1456
2300
  name = _name
1457
2301
  auto_generated_by = _auto_generated_by
1458
- format_version = _format_version
1459
2302
  typedefs = _typedefs
1460
2303
  synonym_typedefs = _synonym_typedefs
1461
2304
  idspaces = _idspaces
1462
2305
  root_terms = _root_terms
2306
+ subsetdefs = _subsetdefs
2307
+ property_values = _property_values
2308
+ imports = _imports
1463
2309
 
1464
2310
  def __post_init__(self):
1465
2311
  self.date = _date
@@ -1467,30 +2313,11 @@ def make_ad_hoc_ontology(
1467
2313
 
1468
2314
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
1469
2315
  """Iterate over terms in the ad hoc ontology."""
1470
- return terms
2316
+ return terms or []
1471
2317
 
1472
2318
  return AdHocOntology()
1473
2319
 
1474
2320
 
1475
- def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> list[Mapping[str, Any]]:
1476
- """Convert the type defs."""
1477
- if not typedefs:
1478
- return []
1479
- return [_convert_typedef(typedef) for typedef in typedefs]
1480
-
1481
-
1482
- def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
1483
- """Convert a type def."""
1484
- # TODO add more later
1485
- return typedef.reference.model_dump()
1486
-
1487
-
1488
- def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]:
1489
- """Convert the synonym type defs."""
1490
- if not synonym_typedefs:
1491
- return []
1492
- return [_convert_synonym_typedef(synonym_typedef) for synonym_typedef in synonym_typedefs]
1493
-
1494
-
1495
- def _convert_synonym_typedef(synonym_typedef: SynonymTypeDef) -> str:
1496
- return f'{synonym_typedef.preferred_curie} "{synonym_typedef.name}"'
2321
+ HUMAN_TERM = Term(reference=v.HUMAN)
2322
+ CHARLIE_TERM = Term(reference=v.CHARLIE, type="Instance").append_parent(HUMAN_TERM)
2323
+ PYOBO_INJECTED = "Injected by PyOBO"