pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
  203. pyobo/apps/__init__.py +0 -3
  204. pyobo/apps/cli.py +0 -24
  205. pyobo/apps/gilda/__init__.py +0 -3
  206. pyobo/apps/gilda/__main__.py +0 -8
  207. pyobo/apps/gilda/app.py +0 -48
  208. pyobo/apps/gilda/cli.py +0 -36
  209. pyobo/apps/gilda/templates/base.html +0 -33
  210. pyobo/apps/gilda/templates/home.html +0 -11
  211. pyobo/apps/gilda/templates/matches.html +0 -32
  212. pyobo/apps/mapper/__init__.py +0 -3
  213. pyobo/apps/mapper/__main__.py +0 -11
  214. pyobo/apps/mapper/cli.py +0 -37
  215. pyobo/apps/mapper/mapper.py +0 -187
  216. pyobo/apps/mapper/templates/base.html +0 -35
  217. pyobo/apps/mapper/templates/mapper_home.html +0 -64
  218. pyobo/aws.py +0 -162
  219. pyobo/cli/aws.py +0 -47
  220. pyobo/identifier_utils.py +0 -142
  221. pyobo/normalizer.py +0 -232
  222. pyobo/registries/__init__.py +0 -16
  223. pyobo/registries/metaregistry.json +0 -507
  224. pyobo/registries/metaregistry.py +0 -135
  225. pyobo/sources/icd11.py +0 -105
  226. pyobo/xrefdb/__init__.py +0 -1
  227. pyobo/xrefdb/canonicalizer.py +0 -214
  228. pyobo/xrefdb/priority.py +0 -59
  229. pyobo/xrefdb/sources/__init__.py +0 -60
  230. pyobo/xrefdb/sources/biomappings.py +0 -36
  231. pyobo/xrefdb/sources/cbms2019.py +0 -91
  232. pyobo/xrefdb/sources/chembl.py +0 -83
  233. pyobo/xrefdb/sources/compath.py +0 -82
  234. pyobo/xrefdb/sources/famplex.py +0 -64
  235. pyobo/xrefdb/sources/gilda.py +0 -50
  236. pyobo/xrefdb/sources/intact.py +0 -113
  237. pyobo/xrefdb/sources/ncit.py +0 -133
  238. pyobo/xrefdb/sources/pubchem.py +0 -27
  239. pyobo/xrefdb/sources/wikidata.py +0 -116
  240. pyobo-0.11.1.dist-info/RECORD +0 -173
  241. pyobo-0.11.1.dist-info/WHEEL +0 -5
  242. pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/struct/struct.py CHANGED
@@ -1,111 +1,209 @@
1
1
  """Data structures for OBO."""
2
2
 
3
- import gzip
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import itertools as itt
4
7
  import json
5
8
  import logging
6
9
  import os
7
10
  import sys
8
- from collections import defaultdict
9
- from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
11
+ import warnings
12
+ from collections import ChainMap, defaultdict
13
+ from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
10
14
  from dataclasses import dataclass, field
11
- from datetime import datetime
12
- from operator import attrgetter
13
15
  from pathlib import Path
14
16
  from textwrap import dedent
15
- from typing import (
16
- Any,
17
- Callable,
18
- ClassVar,
19
- Optional,
20
- TextIO,
21
- Union,
22
- )
17
+ from typing import Annotated, Any, ClassVar, TextIO
23
18
 
24
19
  import bioregistry
25
20
  import click
21
+ import curies
26
22
  import networkx as nx
27
23
  import pandas as pd
24
+ import ssslm
25
+ from curies import ReferenceTuple
26
+ from curies import vocabulary as _cv
28
27
  from more_click import force_option, verbose_option
29
28
  from tqdm.auto import tqdm
30
- from typing_extensions import Literal
31
-
32
- from .reference import Reference, Referenced
33
- from .typedef import (
34
- RelationHint,
35
- TypeDef,
36
- comment,
37
- default_typedefs,
38
- exact_match,
39
- from_species,
40
- get_reference_tuple,
41
- has_ontology_root_term,
42
- has_part,
43
- is_a,
44
- orthologous,
45
- part_of,
46
- see_also,
47
- term_replaced_by,
29
+ from typing_extensions import Self
30
+
31
+ from . import vocabulary as v
32
+ from .reference import (
33
+ OBOLiteral,
34
+ Reference,
35
+ Referenced,
36
+ _reference_list_tag,
37
+ comma_separate_references,
38
+ default_reference,
39
+ get_preferred_curie,
40
+ reference_escape,
41
+ reference_or_literal_to_str,
42
+ )
43
+ from .struct_utils import (
44
+ Annotation,
45
+ AnnotationsDict,
46
+ HasReferencesMixin,
47
+ IntersectionOfHint,
48
+ PropertiesHint,
49
+ ReferenceHint,
50
+ RelationsHint,
51
+ Stanza,
52
+ StanzaType,
53
+ UnionOfHint,
54
+ _chain_tag,
55
+ _ensure_ref,
56
+ _get_prefixes_from_annotations,
57
+ _get_references_from_annotations,
58
+ _tag_property_targets,
48
59
  )
49
- from .utils import comma_separate, obo_escape_slim
60
+ from .utils import _boolean_tag, obo_escape_slim
50
61
  from ..api.utils import get_version
51
62
  from ..constants import (
63
+ BUILD_SUBDIRECTORY_NAME,
52
64
  DATE_FORMAT,
65
+ DEFAULT_PREFIX_MAP,
53
66
  NCBITAXON_PREFIX,
54
67
  RELATION_ID,
55
68
  RELATION_PREFIX,
56
69
  TARGET_ID,
57
70
  TARGET_PREFIX,
58
71
  )
59
- from ..identifier_utils import normalize_curie
72
+ from ..utils.cache import write_gzipped_graph
60
73
  from ..utils.io import multidict, write_iterable_tsv
61
- from ..utils.misc import obo_to_owl
62
- from ..utils.path import get_prefix_obo_path, prefix_directory_join
74
+ from ..utils.path import (
75
+ CacheArtifact,
76
+ get_cache_path,
77
+ get_relation_cache_path,
78
+ prefix_directory_join,
79
+ )
80
+ from ..version import get_version as get_pyobo_version
63
81
 
64
82
  __all__ = [
83
+ "Obo",
65
84
  "Synonym",
66
85
  "SynonymTypeDef",
67
- "SynonymSpecificity",
68
- "SynonymSpecificities",
69
86
  "Term",
70
- "Obo",
71
- "make_ad_hoc_ontology",
72
87
  "abbreviation",
73
88
  "acronym",
89
+ "make_ad_hoc_ontology",
74
90
  ]
75
91
 
76
92
  logger = logging.getLogger(__name__)
77
93
 
78
- SynonymSpecificity = Literal["EXACT", "NARROW", "BROAD", "RELATED"]
79
- SynonymSpecificities: Sequence[SynonymSpecificity] = ("EXACT", "NARROW", "BROAD", "RELATED")
94
+ #: This is what happens if no specificity is given
95
+ DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
96
+
97
+ #: Columns in the SSSOM dataframe
98
+ SSSOM_DF_COLUMNS = [
99
+ "subject_id",
100
+ "subject_label",
101
+ "object_id",
102
+ "predicate_id",
103
+ "mapping_justification",
104
+ "confidence",
105
+ "contributor",
106
+ ]
107
+ UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
108
+ FORMAT_VERSION = "1.4"
80
109
 
81
110
 
82
111
  @dataclass
83
- class Synonym:
112
+ class Synonym(HasReferencesMixin):
84
113
  """A synonym with optional specificity and references."""
85
114
 
86
115
  #: The string representing the synonym
87
116
  name: str
88
117
 
89
118
  #: The specificity of the synonym
90
- specificity: SynonymSpecificity = "EXACT"
119
+ specificity: _cv.SynonymScope | None = None
91
120
 
92
121
  #: The type of synonym. Must be defined in OBO document!
93
- type: "SynonymTypeDef" = field(
94
- default_factory=lambda: DEFAULT_SYNONYM_TYPE # type:ignore
95
- )
122
+ type: Reference | None = None
96
123
 
97
124
  #: References to articles where the synonym appears
98
- provenance: list[Reference] = field(default_factory=list)
125
+ provenance: Sequence[Reference | OBOLiteral] = field(default_factory=list)
126
+
127
+ #: Extra annotations
128
+ annotations: list[Annotation] = field(default_factory=list)
129
+
130
+ #: Language tag for the synonym
131
+ language: str | None = None
132
+
133
+ def __lt__(self, other: Synonym) -> bool:
134
+ """Sort lexically by name."""
135
+ return self._sort_key() < other._sort_key()
136
+
137
+ def _get_references(self) -> defaultdict[str, set[Reference]]:
138
+ """Get all prefixes used by the typedef."""
139
+ rv: defaultdict[str, set[Reference]] = defaultdict(set)
140
+ rv[v.has_dbxref.prefix].add(v.has_dbxref)
141
+ if self.type is not None:
142
+ rv[self.type.prefix].add(self.type)
143
+ for provenance in self.provenance:
144
+ match provenance:
145
+ case Reference():
146
+ rv[provenance.prefix].add(provenance)
147
+ case OBOLiteral(_, datatype, _language):
148
+ rv[datatype.prefix].add(v._c(datatype))
149
+ for prefix, references in _get_references_from_annotations(self.annotations).items():
150
+ rv[prefix].update(references)
151
+ return rv
152
+
153
+ def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
154
+ return (
155
+ self.name,
156
+ self.specificity or DEFAULT_SPECIFICITY,
157
+ self.type.curie if self.type else "",
158
+ )
159
+
160
+ @property
161
+ def predicate(self) -> curies.NamedReference:
162
+ """Get the specificity reference."""
163
+ return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
99
164
 
100
- def to_obo(self) -> str:
165
+ def to_obo(
166
+ self,
167
+ ontology_prefix: str,
168
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
169
+ ) -> str:
101
170
  """Write this synonym as an OBO line to appear in a [Term] stanza."""
102
- return f"synonym: {self._fp()}"
171
+ return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}"
103
172
 
104
- def _fp(self) -> str:
105
- x = f'"{self._escape(self.name)}" {self.specificity}'
106
- if self.type and self.type.pair != DEFAULT_SYNONYM_TYPE.pair:
107
- x = f"{x} {self.type.preferred_curie}"
108
- return f"{x} [{comma_separate(self.provenance)}]"
173
+ def _fp(
174
+ self,
175
+ ontology_prefix: str,
176
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
177
+ ) -> str:
178
+ if synonym_typedefs is None:
179
+ synonym_typedefs = {}
180
+
181
+ x = f'"{self._escape(self.name)}"'
182
+
183
+ # Add on the specificity, e.g., EXACT
184
+ synonym_typedef = _synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs)
185
+ if synonym_typedef is not None and synonym_typedef.specificity is not None:
186
+ x = f"{x} {synonym_typedef.specificity}"
187
+ elif self.specificity is not None:
188
+ x = f"{x} {self.specificity}"
189
+ elif self.type is not None:
190
+ # it's not valid to have a synonym type without a specificity,
191
+ # so automatically assign one if we'll need it
192
+ x = f"{x} {DEFAULT_SPECIFICITY}"
193
+
194
+ # Add on the synonym type, if exists
195
+ if self.type is not None:
196
+ x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
197
+
198
+ # the provenance list is required, even if it's empty :/
199
+ x = f"{x} [{comma_separate_references(self.provenance)}]"
200
+
201
+ # OBO flat file format does not support language,
202
+ # but at least we can mention it here as a comment
203
+ if self.language:
204
+ x += f" ! language: {self.language}"
205
+
206
+ return x
109
207
 
110
208
  @staticmethod
111
209
  def _escape(s: str) -> str:
@@ -113,113 +211,100 @@ class Synonym:
113
211
 
114
212
 
115
213
  @dataclass
116
- class SynonymTypeDef(Referenced):
214
+ class SynonymTypeDef(Referenced, HasReferencesMixin):
117
215
  """A type definition for synonyms in OBO."""
118
216
 
119
217
  reference: Reference
120
- specificity: Optional[SynonymSpecificity] = None
218
+ specificity: _cv.SynonymScope | None = None
121
219
 
122
- def to_obo(self) -> str:
220
+ def __hash__(self) -> int:
221
+ # have to re-define hash because of the @dataclass
222
+ return hash((self.__class__, self.prefix, self.identifier))
223
+
224
+ def to_obo(self, ontology_prefix: str) -> str:
123
225
  """Serialize to OBO."""
124
- rv = f'synonymtypedef: {self.preferred_curie} "{self.name}"'
226
+ rv = f"synonymtypedef: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
227
+ name = self.name or ""
228
+ rv = f'{rv} "{name}"'
125
229
  if self.specificity:
126
230
  rv = f"{rv} {self.specificity}"
127
231
  return rv
128
232
 
129
- @classmethod
130
- def from_text(
131
- cls,
132
- text: str,
133
- specificity: Optional[SynonymSpecificity] = None,
134
- *,
135
- lower: bool = True,
136
- ) -> "SynonymTypeDef":
137
- """Get a type definition from text that's normalized."""
138
- identifier = (
139
- text.replace("-", "_")
140
- .replace(" ", "_")
141
- .replace('"', "")
142
- .replace(")", "")
143
- .replace("(", "")
144
- )
145
- if lower:
146
- identifier = identifier.lower()
147
- return cls(
148
- reference=Reference(prefix="obo", identifier=identifier, name=text.replace('"', "")),
149
- specificity=specificity,
150
- )
233
+ def _get_references(self) -> dict[str, set[Reference]]:
234
+ """Get all references used by the typedef."""
235
+ rv: defaultdict[str, set[Reference]] = defaultdict(set)
236
+ rv[self.reference.prefix].add(self.reference)
237
+ if self.specificity is not None:
238
+ # weird syntax, but this just gets the synonym scope
239
+ # predicate as a pyobo reference
240
+ r = v._c(_cv.synonym_scopes[self.specificity])
241
+ rv[r.prefix].add(r)
242
+ return dict(rv)
151
243
 
152
244
 
153
245
  DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
154
- reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="Synonym"),
246
+ reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
155
247
  )
156
248
  abbreviation = SynonymTypeDef(
157
249
  reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
158
250
  )
159
251
  acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
160
-
161
-
162
- ReferenceHint = Union[Reference, "Term", tuple[str, str], str]
163
-
164
-
165
- def _ensure_ref(reference: ReferenceHint) -> Reference:
166
- if reference is None:
167
- raise ValueError("can not append null reference")
168
- if isinstance(reference, Term):
169
- return reference.reference
170
- if isinstance(reference, str):
171
- _rv = Reference.from_curie(reference)
172
- if _rv is None:
173
- raise ValueError(f"could not parse CURIE from {reference}")
174
- return _rv
175
- if isinstance(reference, tuple):
176
- return Reference(prefix=reference[0], identifier=reference[1])
177
- if isinstance(reference, Reference):
178
- return reference
179
- raise TypeError(f"invalid type given for a reference ({type(reference)}): {reference}")
252
+ uk_spelling = SynonymTypeDef(
253
+ reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
254
+ )
255
+ default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
256
+ abbreviation.pair: abbreviation,
257
+ acronym.pair: acronym,
258
+ uk_spelling.pair: uk_spelling,
259
+ }
180
260
 
181
261
 
182
262
  @dataclass
183
- class Term(Referenced):
263
+ class Term(Stanza):
184
264
  """A term in OBO."""
185
265
 
186
266
  #: The primary reference for the entity
187
267
  reference: Reference
188
268
 
189
269
  #: A description of the entity
190
- definition: Optional[str] = None
270
+ definition: str | None = None
191
271
 
192
- #: References to articles in which the term appears
193
- provenance: list[Reference] = field(default_factory=list)
272
+ #: Object properties
273
+ relationships: RelationsHint = field(default_factory=lambda: defaultdict(list))
194
274
 
195
- #: Relationships defined by [Typedef] stanzas
196
- relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list))
275
+ _axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
197
276
 
198
- #: Properties, which are not defined with Typedef and have scalar values instead of references.
199
- properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))
277
+ properties: PropertiesHint = field(default_factory=lambda: defaultdict(list))
200
278
 
201
279
  #: Relationships with the default "is_a"
202
280
  parents: list[Reference] = field(default_factory=list)
203
281
 
282
+ intersection_of: IntersectionOfHint = field(default_factory=list)
283
+ union_of: UnionOfHint = field(default_factory=list)
284
+ equivalent_to: list[Reference] = field(default_factory=list)
285
+ disjoint_from: list[Reference] = field(default_factory=list)
286
+
204
287
  #: Synonyms of this term
205
288
  synonyms: list[Synonym] = field(default_factory=list)
206
289
 
207
- #: Equivalent references
290
+ #: Database cross-references, see :func:`get_mappings` for
291
+ #: access to all mappings in an SSSOM-like interface
208
292
  xrefs: list[Reference] = field(default_factory=list)
209
- xref_types: list[Reference] = field(default_factory=list)
210
-
211
- #: Alternate Identifiers
212
- alt_ids: list[Reference] = field(default_factory=list)
213
293
 
214
294
  #: The sub-namespace within the ontology
215
- namespace: Optional[str] = None
295
+ namespace: str | None = None
216
296
 
217
297
  #: An annotation for obsolescence. By default, is None, but this means that it is not obsolete.
218
- is_obsolete: Optional[bool] = None
298
+ is_obsolete: bool | None = None
299
+
300
+ type: StanzaType = "Term"
219
301
 
220
- type: Literal["Term", "Instance"] = "Term"
302
+ builtin: bool | None = None
303
+ is_anonymous: bool | None = None
304
+ subsets: list[Reference] = field(default_factory=list)
221
305
 
222
- def __hash__(self):
306
+ def __hash__(self) -> int:
307
+ # have to re-define hash because of the @dataclass
223
308
  return hash((self.__class__, self.prefix, self.identifier))
224
309
 
225
310
  @classmethod
@@ -227,10 +312,10 @@ class Term(Referenced):
227
312
  cls,
228
313
  prefix: str,
229
314
  identifier: str,
230
- name: Optional[str] = None,
231
- definition: Optional[str] = None,
315
+ name: str | None = None,
316
+ definition: str | None = None,
232
317
  **kwargs,
233
- ) -> "Term":
318
+ ) -> Term:
234
319
  """Create a term from a reference."""
235
320
  return cls(
236
321
  reference=Reference(prefix=prefix, identifier=identifier, name=name),
@@ -239,245 +324,198 @@ class Term(Referenced):
239
324
  )
240
325
 
241
326
  @classmethod
242
- def auto(
243
- cls,
244
- prefix: str,
245
- identifier: str,
246
- ) -> "Term":
247
- """Create a term from a reference."""
248
- from ..api import get_definition
249
-
250
- return cls(
251
- reference=Reference.auto(prefix=prefix, identifier=identifier),
252
- definition=get_definition(prefix, identifier),
253
- )
254
-
255
- @classmethod
256
- def from_curie(cls, curie: str, name: Optional[str] = None) -> "Term":
257
- """Create a term directly from a CURIE and optional name."""
258
- prefix, identifier = normalize_curie(curie)
259
- if prefix is None or identifier is None:
260
- raise ValueError
261
- return cls.from_triple(prefix=prefix, identifier=identifier, name=name)
262
-
263
- def append_provenance(self, reference: ReferenceHint) -> None:
264
- """Add a provenance reference."""
265
- self.provenance.append(_ensure_ref(reference))
266
-
267
- def append_synonym(
268
- self,
269
- synonym: Union[str, Synonym],
270
- *,
271
- type: Optional[SynonymTypeDef] = None,
272
- specificity: Optional[SynonymSpecificity] = None,
273
- ) -> None:
274
- """Add a synonym."""
275
- if isinstance(synonym, str):
276
- synonym = Synonym(
277
- synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
278
- )
279
- self.synonyms.append(synonym)
280
-
281
- def append_alt(self, alt: Union[str, Reference]) -> None:
282
- """Add an alternative identifier."""
283
- if isinstance(alt, str):
284
- alt = Reference(prefix=self.prefix, identifier=alt)
285
- self.alt_ids.append(alt)
327
def default(cls, prefix, identifier, name=None) -> Self:
    """Create a term whose reference is built by :func:`default_reference`."""
    reference = default_reference(prefix=prefix, identifier=identifier, name=name)
    return cls(reference=reference)
286
330
 
287
- def append_see_also(self, reference: ReferenceHint) -> "Term":
288
- """Add a see also relationship."""
289
- self.relationships[see_also].append(_ensure_ref(reference))
290
- return self
291
-
292
- def append_comment(self, value: str) -> "Term":
293
- """Add a comment relationship."""
294
- self.append_property(comment.curie, value)
295
- return self
296
-
297
- def append_replaced_by(self, reference: ReferenceHint) -> "Term":
298
- """Add a replaced by relationship."""
299
- self.append_relationship(term_replaced_by, reference)
300
- return self
301
-
302
- def append_parent(self, reference: ReferenceHint) -> "Term":
303
- """Add a parent to this entity."""
304
- reference = _ensure_ref(reference)
305
- if reference not in self.parents:
306
- self.parents.append(reference)
307
- return self
331
def append_see_also_uri(self, uri: str) -> Self:
    """Annotate this term with a see-also property that points at a URI."""
    predicate = v.see_also
    return self.annotate_uri(predicate, uri)
308
334
 
309
335
def extend_parents(self, references: Collection[Reference]) -> None:
    """Add several parents to this entity at once (deprecated).

    :param references: The parents to add
    :raises ValueError: If any of the given references is None
    """
    warnings.warn("use append_parent", DeprecationWarning, stacklevel=2)
    # validate the whole collection first so a bad entry leaves the parents untouched
    for candidate in references:
        if candidate is None:
            raise ValueError("can not append a collection of parents containing a null parent")
    self.parents.extend(references)
314
341
 
315
- def get_properties(self, prop) -> list[str]:
342
def get_property_literals(self, prop: ReferenceHint) -> list[str]:
    """Get the values annotated with the given property, each converted to a string.

    :param prop: The property to look up
    :returns: One string per value; empty if the property has no values
    """
    values = self.properties.get(_ensure_ref(prop), [])
    return list(map(reference_or_literal_to_str, values))
318
345
 
319
- def get_property(self, prop) -> Optional[str]:
346
def get_property(self, prop: ReferenceHint) -> str | None:
    """Get the single value of the given property.

    :param prop: The property to look up
    :returns: The only value of the property, or None if it has no values
    :raises ValueError: If the property has more than one value
    """
    values = self.get_property_literals(prop)
    if not values:
        return None
    if len(values) != 1:
        # name the property and its values so the caller can see what was ambiguous
        raise ValueError(
            f"expected at most one value for property {prop!r}, got {len(values)}: {values!r}"
        )
    return values[0]
336
354
 
337
- def get_relationships(self, typedef: TypeDef) -> list[Reference]:
338
- """Get relationships from the given type."""
339
- return self.relationships[typedef]
340
-
341
- def append_exact_match(self, reference: ReferenceHint):
355
def append_exact_match(
    self,
    reference: ReferenceHint,
    *,
    mapping_justification: Reference | None = None,
    confidence: float | None = None,
    contributor: Reference | None = None,
) -> Self:
    """Annotate this term with an exact match to the given reference.

    :param reference: The entity this term exactly matches
    :param mapping_justification: Forwarded to ``_prepare_mapping_annotations``
    :param confidence: Forwarded to ``_prepare_mapping_annotations``
    :param contributor: Forwarded to ``_prepare_mapping_annotations``
    :returns: This term, so calls can be chained
    """
    target = _ensure_ref(reference)
    mapping_annotations = self._prepare_mapping_annotations(
        mapping_justification=mapping_justification,
        confidence=confidence,
        contributor=contributor,
    )
    self.annotate_object(v.exact_match, target, annotations=mapping_annotations)
    return self
347
372
 
348
- def append_xref(self, reference: ReferenceHint) -> None:
349
- """Append an xref."""
350
- self.xrefs.append(_ensure_ref(reference))
351
-
352
- def append_relationship(self, typedef: TypeDef, reference: ReferenceHint) -> None:
353
- """Append a relationship."""
354
- self.relationships[typedef].append(_ensure_ref(reference))
355
-
356
- def set_species(self, identifier: str, name: Optional[str] = None):
373
def set_species(self, identifier: str, name: str | None = None) -> Self:
    """Append the from_species relation.

    :param identifier: The local identifier of the species under ``NCBITAXON_PREFIX``
    :param name: The species name; looked up with ``get_ncbitaxon_name`` when not given
    :returns: Whatever ``append_relationship`` returns
    """
    if name is None:
        # imported lazily, only when a lookup is actually needed
        from pyobo.resources.ncbitaxon import get_ncbitaxon_name

        name = get_ncbitaxon_name(identifier)
    species = Reference(prefix=NCBITAXON_PREFIX, identifier=identifier, name=name)
    return self.append_relationship(v.from_species, species)
365
382
 
366
- def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Optional[Reference]:
383
def get_species(self, prefix: str = NCBITAXON_PREFIX) -> Reference | None:
    """Get the species if it exists.

    :param prefix: The prefix to use in case the term has several species annotations.
    :returns: The first from_species target with the given prefix, or None if there is none
    """
    candidates = (
        species
        for species in self.get_relationships(v.from_species)
        if species.prefix == prefix
    )
    return next(candidates, None)
375
392
 
376
- def extend_relationship(self, typedef: TypeDef, references: Iterable[Reference]) -> None:
393
def extend_relationship(self, typedef: ReferenceHint, references: Iterable[Reference]) -> None:
    """Append several relationships of the same type (deprecated).

    :param typedef: The relationship type
    :param references: The targets of the relationship; any iterable, including a generator
    :raises ValueError: If any of the given references is None
    """
    warnings.warn("use append_relationship", DeprecationWarning, stacklevel=2)
    # Materialize first: the signature accepts any iterable, and a one-shot
    # iterator would be exhausted by the null check, leaving nothing to append.
    references = list(references)
    if any(x is None for x in references):
        raise ValueError("can not extend a collection that includes a null reference")
    typedef = _ensure_ref(typedef)
    self.relationships[typedef].extend(references)
381
400
 
382
- def append_property(
383
- self, prop: Union[str, Reference, Referenced], value: Union[str, Reference, Referenced]
384
- ) -> None:
385
- """Append a property."""
386
- if isinstance(prop, (Reference, Referenced)):
387
- prop = prop.preferred_curie
388
- if isinstance(value, (Reference, Referenced)):
389
- value = value.preferred_curie
390
- self.properties[prop].append(value)
391
-
392
- def _definition_fp(self) -> str:
393
- if self.definition is None:
394
- raise AssertionError
395
- return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'
396
-
397
- def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]:
398
- """Iterate over pairs of typedefs and targets."""
399
- for typedef, targets in sorted(self.relationships.items(), key=_sort_relations):
400
- for target in sorted(targets, key=lambda ref: ref.preferred_curie):
401
- yield typedef, target
402
-
403
- def iterate_properties(self) -> Iterable[tuple[str, str]]:
404
- """Iterate over pairs of property and values."""
405
- for prop, values in sorted(self.properties.items()):
406
- for value in sorted(values):
407
- yield prop, value
408
-
409
- def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
401
def iterate_obo_lines(
    self,
    *,
    ontology_prefix: str,
    typedefs: Mapping[ReferenceTuple, TypeDef],
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
    emit_object_properties: bool = True,
    emit_annotation_properties: bool = True,
) -> Iterable[str]:
    """Iterate over the lines to write for this stanza in an OBO file.

    The stanza header is emitted first, then the tags in the fixed order
    given by the numbered comments in the body.

    :param ontology_prefix: The prefix of the ontology being written
    :param typedefs: Index of type definitions, handed to the property and relation writers
    :param synonym_typedefs: Index of synonym type definitions, handed to each synonym's writer
    :param emit_object_properties: If false, step 18 (relations) is left out
    :param emit_annotation_properties: If false, step 12 (properties) is left out
    :yields: One line of OBO text at a time
    """
    yield f"\n[{self.type}]"
    # 1
    stanza_id = self._reference(self.reference, ontology_prefix)
    yield f"id: {stanza_id}"
    # 2
    yield from _boolean_tag("is_anonymous", self.is_anonymous)
    # 3
    if self.name:
        yield f"name: {obo_escape_slim(self.name)}"
    # 4
    if self.namespace and self.namespace != "?":
        cleaned = self.namespace
        for old, new in ((" ", "_"), ("-", "_"), ("(", ""), (")", "")):
            cleaned = cleaned.replace(old, new)
        yield f"namespace: {cleaned}"
    # 5
    for alt_id in sorted(self.alt_ids):
        alt_text = self._reference(alt_id, ontology_prefix, add_name_comment=True)
        yield f"alt_id: {alt_text}"
    # 6
    if self.definition:
        yield f"def: {self._definition_fp()}"
    # 7
    for comment_value in self.get_property_values(v.comment):
        if not isinstance(comment_value, OBOLiteral):
            continue
        yield f'comment: "{comment_value.value}"'
    # 8
    yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
    # 9
    for synonym in sorted(self.synonyms):
        yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
    # 10
    yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
    # 11
    yield from _boolean_tag("builtin", self.builtin)
    # 12
    if emit_annotation_properties:
        yield from self._iterate_obo_properties(
            ontology_prefix=ontology_prefix,
            skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
            skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
            typedefs=typedefs,
        )
    # 13
    parent_tag = "instance_of" if self.type != "Term" else "is_a"
    yield from _reference_list_tag(parent_tag, self.parents, ontology_prefix)
    # 14
    yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
    # 15, 16, and 17 share a writer; each attribute is read only when its turn comes
    for tag in ("union_of", "equivalent_to", "disjoint_from"):
        yield from _reference_list_tag(tag, getattr(self, tag), ontology_prefix=ontology_prefix)
    # 18
    if emit_object_properties:
        yield from self._iterate_obo_relations(ontology_prefix=ontology_prefix, typedefs=typedefs)
    # 19 TODO created_by
    # 20
    for creation_value in self.get_property_values(v.obo_creation_date):
        if not isinstance(creation_value, OBOLiteral):
            continue
        yield f"creation_date: {creation_value.value}"
    # 21
    yield from _boolean_tag("is_obsolete", self.is_obsolete)
    # 22
    yield from _tag_property_targets(
        "replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
    )
    # 23
    yield from _tag_property_targets("consider", self, v.see_also, ontology_prefix=ontology_prefix)
462
487
 
463
488
 
464
489
  #: A set of warnings, used to make sure we don't show the same one over and over
465
- _TYPEDEF_WARNINGS: set[tuple[str, str]] = set()
466
-
490
+ _SYNONYM_TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set()
467
491
 
468
- def _sort_relations(r):
469
- typedef, _references = r
470
- return typedef.preferred_curie
471
492
 
472
-
473
- def _sort_properties(r):
474
- o = r[1]
475
- if isinstance(o, str):
476
- return o
477
- elif isinstance(o, Term):
478
- return o.curie
479
- else:
480
- raise TypeError(f"What {type(r)}: {r}")
493
def _synonym_typedef_warn(
    prefix: str,
    predicate: Reference | None,
    synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
) -> SynonymTypeDef | None:
    """Look up the synonym type definition for a predicate, warning once if it is missing.

    :param prefix: The ontology prefix, used in the log message and the de-duplication key
    :param predicate: The synonym type to resolve
    :param synonym_typedefs: The ontology's own synonym type definitions, indexed by pair
    :returns: The matching definition, or None for a missing, default, or undefined predicate
    """
    if predicate is None or predicate.pair == DEFAULT_SYNONYM_TYPE.pair:
        return None
    # the built-in definitions are consulted before the ontology's own
    for index in (default_synonym_typedefs, synonym_typedefs):
        if predicate.pair in index:
            return index[predicate.pair]
    key = prefix, predicate
    if key in _SYNONYM_TYPEDEF_WARNINGS:
        return None
    _SYNONYM_TYPEDEF_WARNINGS.add(key)
    predicate_preferred_curie = get_preferred_curie(predicate)
    if predicate.prefix != "obo":
        logger.warning(f"[{prefix}] synonym typedef not defined: {predicate_preferred_curie}")
    else:
        # Throw our hands up in the air. By using `obo` as the prefix,
        # we already threw using "real" definitions out the window
        logger.warning(
            f"[{prefix}] synonym typedef with OBO prefix not defined: {predicate_preferred_curie}."
            f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
            f"which automatically gets an OBO prefix."
        )
    return None
481
519
 
482
520
 
483
521
  class BioregistryError(ValueError):
@@ -495,6 +533,9 @@ class BioregistryError(ValueError):
495
533
  )
496
534
 
497
535
 
536
+ LOGGED_MISSING_URI: set[tuple[str, str]] = set()
537
+
538
+
498
539
  @dataclass
499
540
  class Obo:
500
541
  """An OBO document."""
@@ -506,22 +547,19 @@ class Obo:
506
547
  check_bioregistry_prefix: ClassVar[bool] = True
507
548
 
508
549
  #: The name of the ontology. If not given, tries looking up with the Bioregistry.
509
- name: ClassVar[Optional[str]] = None
510
-
511
- #: The OBO format
512
- format_version: ClassVar[str] = "1.2"
550
+ name: ClassVar[str | None] = None
513
551
 
514
552
  #: Type definitions
515
- typedefs: ClassVar[Optional[list[TypeDef]]] = None
553
+ typedefs: ClassVar[list[TypeDef] | None] = None
516
554
 
517
555
  #: Synonym type definitions
518
- synonym_typedefs: ClassVar[Optional[list[SynonymTypeDef]]] = None
556
+ synonym_typedefs: ClassVar[list[SynonymTypeDef] | None] = None
519
557
 
520
558
  #: An annotation about how an ontology was generated
521
- auto_generated_by: ClassVar[Optional[str]] = None
559
+ auto_generated_by: ClassVar[str | None] = None
522
560
 
523
561
  #: The idspaces used in the document
524
- idspaces: ClassVar[Optional[Mapping[str, str]]] = None
562
+ idspaces: ClassVar[Mapping[str, str] | None] = None
525
563
 
526
564
  #: For super-sized datasets that shouldn't be read into memory
527
565
  iter_only: ClassVar[bool] = False
@@ -530,28 +568,32 @@ class Obo:
530
568
  dynamic_version: ClassVar[bool] = False
531
569
 
532
570
  #: Set to a static version for the resource (i.e., the resource is not itself versioned)
533
- static_version: ClassVar[Optional[str]] = None
571
+ static_version: ClassVar[str | None] = None
534
572
 
535
- bioversions_key: ClassVar[Optional[str]] = None
573
+ bioversions_key: ClassVar[str | None] = None
536
574
 
537
575
  #: Root terms to use for the ontology
538
- root_terms: ClassVar[Optional[list[Reference]]] = None
576
+ root_terms: ClassVar[list[Reference] | None] = None
539
577
 
540
578
  #: The date the ontology was generated
541
- date: Optional[datetime] = field(default_factory=datetime.today)
579
+ date: datetime.datetime | None = field(default_factory=datetime.datetime.today)
542
580
 
543
581
  #: The ontology version
544
- data_version: Optional[str] = None
582
+ data_version: str | None = None
545
583
 
546
584
  #: Should this ontology be reloaded?
547
585
  force: bool = False
548
586
 
549
587
  #: The hierarchy of terms
550
- _hierarchy: Optional[nx.DiGraph] = field(init=False, default=None, repr=False)
588
+ _hierarchy: nx.DiGraph | None = field(init=False, default=None, repr=False)
551
589
  #: A cache of terms
552
- _items: Optional[list[Term]] = field(init=False, default=None, repr=False)
590
+ _items: list[Term] | None = field(init=False, default=None, repr=False)
591
+
592
+ subsetdefs: ClassVar[list[tuple[Reference, str]] | None] = None
553
593
 
554
- term_sort_key: ClassVar[Optional[Callable[["Obo", Term], int]]] = None
594
+ property_values: ClassVar[list[Annotation] | None] = None
595
+
596
+ imports: ClassVar[list[str] | None] = None
555
597
 
556
598
  def __post_init__(self):
557
599
  """Run post-init checks."""
@@ -576,9 +618,85 @@ class Obo:
576
618
  elif "/" in self.data_version:
577
619
  raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
578
620
  if self.auto_generated_by is None:
579
- self.auto_generated_by = f"bio2obo:{self.ontology}" # type:ignore
621
+ self.auto_generated_by = f"PyOBO v{get_pyobo_version(with_git_hash=True)} on {datetime.datetime.now().isoformat()}" # type:ignore
622
+
623
def _get_clean_idspaces(self) -> dict[str, str]:
    """Get normalized idspace dictionary.

    :returns: A prefix map merged from three layers; on a shared key the earlier layer wins
    """
    # reasonable defaults, most of which are mandated by the OWL spec anyway (except skos?)
    defaults = DEFAULT_PREFIX_MAP
    # the idspaces declared on the ontology class
    declared = dict(self.idspaces or {})
    # every prefix detected in the ontology's references, with
    # Bioregistry-approved URI prefixes
    inferred = self._infer_prefix_map()
    return dict(ChainMap(defaults, declared, inferred))
580
637
 
581
- def _get_version(self) -> Optional[str]:
638
def _infer_prefix_map(self) -> dict[str, str]:
    """Get a prefix map including all prefixes used in the ontology.

    :returns: A mapping from preferred prefix to URI prefix
    :raises ValueError: If a prefix used in the ontology has no Bioregistry resource
    """
    rv: dict[str, str] = {}
    for prefix in sorted(self._get_prefixes(), key=str.casefold):
        resource = bioregistry.get_resource(prefix)
        if resource is None:
            # say which ontology and prefix failed, since a bare ValueError is undebuggable
            raise ValueError(
                f"[{self.ontology}] prefix is not registered in the Bioregistry: {prefix}"
            )
        uri_prefix = resource.get_rdf_uri_prefix()
        if uri_prefix is None:
            uri_prefix = resource.get_uri_prefix()
        if uri_prefix is None:
            # This allows us an escape hatch, since some
            # prefixes don't have an associated URI prefix
            uri_prefix = f"https://bioregistry.io/{prefix}:"
            if (self.ontology, prefix) not in LOGGED_MISSING_URI:
                LOGGED_MISSING_URI.add((self.ontology, prefix))
                logger.warning(
                    "[%s] uses prefix with no URI format: %s. Auto-generating Bioregistry link: %s",
                    self.ontology,
                    prefix,
                    uri_prefix,
                )

        pp = bioregistry.get_preferred_prefix(prefix) or str(prefix)
        rv[pp] = uri_prefix
    return rv
664
+
665
def _get_prefixes(self) -> set[str]:
    """Get all prefixes used by the ontology."""
    found: set[str] = set(DEFAULT_PREFIX_MAP)
    for stanza in self._iter_stanzas():
        found.update(stanza._get_prefixes())
    for synonym_typedef in self.synonym_typedefs or []:
        found.update(synonym_typedef._get_prefixes())
    for subset, _remark in self.subsetdefs or []:
        found.add(subset.prefix)
    # _iterate_property_pairs covers metadata, root terms,
    # and properties in self.property_values
    annotation_prefixes = _get_prefixes_from_annotations(self._iterate_property_pairs())
    found.update(annotation_prefixes)
    if self.auto_generated_by:
        found.add("oboInOwl")
    return found
679
+
680
def _get_references(self) -> dict[str, set[Reference]]:
    """Get all references used by the ontology, grouped by prefix."""
    grouped: defaultdict[str, set[Reference]] = defaultdict(set)

    def _absorb(by_prefix) -> None:
        # fold one prefix-to-references mapping into the running result
        for prefix, references in by_prefix.items():
            grouped[prefix].update(references)

    for holder in itt.chain(self, self.typedefs or [], self.synonym_typedefs or []):
        _absorb(holder._get_references())
    for subset, _remark in self.subsetdefs or []:
        grouped[subset.prefix].add(subset)
    # _iterate_property_pairs covers metadata, root terms,
    # and properties in self.property_values
    _absorb(_get_references_from_annotations(self._iterate_property_pairs()))
    if self.auto_generated_by:
        grouped[v.obo_autogenerated_by.prefix].add(v.obo_autogenerated_by)
    return dict(grouped)
698
+
699
+ def _get_version(self) -> str | None:
582
700
  if self.bioversions_key:
583
701
  try:
584
702
  return get_version(self.bioversions_key)
@@ -610,261 +728,417 @@ class Obo:
610
728
  path.write_text(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
611
729
 
612
730
  @classmethod
613
- def cli(cls) -> None:
731
def cli(cls, *args, default_rewrite: bool = False) -> Any:
    """Run the CLI for this class.

    :param args: Positional arguments passed straight to the command
    :param default_rewrite: Forwarded to :meth:`get_cls_cli`
    :returns: Whatever invoking the command returns
    """
    return cls.get_cls_cli(default_rewrite=default_rewrite)(*args)
617
735
 
618
736
  @classmethod
619
- def get_cls_cli(cls) -> click.Command:
737
def get_cls_cli(cls, *, default_rewrite: bool = False) -> click.Command:
    """Get the CLI for this class.

    :param default_rewrite: The value of ``--rewrite/--no-rewrite`` when neither flag is given
    :returns: A click command that instantiates the ontology and writes its default outputs
    """

    @click.command()
    @verbose_option
    @force_option
    @click.option(
        "--rewrite/--no-rewrite",
        "-r",
        # honor the caller's default instead of a hard-coded False
        default=default_rewrite,
        is_flag=True,
        help="Re-process the data, but don't download it again.",
    )
    @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
    @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
    @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
    @click.option(
        "--version", help="Specify data version to get. Use this if bioversions is acting up."
    )
    def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
        # ``rewrite`` is used as given on the command line; it used to be
        # overwritten with True here, which made --no-rewrite a no-op
        try:
            inst = cls(force=force, data_version=version)
        except Exception as e:
            click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
            sys.exit(1)
        inst.write_default(
            write_obograph=True,
            write_obo=True,
            write_owl=owl,
            write_ofn=ofn,
            write_ttl=ttl,
            write_nodes=True,
            write_edges=True,
            force=force or rewrite,
            use_tqdm=True,
        )

    return _main
652
776
 
653
777
  @property
654
778
def date_formatted(self) -> str:
    """Get the date as a string in ``DATE_FORMAT``, using the current time when no date is set."""
    moment = self.date or datetime.datetime.now()
    return moment.strftime(DATE_FORMAT)
781
+
782
+ def _iter_terms_safe(self) -> Iterator[Term]:
783
+ if self.iter_only:
784
+ return iter(self.iter_terms(force=self.force))
785
+ return iter(self._items_accessor)
657
786
 
658
787
  def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Term]:
788
+ yv = self._iter_terms_safe()
659
789
  if use_tqdm:
660
- total: Optional[int]
790
+ total: int | None
661
791
  try:
662
792
  total = len(self._items_accessor)
663
793
  except TypeError:
664
794
  total = None
665
- yield from tqdm(self, desc=desc, unit_scale=True, unit="term", total=total)
666
- else:
667
- yield from self
795
+ yv = tqdm(yv, desc=desc, unit_scale=True, unit="term", total=total)
796
+ yield from yv
668
797
 
669
- def iterate_obo_lines(self) -> Iterable[str]:
670
- """Iterate over the lines to write in an OBO file."""
671
- yield f"format-version: {self.format_version}"
672
-
673
- if self.auto_generated_by is not None:
674
- yield f"auto-generated-by: {self.auto_generated_by}"
798
+ def _iter_stanzas(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[Stanza]:
799
+ yield from self._iter_terms(use_tqdm=use_tqdm, desc=desc)
800
+ yield from self.typedefs or []
675
801
 
676
- if self.data_version is not None:
802
def iterate_obo_lines(
    self,
    emit_object_properties: bool = True,
    emit_annotation_properties: bool = True,
) -> Iterable[str]:
    """Iterate over the lines to write in an OBO file.

    Here's the order:

    1. format-version (technically, this is the only required field)
    2. data-version
    3. date
    4. saved-by
    5. auto-generated-by
    6. import
    7. subsetdef
    8. synonymtypedef
    9. default-namespace
    10. namespace-id-rule
    11. idspace
    12. treat-xrefs-as-equivalent
    13. treat-xrefs-as-genus-differentia
    14. treat-xrefs-as-relationship
    15. treat-xrefs-as-is_a
    16. remark
    17. ontology

    The header is followed by the ontology-level property values, the
    typedef stanzas, and finally the term and instance stanzas.

    :param emit_object_properties: Forwarded to each term's ``iterate_obo_lines``
    :param emit_annotation_properties: Forwarded to each term's ``iterate_obo_lines``
    :yields: One line of OBO text at a time
    """
    # 1
    yield f"format-version: {FORMAT_VERSION}"
    # 2
    if self.data_version:
        yield f"data-version: {self.data_version}"
    # 3
    if self.date:
        # this line used to be a bare f-string without ``yield``, so it was never written
        yield f"date: {self.date_formatted}"
    # 4 TODO saved-by
    # 5
    if self.auto_generated_by:
        yield f"auto-generated-by: {self.auto_generated_by}"
    # 6
    for imp in self.imports or []:
        yield f"import: {imp}"
    # 7
    for subset, subset_remark in self.subsetdefs or []:
        yield f'subsetdef: {reference_escape(subset, ontology_prefix=self.ontology)} "{subset_remark}"'
    # 8
    for synonym_typedef in sorted(self.synonym_typedefs or []):
        if synonym_typedef.curie == DEFAULT_SYNONYM_TYPE.curie:
            continue
        yield synonym_typedef.to_obo(ontology_prefix=self.ontology)
    # 9 TODO default-namespace
    # 10 TODO namespace-id-rule
    # 11
    for prefix, url in sorted(self._get_clean_idspaces().items()):
        if prefix in DEFAULT_PREFIX_MAP:
            # we don't need to write out the 4 default prefixes from
            # table 2 in https://www.w3.org/TR/owl2-syntax/#IRIs since
            # they're considered to always be builtin
            continue

        # additional assumptions about built in
        if prefix in {"obo", "oboInOwl"}:
            continue

        # ROBOT assumes that all OBO foundry prefixes are builtin,
        # so don't re-declare them
        if bioregistry.is_obo_foundry(prefix):
            continue

        yv = f"idspace: {prefix} {url}"
        if _yv_name := bioregistry.get_name(prefix):
            yv += f' "{_yv_name}"'
        yield yv
    # 12-15 are handled only during reading, and
    # PyOBO unmacros things before outputting
    # 12 treat-xrefs-as-equivalent
    # 13 treat-xrefs-as-genus-differentia
    # 14 treat-xrefs-as-relationship
    # 15 treat-xrefs-as-is_a
    # 16 TODO remark
    # 17
    yield f"ontology: {self.ontology}"
    # 18 (secret)
    yield from self._iterate_properties()

    typedefs = self._index_typedefs()
    synonym_typedefs = self._index_synonym_typedefs()

    # PROPERTIES
    for typedef in sorted(self.typedefs or []):
        yield from typedef.iterate_obo_lines(
            ontology_prefix=self.ontology,
            typedefs=typedefs,
            synonym_typedefs=synonym_typedefs,
        )

    # TERMS AND INSTANCES
    for term in self._iter_terms():
        yield from term.iterate_obo_lines(
            ontology_prefix=self.ontology,
            typedefs=typedefs,
            synonym_typedefs=synonym_typedefs,
            emit_object_properties=emit_object_properties,
            emit_annotation_properties=emit_annotation_properties,
        )
907
+
908
+ def _iterate_properties(self) -> Iterable[str]:
909
+ for predicate, value in self._iterate_property_pairs():
910
+ match value:
911
+ case OBOLiteral():
912
+ end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
913
+ case Reference():
914
+ end = reference_escape(value, ontology_prefix=self.ontology)
915
+ yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
916
+
917
def _iterate_property_pairs(self) -> Iterable[Annotation]:
    """Iterate over the ontology-level annotations.

    :yields: In order: the title, the license and description looked up from
        the Bioregistry, the root terms, and any extra ``property_values``
    """
    # Title
    if self.name:
        yield Annotation(v.has_title, OBOLiteral.string(self.name))

    # License
    # TODO add SPDX to idspaces and use as a CURIE?
    if license_spdx_id := bioregistry.get_license(self.ontology):
        if license_spdx_id.startswith("http"):
            license_literal = OBOLiteral.uri(license_spdx_id)
        else:
            license_literal = OBOLiteral.string(license_spdx_id)
        yield Annotation(v.has_license, license_literal)

    # Description
    if description := bioregistry.get_description(self.ontology):
        # Keep the raw text, like the title above: _iterate_properties escapes
        # literal values when it writes them, so escaping here too would escape twice.
        yield Annotation(v.has_description, OBOLiteral.string(description.strip()))

    # Root terms
    for root_term in self.root_terms or []:
        yield Annotation(v.has_ontology_root_term, root_term)

    # Extras
    if self.property_values:
        yield from self.property_values
705
943
 
706
- for typedef in sorted(self.typedefs or [], key=attrgetter("curie")):
707
- yield from typedef.iterate_obo_lines()
944
def _index_typedefs(self) -> Mapping[ReferenceTuple, TypeDef]:
    """Index type definitions by their ``pair`` attribute.

    Type definitions declared on this ontology shadow same-keyed entries
    in ``default_typedefs``, which is used as the fallback layer.
    """
    from .typedef import default_typedefs

    declared = {}
    for typedef in self.typedefs or []:
        declared[typedef.pair] = typedef
    return ChainMap(declared, default_typedefs)
708
951
 
709
- for term in self:
710
- yield from term.iterate_obo_lines(ontology=self.ontology, typedefs=self.typedefs)
952
def _index_synonym_typedefs(self) -> Mapping[ReferenceTuple, SynonymTypeDef]:
    """Index synonym type definitions by their ``pair`` attribute.

    Entries declared on this ontology shadow same-keyed entries in
    ``default_synonym_typedefs``, which is used as the fallback layer.
    """
    declared = {}
    for synonym_typedef in self.synonym_typedefs or []:
        declared[synonym_typedef.pair] = synonym_typedef
    return ChainMap(declared, default_synonym_typedefs)
711
957
 
712
958
  def write_obo(
713
- self, file: Union[None, str, TextIO, Path] = None, use_tqdm: bool = False
959
+ self,
960
+ file: None | str | TextIO | Path = None,
961
+ *,
962
+ use_tqdm: bool = False,
963
+ emit_object_properties: bool = True,
964
+ emit_annotation_properties: bool = True,
714
965
  ) -> None:
715
966
  """Write the OBO to a file."""
716
- it = self.iterate_obo_lines()
967
+ it = self.iterate_obo_lines(
968
+ emit_object_properties=emit_object_properties,
969
+ emit_annotation_properties=emit_annotation_properties,
970
+ )
717
971
  if use_tqdm:
718
- it = tqdm(it, desc=f"Writing {self.ontology}", unit_scale=True, unit="line")
719
- if isinstance(file, (str, Path, os.PathLike)):
972
+ it = tqdm(it, desc=f"[{self.ontology}] writing OBO", unit_scale=True, unit="line")
973
+ if isinstance(file, str | Path | os.PathLike):
720
974
  with open(file, "w") as fh:
721
975
  self._write_lines(it, fh)
722
976
  else:
723
977
  self._write_lines(it, file)
724
978
 
725
979
  @staticmethod
726
- def _write_lines(it, file: Optional[TextIO]):
980
+ def _write_lines(it, file: TextIO | None):
727
981
  for line in it:
728
982
  print(line, file=file)
729
983
 
730
- def write_obonet_gz(self, path: Union[str, Path]) -> None:
984
def write_obonet_gz(self, path: str | Path) -> None:
    """Write the graph built by :meth:`to_obonet` as a gzipped dump at ``path``."""
    write_gzipped_graph(path=path, graph=self.to_obonet())
735
988
 
736
- def _path(self, *parts: str, name: Optional[str] = None) -> Path:
737
- return prefix_directory_join(self.ontology, *parts, name=name, version=self.data_version)
738
-
739
- def _cache(self, *parts: str, name: Optional[str] = None) -> Path:
740
- return self._path("cache", *parts, name=name)
741
-
742
- @property
743
- def _names_path(self) -> Path:
744
- return self._cache(name="names.tsv")
745
-
746
- @property
747
- def _definitions_path(self) -> Path:
748
- return self._cache(name="definitions.tsv")
989
+ def write_ofn(self, path: str | Path) -> None:
990
+ """Write as Functional OWL (OFN)."""
991
+ from .functional.obo_to_functional import get_ofn_from_obo
749
992
 
750
- @property
751
- def _species_path(self) -> Path:
752
- return self._cache(name="species.tsv")
993
+ ofn = get_ofn_from_obo(self)
994
+ ofn.write_funowl(path)
753
995
 
754
- @property
755
- def _synonyms_path(self) -> Path:
756
- return self._cache(name="synonyms.tsv")
996
+ def write_rdf(self, path: str | Path) -> None:
997
+ """Write as Turtle RDF."""
998
+ from .functional.obo_to_functional import get_ofn_from_obo
757
999
 
758
- @property
759
- def _alts_path(self):
760
- return self._cache(name="alt_ids.tsv")
1000
+ ofn = get_ofn_from_obo(self)
1001
+ ofn.write_rdf(path)
761
1002
 
762
- @property
763
- def _typedefs_path(self) -> Path:
764
- return self._cache(name="typedefs.tsv")
1003
+ def write_nodes(self, path: str | Path) -> None:
1004
+ """Write a nodes TSV file."""
1005
+ # TODO reimplement internally
1006
+ self.get_graph().get_nodes_df().to_csv(path, sep="\t", index=False)
765
1007
 
766
- @property
767
- def _xrefs_path(self) -> Path:
768
- return self._cache(name="xrefs.tsv")
1008
def write_edges(self, path: str | Path) -> None:
    """Write an edges TSV file.

    The columns are :attr:`edges_header` and the rows come from
    :meth:`iterate_edge_rows`.
    """
    header = self.edges_header
    rows = self.iterate_edge_rows()
    write_iterable_tsv(path=path, header=header, it=rows)
769
1015
 
770
- @property
771
- def _relations_path(self) -> Path:
772
- return self._cache(name="relations.tsv")
1016
def _path(self, *parts: str, name: str | None = None) -> Path:
    """Join ``parts`` and ``name`` beneath this ontology's directory for the current data version."""
    version = self.data_version
    return prefix_directory_join(self.ontology, *parts, name=name, version=version)
773
1018
 
774
- @property
775
- def _properties_path(self) -> Path:
776
- return self._cache(name="properties.tsv")
1019
def _get_cache_path(self, name: CacheArtifact) -> Path:
    """Get the path of cache artifact ``name`` for this ontology and data version."""
    version = self.data_version
    return get_cache_path(self.ontology, name=name, version=version)
777
1021
 
778
1022
@property
def _root_metadata_path(self) -> Path:
    """Path of ``metadata.json`` in this ontology's directory (no version is passed)."""
    filename = "metadata.json"
    return prefix_directory_join(self.ontology, name=filename)
781
1025
 
782
- @property
783
- def _versioned_metadata_path(self) -> Path:
784
- return self._cache(name="metadata.json")
785
-
786
1026
@property
def _obo_path(self) -> Path:
    """Default path of the OBO export inside the build subdirectory."""
    filename = f"{self.ontology}.obo"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
789
1029
 
790
1030
@property
def _obograph_path(self) -> Path:
    """Default path of the OBO Graph JSON export inside the build subdirectory."""
    filename = f"{self.ontology}.json"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
793
1033
 
794
1034
@property
def _owl_path(self) -> Path:
    """Default path of the OWL export inside the build subdirectory."""
    filename = f"{self.ontology}.owl"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
797
1037
 
798
1038
@property
def _obonet_gz_path(self) -> Path:
    """Default path of the gzipped obonet JSON export inside the build subdirectory."""
    filename = f"{self.ontology}.obonet.json.gz"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
801
1041
 
802
1042
  @property
803
- def _nodes_path(self) -> Path:
804
- return self._path(name=f"{self.ontology}.nodes.tsv")
805
-
806
- def write_default(
807
- self,
808
- use_tqdm: bool = False,
809
- force: bool = False,
810
- write_obo: bool = False,
811
- write_obonet: bool = False,
812
- write_obograph: bool = False,
813
- write_owl: bool = False,
814
- write_nodes: bool = False,
815
- ) -> None:
816
- """Write the OBO to the default path."""
817
- metadata = self.get_metadata()
818
- for path in (self._root_metadata_path, self._versioned_metadata_path):
819
- logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
820
- with path.open("w") as file:
821
- json.dump(metadata, file, indent=2)
1043
def _ofn_path(self) -> Path:
    """Default path of the functional OWL (OFN) export inside the build subdirectory."""
    filename = f"{self.ontology}.ofn"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
822
1045
 
823
- logger.debug(
824
- "[%s v%s] caching typedefs to %s", self.ontology, self.data_version, self._typedefs_path
825
- )
826
- typedef_df: pd.DataFrame = self.get_typedef_df()
827
- typedef_df.sort_values(list(typedef_df.columns), inplace=True)
828
- typedef_df.to_csv(self._typedefs_path, sep="\t", index=False)
1046
@property
def _ttl_path(self) -> Path:
    """Default path of the Turtle export inside the build subdirectory."""
    filename = f"{self.ontology}.ttl"
    return self._path(BUILD_SUBDIRECTORY_NAME, name=filename)
829
1049
 
830
- for label, path, header, fn in [
831
- ("names", self._names_path, [f"{self.ontology}_id", "name"], self.iterate_id_name),
1050
+ def _get_cache_config(self) -> list[tuple[CacheArtifact, Sequence[str], Callable]]:
1051
+ return [
1052
+ (CacheArtifact.names, [f"{self.ontology}_id", "name"], self.iterate_id_name),
832
1053
  (
833
- "definitions",
834
- self._definitions_path,
1054
+ CacheArtifact.definitions,
835
1055
  [f"{self.ontology}_id", "definition"],
836
1056
  self.iterate_id_definition,
837
1057
  ),
838
1058
  (
839
- "species",
840
- self._species_path,
1059
+ CacheArtifact.species,
841
1060
  [f"{self.ontology}_id", "taxonomy_id"],
842
1061
  self.iterate_id_species,
843
1062
  ),
844
1063
  (
845
- "synonyms",
846
- self._synonyms_path,
1064
+ # TODO deprecate this in favor of literal mappings output
1065
+ CacheArtifact.synonyms,
847
1066
  [f"{self.ontology}_id", "synonym"],
848
1067
  self.iterate_synonym_rows,
849
1068
  ),
850
- ("alts", self._alts_path, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
851
- ("xrefs", self._xrefs_path, self.xrefs_header, self.iterate_xref_rows),
852
- ("relations", self._relations_path, self.relations_header, self.iter_relation_rows),
853
- ("properties", self._properties_path, self.properties_header, self.iter_property_rows),
854
- ]:
1069
+ (CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
1070
+ (CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
1071
+ (CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
1072
+ (CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
1073
+ (
1074
+ # TODO deprecate this in favor of pair of literal and object properties
1075
+ CacheArtifact.properties,
1076
+ self.properties_header,
1077
+ self._iter_property_rows,
1078
+ ),
1079
+ (
1080
+ CacheArtifact.object_properties,
1081
+ self.object_properties_header,
1082
+ self.iter_object_properties,
1083
+ ),
1084
+ (
1085
+ CacheArtifact.literal_properties,
1086
+ self.literal_properties_header,
1087
+ self.iter_literal_properties,
1088
+ ),
1089
+ (
1090
+ CacheArtifact.literal_mappings,
1091
+ ssslm.LiteralMappingTuple._fields,
1092
+ self.iterate_literal_mapping_rows,
1093
+ ),
1094
+ ]
1095
+
1096
def write_metadata(self) -> None:
    """Write the metadata JSON file.

    The same metadata is dumped to two places: the root metadata path and
    the metadata artifact in the cache for the current data version.
    """
    metadata = self.get_metadata()
    targets = (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata))
    for target in targets:
        logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, target)
        with target.open("w") as handle:
            json.dump(metadata, handle, indent=2)
1103
+
1104
def write_prefix_map(self) -> None:
    """Write a prefix map file that includes all prefixes used in this ontology."""
    prefixes_path = self._get_cache_path(CacheArtifact.prefixes)
    with prefixes_path.open("w") as handle:
        json.dump(self._get_clean_idspaces(), handle, indent=2)
1108
+
1109
+ def write_cache(self, *, force: bool = False) -> None:
1110
+ """Write cache parts."""
1111
+ typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
1112
+ logger.debug(
1113
+ "[%s v%s] caching typedefs to %s",
1114
+ self.ontology,
1115
+ self.data_version,
1116
+ typedefs_path,
1117
+ )
1118
+ typedef_df: pd.DataFrame = self.get_typedef_df()
1119
+ typedef_df.sort_values(list(typedef_df.columns), inplace=True)
1120
+ typedef_df.to_csv(typedefs_path, sep="\t", index=False)
1121
+
1122
+ for cache_artifact, header, fn in self._get_cache_config():
1123
+ path = self._get_cache_path(cache_artifact)
855
1124
  if path.exists() and not force:
856
1125
  continue
857
- logger.debug("[%s v%s] caching %s to %s", self.ontology, self.data_version, label, path)
1126
+ tqdm.write(
1127
+ f"[{self.ontology} {self.data_version}] writing {cache_artifact.name} to {path}",
1128
+ )
858
1129
  write_iterable_tsv(
859
1130
  path=path,
860
1131
  header=header,
861
1132
  it=fn(), # type:ignore
862
1133
  )
863
1134
 
864
- for relation in (is_a, has_part, part_of, from_species, orthologous):
865
- if relation is not is_a and self.typedefs is not None and relation not in self.typedefs:
1135
+ typedefs = self._index_typedefs()
1136
+ for relation in (v.is_a, v.has_part, v.part_of, v.from_species, v.orthologous):
1137
+ if relation is not v.is_a and relation.pair not in typedefs:
866
1138
  continue
867
- relations_path = self._cache("relations", name=f"{relation.curie}.tsv")
1139
+ relations_path = get_relation_cache_path(
1140
+ self.ontology, reference=relation, version=self.data_version
1141
+ )
868
1142
  if relations_path.exists() and not force:
869
1143
  continue
870
1144
  logger.debug(
@@ -880,36 +1154,83 @@ class Obo:
880
1154
  relation_df.sort_values(list(relation_df.columns), inplace=True)
881
1155
  relation_df.to_csv(relations_path, sep="\t", index=False)
882
1156
 
883
- if (write_obo or write_owl) and (not self._obo_path.exists() or force):
1157
def write_default(
    self,
    use_tqdm: bool = False,
    force: bool = False,
    write_obo: bool = False,
    write_obonet: bool = False,
    write_obograph: bool = False,
    write_owl: bool = False,
    write_ofn: bool = False,
    write_ttl: bool = False,
    write_nodes: bool = True,
    write_edges: bool = True,
    obograph_use_internal: bool = False,
    write_cache: bool = True,
) -> None:
    """Write the ontology's artifacts to their default paths.

    The metadata and the prefix map are always written. Every export other
    than the nodes TSV is skipped when its target already exists, unless
    ``force`` is set.

    :param use_tqdm: Show a progress bar while writing the OBO file.
    :param force: Rewrite artifacts even if their target already exists.
    :param write_obo: Write the OBO flat file.
    :param write_obonet: Write the gzipped obonet JSON.
    :param write_obograph: Write the OBO Graph JSON.
    :param write_owl: Write OWL, converted from the OFN file.
    :param write_ofn: Write functional OWL. This is also written when
        ``write_owl`` or ``write_obograph`` is set.
    :param write_ttl: Write Turtle RDF.
    :param write_nodes: Write the nodes TSV.
    :param write_edges: Write the edges TSV if the cache step did not already.
    :param obograph_use_internal: Use :meth:`write_obograph` instead of
        converting the OFN file with ``bioontologies.robot``.
    :param write_cache: Run :meth:`write_cache`.
    """
    self.write_metadata()
    self.write_prefix_map()
    if write_cache:
        self.write_cache(force=force)
    if write_obo and (not self._obo_path.exists() or force):
        tqdm.write(f"[{self.ontology}] writing OBO to {self._obo_path}")
        self.write_obo(self._obo_path, use_tqdm=use_tqdm)
    if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.exists() or force):
        tqdm.write(f"[{self.ontology}] writing OFN to {self._ofn_path}")
        self.write_ofn(self._ofn_path)
    if write_obograph and (not self._obograph_path.exists() or force):
        if obograph_use_internal:
            tqdm.write(f"[{self.ontology}] writing OBO Graph to {self._obograph_path}")
            self.write_obograph(self._obograph_path)
        else:
            import bioontologies.robot

            tqdm.write(
                f"[{self.ontology}] converting OFN to OBO Graph at {self._obograph_path}"
            )
            bioontologies.robot.convert(
                self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
            )
    if write_owl and (not self._owl_path.exists() or force):
        tqdm.write(f"[{self.ontology}] writing OWL to {self._owl_path}")
        import bioontologies.robot

        bioontologies.robot.convert(
            self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
        )
    if write_ttl and (not self._ttl_path.exists() or force):
        tqdm.write(f"[{self.ontology}] writing Turtle to {self._ttl_path}")
        self.write_rdf(self._ttl_path)
    if write_obonet and (not self._obonet_gz_path.exists() or force):
        tqdm.write(f"[{self.ontology}] writing obonet to {self._obonet_gz_path}")
        self.write_obonet_gz(self._obonet_gz_path)
    if write_nodes:
        nodes_path = self._get_cache_path(CacheArtifact.nodes)
        tqdm.write(f"[{self.ontology}] writing nodes TSV to {nodes_path}")
        self.write_nodes(nodes_path)
    if write_edges:
        edges_path = self._get_cache_path(CacheArtifact.edges)
        # the edges artifact is part of the cache configuration, so
        # write_cache() normally produces it; only write it here when it is
        # still missing, or when a rewrite is forced without the cache step
        if not edges_path.exists() or (force and not write_cache):
            tqdm.write(f"[{self.ontology}] writing edges TSV to {edges_path}")
            self.write_edges(edges_path)
894
1213
 
895
1214
  @property
896
- def _items_accessor(self):
1215
+ def _items_accessor(self) -> list[Term]:
897
1216
  if self._items is None:
898
- key = self.term_sort_key or attrgetter("curie")
899
- self._items = sorted(self.iter_terms(force=self.force), key=key)
1217
+ # if the term sort key is None, then the terms get sorted by their reference
1218
+ self._items = sorted(
1219
+ self.iter_terms(force=self.force),
1220
+ )
900
1221
  return self._items
901
1222
 
902
- def __iter__(self) -> Iterator["Term"]:
903
- if self.iter_only:
904
- return iter(self.iter_terms(force=self.force))
905
- return iter(self._items_accessor)
1223
+ def __iter__(self) -> Iterator[Term]:
1224
+ yield from self._iter_terms_safe()
906
1225
 
907
1226
def ancestors(self, identifier: str) -> set[str]:
    """Return a set of identifiers for parents of the given identifier.

    Edges in :attr:`hierarchy` point from a child to its parent, so the
    ontology's ancestors are the graph's descendants.
    """
    # FIXME switch to references
    graph = self.hierarchy
    return nx.descendants(graph, identifier)  # note this is backwards
910
1230
 
911
1231
def descendants(self, identifier: str) -> set[str]:
    """Return a set of identifiers for the children of the given identifier.

    Edges in :attr:`hierarchy` point from a child to its parent, so the
    ontology's descendants are the graph's ancestors.
    """
    # FIXME switch to references
    graph = self.hierarchy
    return nx.ancestors(graph, identifier)  # note this is backwards
914
1235
 
915
1236
  def is_descendant(self, descendant: str, ancestor: str) -> bool:
@@ -917,9 +1238,9 @@ class Obo:
917
1238
 
918
1239
  .. code-block:: python
919
1240
 
920
- from pyobo import get_obo
1241
+ from pyobo import get_ontology
921
1242
 
922
- obo = get_obo("go")
1243
+ obo = get_ontology("go")
923
1244
 
924
1245
  interleukin_10_complex = "1905571" # interleukin-10 receptor complex
925
1246
  all_complexes = "0032991"
@@ -935,21 +1256,22 @@ class Obo:
935
1256
 
936
1257
  .. code-block:: python
937
1258
 
938
- from pyobo import get_obo
1259
+ from pyobo import get_ontology
939
1260
 
940
- obo = get_obo("go")
1261
+ obo = get_ontology("go")
941
1262
 
942
1263
  identifier = "1905571" # interleukin-10 receptor complex
943
1264
  is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier) # should be true
944
1265
  """
945
1266
  if self._hierarchy is None:
946
1267
  self._hierarchy = nx.DiGraph()
947
- for term in self._iter_terms(desc=f"[{self.ontology}] getting hierarchy"):
948
- for parent in term.parents:
949
- self._hierarchy.add_edge(term.identifier, parent.identifier)
1268
+ for stanza in self._iter_stanzas(desc=f"[{self.ontology}] getting hierarchy"):
1269
+ for parent in stanza.parents:
1270
+ # FIXME add references
1271
+ self._hierarchy.add_edge(stanza.identifier, parent.identifier)
950
1272
  return self._hierarchy
951
1273
 
952
- def to_obonet(self: "Obo", *, use_tqdm: bool = False) -> nx.MultiDiGraph:
1274
+ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
953
1275
  """Export as a :mod`obonet` style graph."""
954
1276
  rv = nx.MultiDiGraph()
955
1277
  rv.graph.update(
@@ -957,46 +1279,56 @@ class Obo:
957
1279
  "name": self.name,
958
1280
  "ontology": self.ontology,
959
1281
  "auto-generated-by": self.auto_generated_by,
960
- "typedefs": _convert_typedefs(self.typedefs),
961
- "format-version": self.format_version,
1282
+ "format-version": FORMAT_VERSION,
962
1283
  "data-version": self.data_version,
963
- "synonymtypedef": _convert_synonym_typedefs(self.synonym_typedefs),
964
1284
  "date": self.date_formatted,
1285
+ "typedefs": [typedef.reference.model_dump() for typedef in self.typedefs or []],
1286
+ "synonymtypedef": [
1287
+ synonym_typedef.to_obo(ontology_prefix=self.ontology)
1288
+ for synonym_typedef in self.synonym_typedefs or []
1289
+ ],
965
1290
  }
966
1291
  )
967
1292
 
968
1293
  nodes = {}
1294
+ #: a list of 3-tuples u,v,k
969
1295
  links = []
970
- for term in self._iter_terms(use_tqdm=use_tqdm):
1296
+ typedefs = self._index_typedefs()
1297
+ synonym_typedefs = self._index_synonym_typedefs()
1298
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
971
1299
  parents = []
972
- for parent in term.parents:
1300
+ for parent in stanza.parents:
973
1301
  if parent is None:
974
1302
  raise ValueError("parent should not be none!")
975
- links.append((term.curie, "is_a", parent.curie))
1303
+ links.append((stanza.curie, "is_a", parent.curie))
976
1304
  parents.append(parent.curie)
977
1305
 
978
1306
  relations = []
979
- for typedef, target in term.iterate_relations():
980
- if target is None:
981
- raise ValueError("target should not be none!")
1307
+ for typedef, target in stanza.iterate_relations():
982
1308
  relations.append(f"{typedef.curie} {target.curie}")
983
- links.append((term.curie, typedef.curie, target.curie))
1309
+ links.append((stanza.curie, typedef.curie, target.curie))
1310
+
1311
+ for typedef, targets in sorted(stanza.properties.items()):
1312
+ for target_or_literal in targets:
1313
+ if isinstance(target_or_literal, curies.Reference):
1314
+ links.append((stanza.curie, typedef.curie, target_or_literal.curie))
984
1315
 
985
1316
  d = {
986
- "id": term.curie,
987
- "name": term.name,
988
- "def": term.definition and term._definition_fp(),
989
- "xref": [xref.curie for xref in term.xrefs],
1317
+ "id": stanza.curie,
1318
+ "name": stanza.name,
1319
+ "def": stanza.definition and stanza._definition_fp(),
1320
+ "xref": [xref.curie for xref in stanza.xrefs],
990
1321
  "is_a": parents,
991
1322
  "relationship": relations,
992
- "synonym": [synonym._fp() for synonym in term.synonyms],
993
- "property_value": [
994
- f"{prop} {value}"
995
- for prop, values in term.properties.items()
996
- for value in values
1323
+ "synonym": [
1324
+ synonym._fp(ontology_prefix=self.ontology, synonym_typedefs=synonym_typedefs)
1325
+ for synonym in stanza.synonyms
997
1326
  ],
1327
+ "property_value": list(
1328
+ stanza._iterate_obo_properties(ontology_prefix=self.ontology, typedefs=typedefs)
1329
+ ),
998
1330
  }
999
- nodes[term.curie] = {k: v for k, v in d.items() if v}
1331
+ nodes[stanza.curie] = {k: v for k, v in d.items() if v}
1000
1332
 
1001
1333
  rv.add_nodes_from(nodes.items())
1002
1334
  for _source, _key, _target in links:
@@ -1017,11 +1349,21 @@ class Obo:
1017
1349
  "date": self.date and self.date.isoformat(),
1018
1350
  }
1019
1351
 
1352
def iterate_references(self, *, use_tqdm: bool = False) -> Iterable[Reference]:
    """Iterate over the references of stanzas accepted by :meth:`_in_ontology`."""
    stanzas = self._iter_stanzas(
        use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
    )
    for stanza in stanzas:
        if not self._in_ontology(stanza.reference):
            continue
        yield stanza.reference
1359
+
1020
1360
def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]:
    """Iterate over the identifiers of stanzas accepted by :meth:`_in_ontology_strict`."""
    stanzas = self._iter_stanzas(
        use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting identifiers"
    )
    for stanza in stanzas:
        if not self._in_ontology_strict(stanza.reference):
            continue
        yield stanza.identifier
1025
1367
 
1026
1368
  def get_ids(self, *, use_tqdm: bool = False) -> set[str]:
1027
1369
  """Get the set of identifiers."""
@@ -1029,9 +1371,11 @@ class Obo:
1029
1371
 
1030
1372
def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
    """Iterate identifier name pairs.

    Stanzas rejected by :meth:`_in_ontology` and stanzas without a name
    are skipped.
    """
    stanzas = self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names")
    for stanza in stanzas:
        if not self._in_ontology(stanza.reference):
            continue
        if not stanza.name:
            continue
        yield stanza.identifier, stanza.name
1035
1379
 
1036
1380
  def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
1037
1381
  """Get a mapping from identifiers to names."""
@@ -1039,11 +1383,13 @@ class Obo:
1039
1383
 
1040
1384
  def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
1041
1385
  """Iterate over pairs of terms' identifiers and their respective definitions."""
1042
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
1043
- if term.identifier and term.definition:
1386
+ for stanza in self._iter_stanzas(
1387
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"
1388
+ ):
1389
+ if stanza.identifier and stanza.definition:
1044
1390
  yield (
1045
- term.identifier,
1046
- term.definition.strip('"')
1391
+ stanza.identifier,
1392
+ stanza.definition.strip('"')
1047
1393
  .replace("\n", " ")
1048
1394
  .replace("\t", " ")
1049
1395
  .replace(" ", " "),
@@ -1056,11 +1402,11 @@ class Obo:
1056
1402
def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]:
    """Get the set of obsolete identifiers.

    Stanzas with a falsy identifier are ignored.
    """
    obsolete: set[str] = set()
    for stanza in self._iter_stanzas(
        use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting obsolete"
    ):
        if stanza.identifier and stanza.is_obsolete:
            obsolete.add(stanza.identifier)
    return obsolete
1065
1411
 
1066
1412
  ############
@@ -1068,18 +1414,19 @@ class Obo:
1068
1414
  ############
1069
1415
 
1070
1416
def iterate_id_species(
    self, *, prefix: str | None = None, use_tqdm: bool = False
) -> Iterable[tuple[str, str]]:
    """Iterate over terms' identifiers and respective species (if available).

    :param prefix: Prefix passed to ``get_species``; ``NCBITAXON_PREFIX``
        is used when this is ``None``.
    :param use_tqdm: Forwarded to :meth:`_iter_stanzas`.
    """
    species_prefix = NCBITAXON_PREFIX if prefix is None else prefix
    for stanza in self._iter_stanzas(
        use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting species"
    ):
        # only Term stanzas are asked for a species
        if not isinstance(stanza, Term):
            continue
        species = stanza.get_species(prefix=species_prefix)
        if species:
            yield stanza.identifier, species.identifier
1080
1427
 
1081
1428
  def get_id_species_mapping(
1082
- self, *, prefix: Optional[str] = None, use_tqdm: bool = False
1429
+ self, *, prefix: str | None = None, use_tqdm: bool = False
1083
1430
  ) -> Mapping[str, str]:
1084
1431
  """Get a mapping from identifiers to species."""
1085
1432
  return dict(self.iterate_id_species(prefix=prefix, use_tqdm=use_tqdm))
@@ -1109,42 +1456,103 @@ class Obo:
1109
1456
  # PROPS #
1110
1457
  #########
1111
1458
 
1112
- def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, str, str]]:
1459
def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Annotation]]:
    """Iterate over pairs of a stanza and one of its property annotations."""
    stanzas = self._iter_stanzas(
        use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting properties"
    )
    for stanza in stanzas:
        yield from (
            (stanza, annotation) for annotation in stanza.get_property_annotations()
        )
1120
1466
 
1121
1467
@property
def properties_header(self):
    """Column names of the combined properties table.

    The first column is named after the ontology prefix.
    """
    identifier_column = f"{self.ontology}_id"
    return [identifier_column, "property", "value", "datatype", "language"]
1471
+
1472
@property
def object_properties_header(self):
    """Column names of the object properties table."""
    columns = ["source", "predicate", "target"]
    return columns
1476
+
1477
@property
def literal_properties_header(self):
    """Column names of the literal properties table."""
    columns = ["source", "predicate", "target", "datatype", "language"]
    return columns
1125
1481
 
1126
- def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1482
+ def _iter_property_rows(
1483
+ self, *, use_tqdm: bool = False
1484
+ ) -> Iterable[tuple[str, str, str, str, str]]:
1127
1485
  """Iterate property rows."""
1128
- for term, prop, value in self.iterate_properties(use_tqdm=use_tqdm):
1129
- yield term.identifier, prop, value
1486
+ for term, t in self.iterate_properties(use_tqdm=use_tqdm):
1487
+ pred = term._reference(t.predicate, ontology_prefix=self.ontology)
1488
+ match t.value:
1489
+ case OBOLiteral(value, datatype, language):
1490
+ yield (
1491
+ term.identifier,
1492
+ pred,
1493
+ value,
1494
+ get_preferred_curie(datatype),
1495
+ language or "",
1496
+ )
1497
+ case Reference() as obj:
1498
+ yield term.identifier, pred, get_preferred_curie(obj), "", ""
1499
+ case _:
1500
+ raise TypeError(f"got: {type(t)} - {t}")
1501
+
1502
def get_properties_df(self, *, use_tqdm: bool = False, drop_na: bool = True) -> pd.DataFrame:
    """Get all properties as a dataframe.

    :param use_tqdm: Forwarded to :meth:`_iter_property_rows`.
    :param drop_na: Drop every row that contains a missing value.
    """
    rows = self._iter_property_rows(use_tqdm=use_tqdm)
    df = pd.DataFrame(rows, columns=self.properties_header)
    if not drop_na:
        return df
    df.dropna(inplace=True)
    return df
1511
+
1512
def iter_object_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
    """Iterate over object property triples.

    Each triple holds the CURIEs of the stanza, the predicate and the target.
    """
    for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
        yield from (
            (stanza.curie, predicate.curie, target.curie)
            for predicate, target in stanza.iterate_object_properties()
        )
1130
1517
 
1131
- def get_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1518
def get_object_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
    """Get all object properties as a dataframe with :attr:`object_properties_header` columns."""
    triples = self.iter_object_properties(use_tqdm=use_tqdm)
    return pd.DataFrame(triples, columns=self.object_properties_header)
1137
1523
 
1524
def iter_literal_properties(
    self, *, use_tqdm: bool = False
) -> Iterable[tuple[str, str, str, str, str]]:
    """Iterate over literal property rows.

    Each row holds the stanza CURIE, the predicate CURIE, the literal's
    value, its datatype CURIE and its language (or an empty string).
    """
    for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
        for predicate, literal in stanza.iterate_literal_properties():
            row = (
                stanza.curie,
                predicate.curie,
                literal.value,
                literal.datatype.curie,
                literal.language or "",
            )
            yield row
1537
+
1538
def get_literal_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
    """Get all literal properties as a dataframe.

    :param use_tqdm: Forwarded to :meth:`iter_literal_properties`.
    :returns: A dataframe with :attr:`literal_properties_header` columns.
    """
    return pd.DataFrame(
        self.iter_literal_properties(use_tqdm=use_tqdm),
        columns=self.literal_properties_header,
    )
1541
+
1138
1542
  def iterate_filtered_properties(
1139
- self, prop: str, *, use_tqdm: bool = False
1140
- ) -> Iterable[tuple[Term, str]]:
1543
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1544
+ ) -> Iterable[tuple[Stanza, str]]:
1141
1545
  """Iterate over tuples of terms and the values for the given property."""
1142
- for term in self._iter_terms(use_tqdm=use_tqdm):
1143
- for _prop, value in term.iterate_properties():
1144
- if _prop == prop:
1145
- yield term, value
1146
-
1147
- def get_filtered_properties_df(self, prop: str, *, use_tqdm: bool = False) -> pd.DataFrame:
1546
+ prop = _ensure_ref(prop)
1547
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
1548
+ for t in stanza.get_property_annotations():
1549
+ if t.predicate != prop:
1550
+ continue
1551
+ yield stanza, reference_or_literal_to_str(t.value)
1552
+
1553
+ def get_filtered_properties_df(
1554
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1555
+ ) -> pd.DataFrame:
1148
1556
  """Get a dataframe of terms' identifiers to the given property's values."""
1149
1557
  return pd.DataFrame(
1150
1558
  list(self.get_filtered_properties_mapping(prop, use_tqdm=use_tqdm).items()),
@@ -1152,7 +1560,7 @@ class Obo:
1152
1560
  )
1153
1561
 
1154
1562
  def get_filtered_properties_mapping(
1155
- self, prop: str, *, use_tqdm: bool = False
1563
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1156
1564
  ) -> Mapping[str, str]:
1157
1565
  """Get a mapping from a term's identifier to the property.
1158
1566
 
@@ -1164,7 +1572,7 @@ class Obo:
1164
1572
  }
1165
1573
 
1166
1574
  def get_filtered_properties_multimapping(
1167
- self, prop: str, *, use_tqdm: bool = False
1575
+ self, prop: ReferenceHint, *, use_tqdm: bool = False
1168
1576
  ) -> Mapping[str, list[str]]:
1169
1577
  """Get a mapping from a term's identifier to the property values."""
1170
1578
  return multidict(
@@ -1176,22 +1584,63 @@ class Obo:
1176
1584
  # RELATIONS #
1177
1585
  #############
1178
1586
 
1587
+ def iterate_edges(
1588
+ self, *, use_tqdm: bool = False
1589
+ ) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
1590
+ """Iterate over triples of terms, relations, and their targets."""
1591
+ _warned: set[ReferenceTuple] = set()
1592
+ typedefs = self._index_typedefs()
1593
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] edge"):
1594
+ for predicate, reference in stanza._iter_edges():
1595
+ if td := self._get_typedef(stanza, predicate, _warned, typedefs):
1596
+ yield stanza, td, reference
1597
+
1598
+ @property
1599
+ def edges_header(self) -> Sequence[str]:
1600
+ """Header for the edges dataframe."""
1601
+ return [":START_ID", ":TYPE", ":END_ID"]
1602
+
1179
1603
  def iterate_relations(
1180
1604
  self, *, use_tqdm: bool = False
1181
- ) -> Iterable[tuple[Term, TypeDef, Reference]]:
1182
- """Iterate over tuples of terms, relations, and their targets."""
1183
- for term in self._iter_terms(
1184
- use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting relations"
1185
- ):
1186
- for parent in term.parents:
1187
- yield term, is_a, parent
1188
- for typedef, reference in term.iterate_relations():
1189
- if (self.typedefs is None or typedef not in self.typedefs) and (
1190
- typedef.prefix,
1191
- typedef.identifier,
1192
- ) not in default_typedefs:
1193
- raise ValueError(f"Undefined typedef: {typedef.curie} ! {typedef.name}")
1194
- yield term, typedef, reference
1605
+ ) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
1606
+ """Iterate over tuples of terms, relations, and their targets.
1607
+
1608
+ This only outputs stuff from the `relationship:` tag, not
1609
+ all possible triples. For that, see :func:`iterate_edges`.
1610
+ """
1611
+ _warned: set[ReferenceTuple] = set()
1612
+ typedefs = self._index_typedefs()
1613
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] relation"):
1614
+ for predicate, reference in stanza.iterate_relations():
1615
+ if td := self._get_typedef(stanza, predicate, _warned, typedefs):
1616
+ yield stanza, td, reference
1617
+
1618
+ def get_edges_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1619
+ """Get an edges dataframe."""
1620
+ return pd.DataFrame(self.iterate_edge_rows(use_tqdm=use_tqdm), columns=self.edges_header)
1621
+
1622
+ def iterate_edge_rows(self, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1623
+ """Iterate the edge rows."""
1624
+ for term, typedef, reference in self.iterate_edges(use_tqdm=use_tqdm):
1625
+ yield term.curie, typedef.curie, reference.curie
1626
+
1627
+ def _get_typedef(
1628
+ self,
1629
+ term: Stanza,
1630
+ predicate: Reference,
1631
+ _warned: set[ReferenceTuple],
1632
+ typedefs: Mapping[ReferenceTuple, TypeDef],
1633
+ ) -> TypeDef | None:
1634
+ pp = predicate.pair
1635
+ if pp in typedefs:
1636
+ return typedefs[pp]
1637
+ if pp not in _warned:
1638
+ _warn_string = f"[{term.curie}] undefined typedef: {pp}"
1639
+ if predicate.name:
1640
+ _warn_string += f" ({predicate.name})"
1641
+ logger.warning(_warn_string)
1642
+ _warned.add(pp)
1643
+ return None
1195
1644
 
1196
1645
  def iter_relation_rows(
1197
1646
  self, use_tqdm: bool = False
@@ -1208,14 +1657,14 @@ class Obo:
1208
1657
 
1209
1658
  def iterate_filtered_relations(
1210
1659
  self,
1211
- relation: RelationHint,
1660
+ relation: ReferenceHint,
1212
1661
  *,
1213
1662
  use_tqdm: bool = False,
1214
- ) -> Iterable[tuple[Term, Reference]]:
1663
+ ) -> Iterable[tuple[Stanza, Reference]]:
1215
1664
  """Iterate over tuples of terms and their targets for the given relation."""
1216
- _target_prefix, _target_identifier = get_reference_tuple(relation)
1217
- for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm):
1218
- if typedef.prefix == _target_prefix and typedef.identifier == _target_identifier:
1665
+ _pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
1666
+ for term, predicate, reference in self.iterate_relations(use_tqdm=use_tqdm):
1667
+ if _pair == predicate.pair:
1219
1668
  yield term, reference
1220
1669
 
1221
1670
  @property
@@ -1232,7 +1681,7 @@ class Obo:
1232
1681
 
1233
1682
  def get_filtered_relations_df(
1234
1683
  self,
1235
- relation: RelationHint,
1684
+ relation: ReferenceHint,
1236
1685
  *,
1237
1686
  use_tqdm: bool = False,
1238
1687
  ) -> pd.DataFrame:
@@ -1247,11 +1696,11 @@ class Obo:
1247
1696
 
1248
1697
  def iterate_filtered_relations_filtered_targets(
1249
1698
  self,
1250
- relation: RelationHint,
1699
+ relation: ReferenceHint,
1251
1700
  target_prefix: str,
1252
1701
  *,
1253
1702
  use_tqdm: bool = False,
1254
- ) -> Iterable[tuple[Term, Reference]]:
1703
+ ) -> Iterable[tuple[Stanza, Reference]]:
1255
1704
  """Iterate over relationships between one identifier and another."""
1256
1705
  for term, reference in self.iterate_filtered_relations(
1257
1706
  relation=relation, use_tqdm=use_tqdm
@@ -1261,7 +1710,7 @@ class Obo:
1261
1710
 
1262
1711
  def get_relation_mapping(
1263
1712
  self,
1264
- relation: RelationHint,
1713
+ relation: ReferenceHint,
1265
1714
  target_prefix: str,
1266
1715
  *,
1267
1716
  use_tqdm: bool = False,
@@ -1272,8 +1721,8 @@ class Obo:
1272
1721
 
1273
1722
  Example usage: get homology between HGNC and MGI:
1274
1723
 
1275
- >>> from pyobo.sources.hgnc import get_obo
1276
- >>> obo = get_obo()
1724
+ >>> from pyobo.sources.hgnc import HGNCGetter
1725
+ >>> obo = HGNCGetter()
1277
1726
  >>> human_mapt_hgnc_id = "6893"
1278
1727
  >>> mouse_mapt_mgi_id = "97180"
1279
1728
  >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi")
@@ -1291,15 +1740,15 @@ class Obo:
1291
1740
  def get_relation(
1292
1741
  self,
1293
1742
  source_identifier: str,
1294
- relation: RelationHint,
1743
+ relation: ReferenceHint,
1295
1744
  target_prefix: str,
1296
1745
  *,
1297
1746
  use_tqdm: bool = False,
1298
- ) -> Optional[str]:
1747
+ ) -> str | None:
1299
1748
  """Get the value for a bijective relation mapping between this resource and a target resource.
1300
1749
 
1301
- >>> from pyobo.sources.hgnc import get_obo
1302
- >>> obo = get_obo()
1750
+ >>> from pyobo.sources.hgnc import HGNCGetter
1751
+ >>> obo = HGNCGetter()
1303
1752
  >>> human_mapt_hgnc_id = "6893"
1304
1753
  >>> mouse_mapt_mgi_id = "97180"
1305
1754
  >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi")
@@ -1311,7 +1760,7 @@ class Obo:
1311
1760
 
1312
1761
  def get_relation_multimapping(
1313
1762
  self,
1314
- relation: RelationHint,
1763
+ relation: ReferenceHint,
1315
1764
  target_prefix: str,
1316
1765
  *,
1317
1766
  use_tqdm: bool = False,
@@ -1334,22 +1783,24 @@ class Obo:
1334
1783
  ) -> Mapping[str, list[Reference]]:
1335
1784
  """Get a mapping from identifiers to a list of all references for the given relation."""
1336
1785
  return multidict(
1337
- (term.identifier, reference)
1338
- for term in self._iter_terms(
1786
+ (stanza.identifier, reference)
1787
+ for stanza in self._iter_stanzas(
1339
1788
  use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting {typedef.curie}"
1340
1789
  )
1341
- for reference in term.get_relationships(typedef)
1790
+ for reference in stanza.get_relationships(typedef)
1342
1791
  )
1343
1792
 
1344
1793
  ############
1345
1794
  # SYNONYMS #
1346
1795
  ############
1347
1796
 
1348
- def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Synonym]]:
1797
+ def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Synonym]]:
1349
1798
  """Iterate over pairs of term and synonym object."""
1350
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"):
1351
- for synonym in sorted(term.synonyms, key=attrgetter("name")):
1352
- yield term, synonym
1799
+ for stanza in self._iter_stanzas(
1800
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"
1801
+ ):
1802
+ for synonym in sorted(stanza.synonyms):
1803
+ yield stanza, synonym
1353
1804
 
1354
1805
  def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
1355
1806
  """Iterate over pairs of identifier and synonym text."""
@@ -1360,40 +1811,95 @@ class Obo:
1360
1811
  """Get a mapping from identifiers to a list of sorted synonym strings."""
1361
1812
  return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm))
1362
1813
 
1814
+ def get_literal_mappings(self) -> Iterable[ssslm.LiteralMapping]:
1815
+ """Get literal mappings in a standard data model."""
1816
+ stanzas: Iterable[Stanza] = itt.chain(self, self.typedefs or [])
1817
+ yield from itt.chain.from_iterable(
1818
+ stanza.get_literal_mappings()
1819
+ for stanza in stanzas
1820
+ if self._in_ontology(stanza.reference)
1821
+ )
1822
+
1823
+ def _in_ontology(self, reference: Reference | Referenced) -> bool:
1824
+ return self._in_ontology_strict(reference) or self._in_ontology_aux(reference)
1825
+
1826
+ def _in_ontology_strict(self, reference: Reference | Referenced) -> bool:
1827
+ return reference.prefix == self.ontology
1828
+
1829
+ def _in_ontology_aux(self, reference: Reference | Referenced) -> bool:
1830
+ return reference.prefix == "obo" and reference.identifier.startswith(self.ontology + "#")
1831
+
1363
1832
  #########
1364
1833
  # XREFS #
1365
1834
  #########
1366
1835
 
1367
- def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Reference]]:
1836
+ def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Stanza, Reference]]:
1368
1837
  """Iterate over xrefs."""
1369
- for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"):
1370
- for xref in term.xrefs:
1371
- yield term, xref
1838
+ for stanza in self._iter_stanzas(
1839
+ use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"
1840
+ ):
1841
+ xrefs = {xref for _, xref in stanza.get_mappings(add_context=False)}
1842
+ for xref in sorted(xrefs):
1843
+ yield stanza, xref
1372
1844
 
1373
1845
  def iterate_filtered_xrefs(
1374
1846
  self, prefix: str, *, use_tqdm: bool = False
1375
- ) -> Iterable[tuple[Term, Reference]]:
1847
+ ) -> Iterable[tuple[Stanza, Reference]]:
1376
1848
  """Iterate over xrefs to a given prefix."""
1377
1849
  for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
1378
1850
  if xref.prefix == prefix:
1379
1851
  yield term, xref
1380
1852
 
1381
- def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
1382
- """Iterate over terms' identifiers, xref prefixes, and xref identifiers."""
1383
- for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
1384
- yield term.identifier, xref.prefix, xref.identifier
1853
+ def iterate_literal_mapping_rows(self) -> Iterable[ssslm.LiteralMappingTuple]:
1854
+ """Iterate over literal mapping rows."""
1855
+ for synonym in self.get_literal_mappings():
1856
+ yield synonym._as_row()
1385
1857
 
1386
- @property
1387
- def xrefs_header(self):
1388
- """The header for the xref dataframe."""
1389
- return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID]
1858
+ def get_literal_mappings_df(self) -> pd.DataFrame:
1859
+ """Get a literal mappings dataframe."""
1860
+ return ssslm.literal_mappings_to_df(self.get_literal_mappings())
1390
1861
 
1391
- def get_xrefs_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
1392
- """Get a dataframe of all xrefs extracted from the OBO document."""
1393
- return pd.DataFrame(
1394
- list(self.iterate_xref_rows(use_tqdm=use_tqdm)),
1395
- columns=[f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID],
1396
- ).drop_duplicates()
1862
+ def iterate_mapping_rows(
1863
+ self, *, use_tqdm: bool = False
1864
+ ) -> Iterable[tuple[str, str, str, str, str, float | None, str | None]]:
1865
+ """Iterate over SSSOM rows for mappings."""
1866
+ for stanza in self._iter_stanzas(use_tqdm=use_tqdm):
1867
+ for predicate, obj_ref, context in stanza.get_mappings(
1868
+ include_xrefs=True, add_context=True
1869
+ ):
1870
+ yield (
1871
+ get_preferred_curie(stanza),
1872
+ stanza.name,
1873
+ get_preferred_curie(obj_ref),
1874
+ get_preferred_curie(predicate),
1875
+ get_preferred_curie(context.justification),
1876
+ context.confidence if context.confidence is not None else None,
1877
+ get_preferred_curie(context.contributor) if context.contributor else None,
1878
+ )
1879
+
1880
+ def get_mappings_df(
1881
+ self,
1882
+ *,
1883
+ use_tqdm: bool = False,
1884
+ include_subject_labels: bool = False,
1885
+ include_mapping_source_column: bool = False,
1886
+ ) -> pd.DataFrame:
1887
+ """Get a dataframe with SSSOM extracted from the OBO document."""
1888
+ df = pd.DataFrame(self.iterate_mapping_rows(use_tqdm=use_tqdm), columns=SSSOM_DF_COLUMNS)
1889
+ if not include_subject_labels:
1890
+ del df["subject_label"]
1891
+
1892
+ # if no confidences/contributor, remove that column
1893
+ for c in ["confidence", "contributor"]:
1894
+ if df[c].isna().all():
1895
+ del df[c]
1896
+
1897
+ # append on the mapping_source
1898
+ # (https://mapping-commons.github.io/sssom/mapping_source/)
1899
+ if include_mapping_source_column:
1900
+ df["mapping_source"] = self.ontology
1901
+
1902
+ return df
1397
1903
 
1398
1904
  def get_filtered_xrefs_mapping(
1399
1905
  self, prefix: str, *, use_tqdm: bool = False
@@ -1417,11 +1923,12 @@ class Obo:
1417
1923
  # ALTS #
1418
1924
  ########
1419
1925
 
1420
- def iterate_alts(self) -> Iterable[tuple[Term, Reference]]:
1926
+ def iterate_alts(self) -> Iterable[tuple[Stanza, Reference]]:
1421
1927
  """Iterate over alternative identifiers."""
1422
- for term in self:
1423
- for alt in term.alt_ids:
1424
- yield term, alt
1928
+ for stanza in self._iter_stanzas():
1929
+ if self._in_ontology(stanza):
1930
+ for alt in stanza.alt_ids:
1931
+ yield stanza, alt
1425
1932
 
1426
1933
  def iterate_alt_rows(self) -> Iterable[tuple[str, str]]:
1427
1934
  """Iterate over pairs of terms' primary identifiers and alternate identifiers."""
@@ -1433,33 +1940,315 @@ class Obo:
1433
1940
  return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts())
1434
1941
 
1435
1942
 
1943
+ @dataclass
1944
+ class TypeDef(Stanza):
1945
+ """A type definition in OBO.
1946
+
1947
+ See the subsection of https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.
1948
+ """
1949
+
1950
+ reference: Annotated[Reference, 1]
1951
+ is_anonymous: Annotated[bool | None, 2] = None
1952
+ # 3 - name is covered by reference
1953
+ namespace: Annotated[str | None, 4] = None
1954
+ # 5 alt_id is part of proerties
1955
+ definition: Annotated[str | None, 6] = None
1956
+ comment: Annotated[str | None, 7] = None
1957
+ subsets: Annotated[list[Reference], 8] = field(default_factory=list)
1958
+ synonyms: Annotated[list[Synonym], 9] = field(default_factory=list)
1959
+ xrefs: Annotated[list[Reference], 10] = field(default_factory=list)
1960
+ _axioms: AnnotationsDict = field(default_factory=lambda: defaultdict(list))
1961
+ properties: Annotated[PropertiesHint, 11] = field(default_factory=lambda: defaultdict(list))
1962
+ domain: Annotated[Reference | None, 12, "typedef-only"] = None
1963
+ range: Annotated[Reference | None, 13, "typedef-only"] = None
1964
+ builtin: Annotated[bool | None, 14] = None
1965
+ holds_over_chain: Annotated[list[list[Reference]], 15, "typedef-only"] = field(
1966
+ default_factory=list
1967
+ )
1968
+ is_anti_symmetric: Annotated[bool | None, 16, "typedef-only"] = None
1969
+ is_cyclic: Annotated[bool | None, 17, "typedef-only"] = None
1970
+ is_reflexive: Annotated[bool | None, 18, "typedef-only"] = None
1971
+ is_symmetric: Annotated[bool | None, 19, "typedef-only"] = None
1972
+ is_transitive: Annotated[bool | None, 20, "typedef-only"] = None
1973
+ is_functional: Annotated[bool | None, 21, "typedef-only"] = None
1974
+ is_inverse_functional: Annotated[bool | None, 22, "typedef-only"] = None
1975
+ parents: Annotated[list[Reference], 23] = field(default_factory=list)
1976
+ intersection_of: Annotated[IntersectionOfHint, 24] = field(default_factory=list)
1977
+ union_of: Annotated[list[Reference], 25] = field(default_factory=list)
1978
+ equivalent_to: Annotated[list[Reference], 26] = field(default_factory=list)
1979
+ disjoint_from: Annotated[list[Reference], 27] = field(default_factory=list)
1980
+ # TODO inverse should be inverse_of, cardinality any
1981
+ inverse: Annotated[Reference | None, 28, "typedef-only"] = None
1982
+ # TODO check if there are any examples of this being multiple
1983
+ transitive_over: Annotated[list[Reference], 29, "typedef-only"] = field(default_factory=list)
1984
+ equivalent_to_chain: Annotated[list[list[Reference]], 30, "typedef-only"] = field(
1985
+ default_factory=list
1986
+ )
1987
+ #: From the OBO spec:
1988
+ #:
1989
+ #: For example: spatially_disconnected_from is disjoint_over part_of, in that two
1990
+ #: disconnected entities have no parts in common. This can be translated to OWL as:
1991
+ #: ``disjoint_over(R S), R(A B) ==> (S some A) disjointFrom (S some B)``
1992
+ disjoint_over: Annotated[list[Reference], 31] = field(default_factory=list)
1993
+ relationships: Annotated[RelationsHint, 32] = field(default_factory=lambda: defaultdict(list))
1994
+ is_obsolete: Annotated[bool | None, 33] = None
1995
+ created_by: Annotated[str | None, 34] = None
1996
+ creation_date: Annotated[datetime.datetime | None, 35] = None
1997
+ # TODO expand_assertion_to
1998
+ # TODO expand_expression_to
1999
+ #: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are
2000
+ #: used to record object metadata. Object metadata is additional information about an object
2001
+ #: that is useful to track, but does not impact the definition of the object or how it should
2002
+ #: be treated by a reasoner. Metadata tags might be used to record special term synonyms or
2003
+ #: structured notes about a term, for example.
2004
+ is_metadata_tag: Annotated[bool | None, 40, "typedef-only"] = None
2005
+ is_class_level: Annotated[bool | None, 41] = None
2006
+
2007
+ type: StanzaType = "TypeDef"
2008
+
2009
+ def __hash__(self) -> int:
2010
+ # have to re-define hash because of the @dataclass
2011
+ return hash((self.__class__, self.prefix, self.identifier))
2012
+
2013
+ def _get_references(self) -> dict[str, set[Reference]]:
2014
+ rv = super()._get_references()
2015
+
2016
+ def _add(r: Reference) -> None:
2017
+ rv[r.prefix].add(r)
2018
+
2019
+ if self.domain:
2020
+ _add(self.domain)
2021
+ if self.range:
2022
+ _add(self.range)
2023
+ if self.inverse:
2024
+ _add(self.inverse)
2025
+
2026
+ # TODO all of the properties, which are from oboInOwl
2027
+ for rr in itt.chain(self.transitive_over, self.disjoint_over):
2028
+ _add(rr)
2029
+ for part in itt.chain(self.holds_over_chain, self.equivalent_to_chain):
2030
+ for rr in part:
2031
+ _add(rr)
2032
+ return dict(rv)
2033
+
2034
+ def iterate_obo_lines(
2035
+ self,
2036
+ ontology_prefix: str,
2037
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
2038
+ typedefs: Mapping[ReferenceTuple, TypeDef] | None = None,
2039
+ ) -> Iterable[str]:
2040
+ """Iterate over the lines to write in an OBO file.
2041
+
2042
+ :param ontology_prefix:
2043
+ The prefix of the ontology into which the type definition is being written.
2044
+ This is used for compressing builtin identifiers
2045
+ :yield:
2046
+ The lines to write to an OBO file
2047
+
2048
+ `S.3.5.5 <https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.3.5.5>`_
2049
+ of the OBO Flat File Specification v1.4 says tags should appear in the following order:
2050
+
2051
+ 1. id
2052
+ 2. is_anonymous
2053
+ 3. name
2054
+ 4. namespace
2055
+ 5. alt_id
2056
+ 6. def
2057
+ 7. comment
2058
+ 8. subset
2059
+ 9. synonym
2060
+ 10. xref
2061
+ 11. property_value
2062
+ 12. domain
2063
+ 13. range
2064
+ 14. builtin
2065
+ 15. holds_over_chain
2066
+ 16. is_anti_symmetric
2067
+ 17. is_cyclic
2068
+ 18. is_reflexive
2069
+ 19. is_symmetric
2070
+ 20. is_transitive
2071
+ 21. is_functional
2072
+ 22. is_inverse_functional
2073
+ 23. is_a
2074
+ 24. intersection_of
2075
+ 25. union_of
2076
+ 26. equivalent_to
2077
+ 27. disjoint_from
2078
+ 28. inverse_of
2079
+ 29. transitive_over
2080
+ 30. equivalent_to_chain
2081
+ 31. disjoint_over
2082
+ 32. relationship
2083
+ 33. is_obsolete
2084
+ 34. created_by
2085
+ 35. creation_date
2086
+ 36. replaced_by
2087
+ 37. consider
2088
+ 38. expand_assertion_to
2089
+ 39. expand_expression_to
2090
+ 40. is_metadata_tag
2091
+ 41. is_class_level
2092
+ """
2093
+ if synonym_typedefs is None:
2094
+ synonym_typedefs = {}
2095
+ if typedefs is None:
2096
+ typedefs = {}
2097
+
2098
+ yield "\n[Typedef]"
2099
+ # 1
2100
+ yield f"id: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}"
2101
+ # 2
2102
+ yield from _boolean_tag("is_anonymous", self.is_anonymous)
2103
+ # 3
2104
+ if self.name:
2105
+ yield f"name: {self.name}"
2106
+ # 4
2107
+ if self.namespace:
2108
+ yield f"namespace: {self.namespace}"
2109
+ # 5
2110
+ yield from _reference_list_tag("alt_id", self.alt_ids, ontology_prefix)
2111
+ # 6
2112
+ if self.definition:
2113
+ yield f"def: {self._definition_fp()}"
2114
+ # 7
2115
+ if self.comment:
2116
+ yield f"comment: {self.comment}"
2117
+ # 8
2118
+ yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
2119
+ # 9
2120
+ for synonym in self.synonyms:
2121
+ yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)
2122
+ # 10
2123
+ yield from self._iterate_xref_obo(ontology_prefix=ontology_prefix)
2124
+ # 11
2125
+ yield from self._iterate_obo_properties(
2126
+ ontology_prefix=ontology_prefix,
2127
+ skip_predicate_objects=v.SKIP_PROPERTY_PREDICATES_OBJECTS,
2128
+ skip_predicate_literals=v.SKIP_PROPERTY_PREDICATES_LITERAL,
2129
+ typedefs=typedefs,
2130
+ )
2131
+ # 12
2132
+ if self.domain:
2133
+ yield f"domain: {reference_escape(self.domain, ontology_prefix=ontology_prefix, add_name_comment=True)}"
2134
+ # 13
2135
+ if self.range:
2136
+ yield f"range: {reference_escape(self.range, ontology_prefix=ontology_prefix, add_name_comment=True)}"
2137
+ # 14
2138
+ yield from _boolean_tag("builtin", self.builtin)
2139
+ # 15
2140
+ yield from _chain_tag("holds_over_chain", self.holds_over_chain, ontology_prefix)
2141
+ # 16
2142
+ yield from _boolean_tag("is_anti_symmetric", self.is_anti_symmetric)
2143
+ # 17
2144
+ yield from _boolean_tag("is_cyclic", self.is_cyclic)
2145
+ # 18
2146
+ yield from _boolean_tag("is_reflexive", self.is_reflexive)
2147
+ # 19
2148
+ yield from _boolean_tag("is_symmetric", self.is_symmetric)
2149
+ # 20
2150
+ yield from _boolean_tag("is_transitive", self.is_transitive)
2151
+ # 21
2152
+ yield from _boolean_tag("is_functional", self.is_functional)
2153
+ # 22
2154
+ yield from _boolean_tag("is_inverse_functional", self.is_inverse_functional)
2155
+ # 23
2156
+ yield from _reference_list_tag("is_a", self.parents, ontology_prefix)
2157
+ # 24
2158
+ yield from self._iterate_intersection_of_obo(ontology_prefix=ontology_prefix)
2159
+ # 25
2160
+ yield from _reference_list_tag("union_of", self.union_of, ontology_prefix)
2161
+ # 26
2162
+ yield from _reference_list_tag("equivalent_to", self.equivalent_to, ontology_prefix)
2163
+ # 27
2164
+ yield from _reference_list_tag("disjoint_from", self.disjoint_from, ontology_prefix)
2165
+ # 28
2166
+ if self.inverse:
2167
+ yield f"inverse_of: {reference_escape(self.inverse, ontology_prefix=ontology_prefix, add_name_comment=True)}"
2168
+ # 29
2169
+ yield from _reference_list_tag("transitive_over", self.transitive_over, ontology_prefix)
2170
+ # 30
2171
+ yield from _chain_tag("equivalent_to_chain", self.equivalent_to_chain, ontology_prefix)
2172
+ # 31 disjoint_over, see https://github.com/search?q=%22disjoint_over%3A%22+path%3A*.obo&type=code
2173
+ yield from _reference_list_tag(
2174
+ "disjoint_over", self.disjoint_over, ontology_prefix=ontology_prefix
2175
+ )
2176
+ # 32
2177
+ yield from self._iterate_obo_relations(ontology_prefix=ontology_prefix, typedefs=typedefs)
2178
+ # 33
2179
+ yield from _boolean_tag("is_obsolete", self.is_obsolete)
2180
+ # 34
2181
+ if self.created_by:
2182
+ yield f"created_by: {self.created_by}"
2183
+ # 35
2184
+ if self.creation_date is not None:
2185
+ yield f"creation_date: {self.creation_date.isoformat()}"
2186
+ # 36
2187
+ yield from _tag_property_targets(
2188
+ "replaced_by", self, v.term_replaced_by, ontology_prefix=ontology_prefix
2189
+ )
2190
+ # 37
2191
+ yield from _tag_property_targets(
2192
+ "consider", self, v.see_also, ontology_prefix=ontology_prefix
2193
+ )
2194
+ # 38 TODO expand_assertion_to
2195
+ # 39 TODO expand_expression_to
2196
+ # 40
2197
+ yield from _boolean_tag("is_metadata_tag", self.is_metadata_tag)
2198
+ # 41
2199
+ yield from _boolean_tag("is_class_level", self.is_class_level)
2200
+
2201
+ @classmethod
2202
+ def from_triple(cls, prefix: str, identifier: str, name: str | None = None) -> TypeDef:
2203
+ """Create a typedef from a reference."""
2204
+ return cls(reference=Reference(prefix=prefix, identifier=identifier, name=name))
2205
+
2206
+ @classmethod
2207
+ def default(
2208
+ cls, prefix: str, identifier: str, *, name: str | None = None, is_metadata_tag: bool
2209
+ ) -> Self:
2210
+ """Construct a default type definition from within the OBO namespace."""
2211
+ return cls(
2212
+ reference=default_reference(prefix, identifier, name=name),
2213
+ is_metadata_tag=is_metadata_tag,
2214
+ )
2215
+
2216
+
2217
+ class AdHocOntologyBase(Obo):
2218
+ """A base class for ad-hoc ontologies."""
2219
+
2220
+
1436
2221
  def make_ad_hoc_ontology(
1437
2222
  _ontology: str,
1438
- _name: str,
1439
- _auto_generated_by: Optional[str] = None,
1440
- _format_version: str = "1.2",
1441
- _typedefs: Optional[list[TypeDef]] = None,
1442
- _synonym_typedefs: Optional[list[SynonymTypeDef]] = None,
1443
- _date: Optional[datetime] = None,
1444
- _data_version: Optional[str] = None,
1445
- _idspaces: Optional[Mapping[str, str]] = None,
1446
- _root_terms: Optional[list[Reference]] = None,
2223
+ _name: str | None = None,
2224
+ _auto_generated_by: str | None = None,
2225
+ _typedefs: list[TypeDef] | None = None,
2226
+ _synonym_typedefs: list[SynonymTypeDef] | None = None,
2227
+ _date: datetime.datetime | None = None,
2228
+ _data_version: str | None = None,
2229
+ _idspaces: Mapping[str, str] | None = None,
2230
+ _root_terms: list[Reference] | None = None,
2231
+ _subsetdefs: list[tuple[Reference, str]] | None = None,
2232
+ _property_values: list[Annotation] | None = None,
2233
+ _imports: list[str] | None = None,
1447
2234
  *,
1448
- terms: list[Term],
1449
- ) -> "Obo":
2235
+ terms: list[Term] | None = None,
2236
+ ) -> Obo:
1450
2237
  """Make an ad-hoc ontology."""
1451
2238
 
1452
- class AdHocOntology(Obo):
2239
+ class AdHocOntology(AdHocOntologyBase):
1453
2240
  """An ad hoc ontology created from an OBO file."""
1454
2241
 
1455
2242
  ontology = _ontology
1456
2243
  name = _name
1457
2244
  auto_generated_by = _auto_generated_by
1458
- format_version = _format_version
1459
2245
  typedefs = _typedefs
1460
2246
  synonym_typedefs = _synonym_typedefs
1461
2247
  idspaces = _idspaces
1462
2248
  root_terms = _root_terms
2249
+ subsetdefs = _subsetdefs
2250
+ property_values = _property_values
2251
+ imports = _imports
1463
2252
 
1464
2253
  def __post_init__(self):
1465
2254
  self.date = _date
@@ -1467,30 +2256,11 @@ def make_ad_hoc_ontology(
1467
2256
 
1468
2257
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
1469
2258
  """Iterate over terms in the ad hoc ontology."""
1470
- return terms
2259
+ return terms or []
1471
2260
 
1472
2261
  return AdHocOntology()
1473
2262
 
1474
2263
 
1475
- def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> list[Mapping[str, Any]]:
1476
- """Convert the type defs."""
1477
- if not typedefs:
1478
- return []
1479
- return [_convert_typedef(typedef) for typedef in typedefs]
1480
-
1481
-
1482
- def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
1483
- """Convert a type def."""
1484
- # TODO add more later
1485
- return typedef.reference.model_dump()
1486
-
1487
-
1488
- def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]:
1489
- """Convert the synonym type defs."""
1490
- if not synonym_typedefs:
1491
- return []
1492
- return [_convert_synonym_typedef(synonym_typedef) for synonym_typedef in synonym_typedefs]
1493
-
1494
-
1495
- def _convert_synonym_typedef(synonym_typedef: SynonymTypeDef) -> str:
1496
- return f'{synonym_typedef.preferred_curie} "{synonym_typedef.name}"'
2264
+ HUMAN_TERM = Term(reference=v.HUMAN)
2265
+ CHARLIE_TERM = Term(reference=v.CHARLIE, type="Instance").append_parent(HUMAN_TERM)
2266
+ PYOBO_INJECTED = "Injected by PyOBO"