pyobo 0.12.4__py3-none-any.whl → 0.12.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +6 -0
  3. pyobo/api/__init__.py +3 -0
  4. pyobo/api/embedding.py +118 -0
  5. pyobo/api/utils.py +0 -10
  6. pyobo/cli/cli.py +1 -6
  7. pyobo/constants.py +23 -0
  8. pyobo/getters.py +52 -35
  9. pyobo/sources/__init__.py +14 -1
  10. pyobo/sources/chembl/__init__.py +6 -0
  11. pyobo/sources/chembl/chembl_cell.py +94 -0
  12. pyobo/sources/chembl/chembl_mechanism.py +81 -0
  13. pyobo/sources/chembl/chembl_tissue.py +70 -0
  14. pyobo/sources/clinicaltrials.py +32 -33
  15. pyobo/sources/complexportal.py +5 -1
  16. pyobo/sources/hgnc/hgnc.py +13 -6
  17. pyobo/sources/iana_media_type.py +100 -0
  18. pyobo/sources/mesh.py +82 -29
  19. pyobo/sources/reactome.py +10 -3
  20. pyobo/sources/spdx.py +85 -0
  21. pyobo/sources/uniprot/uniprot.py +2 -2
  22. pyobo/sources/wikipathways.py +92 -7
  23. pyobo/struct/__init__.py +2 -0
  24. pyobo/struct/functional/dsl.py +10 -1
  25. pyobo/struct/functional/ontology.py +3 -3
  26. pyobo/struct/obo/reader.py +17 -53
  27. pyobo/struct/obograph/export.py +2 -2
  28. pyobo/struct/struct.py +115 -8
  29. pyobo/struct/struct_utils.py +10 -0
  30. pyobo/struct/typedef.py +15 -3
  31. pyobo/struct/vocabulary.py +8 -0
  32. pyobo/utils/cache.py +4 -3
  33. pyobo/utils/io.py +18 -56
  34. pyobo/utils/misc.py +135 -1
  35. pyobo/utils/path.py +34 -2
  36. pyobo/version.py +1 -1
  37. {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/METADATA +5 -5
  38. {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/RECORD +41 -35
  39. {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/WHEEL +0 -0
  40. {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/entry_points.txt +0 -0
  41. {pyobo-0.12.4.dist-info → pyobo-0.12.5.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/spdx.py ADDED
@@ -0,0 +1,85 @@
1
+ """Convert SPDX to an ontology."""
2
+
3
+ from collections.abc import Iterable
4
+ from typing import Any
5
+
6
+ from pydantic import ValidationError
7
+ from tqdm import tqdm
8
+
9
+ from pyobo.struct import Obo, Reference, Term, TypeDef
10
+ from pyobo.struct.typedef import see_also
11
+ from pyobo.struct.vocabulary import xsd_boolean
12
+ from pyobo.utils.path import ensure_json
13
+
14
+ __all__ = [
15
+ "SPDXLicenseGetter",
16
+ ]
17
+
18
# JSON document listing the SPDX licenses; downloaded by get_terms().
DATA_URL = "https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
# Prefix of the license references built in _get_term(); also the key passed to ensure_json().
LICENSE_PREFIX = "spdx"
# Prefix of the root class and of the annotation properties declared below.
TERM_PREFIX = "spdx.term"

# Root class: get_terms() yields it first and _get_term() makes every license a child of it.
ROOT = Term.from_triple(TERM_PREFIX, "ListedLicense", "listed license")
# Boolean metadata property; _get_term() sets it when a record has a truthy "isOsiApproved".
IS_OSI = TypeDef(
    reference=Reference(prefix=TERM_PREFIX, identifier="isOsiApproved", name="is OSI approved"),
    is_metadata_tag=True,
    domain=ROOT.reference,
    range=xsd_boolean,
)
# Boolean metadata property; _get_term() sets it when a record has a truthy "isFsfLibre".
IS_FSF = TypeDef(
    reference=Reference(prefix=TERM_PREFIX, identifier="isFsfLibre", name="is FSF Libre"),
    is_metadata_tag=True,
    domain=ROOT.reference,
    range=xsd_boolean,
)
35
+
36
+
37
def get_terms(version: str) -> Iterable[Term]:
    """Yield the root license class, then one term per usable SPDX license record.

    :param version: The data version passed through to :func:`ensure_json`
    :returns: An iterable that starts with ``ROOT`` and continues with every
        record of the downloaded ``licenses`` list for which ``_get_term``
        returned a truthy term
    """
    yield ROOT
    payload = ensure_json(LICENSE_PREFIX, url=DATA_URL, version=version)
    for entry in payload["licenses"]:
        license_term = _get_term(entry)
        # records whose identifier failed validation come back as None
        if license_term:
            yield license_term
48
+
49
+
50
def _get_term(record: dict[str, Any]) -> Term | None:
    """Convert a single SPDX license record into a term.

    :param record: One element of the ``licenses`` list; ``licenseId`` and
        ``name`` are required keys, the remaining keys are read with ``.get``
    :returns: The populated term, or ``None`` when building the reference
        raises a ``ValidationError`` (the identifier is reported via ``tqdm.write``)
    """
    license_id = record["licenseId"]
    try:
        reference = Reference(prefix=LICENSE_PREFIX, identifier=license_id, name=record["name"])
    except ValidationError:
        tqdm.write(f"invalid: {license_id}")
        return None

    # only ever True or None, never False
    obsolete = True if record.get("isDeprecatedLicenseId") else None
    term = Term(
        reference=reference,
        is_obsolete=obsolete,
        # type="Instance",
    )
    term = term.append_parent(ROOT)

    # the flags are only written when truthy; a falsy/missing flag adds no annotation
    for key, typedef in (("isOsiApproved", IS_OSI), ("isFsfLibre", IS_FSF)):
        if record.get(key):
            term.annotate_boolean(typedef, True)

    for link in record.get("seeAlso", []):
        term.annotate_uri(see_also, link)
    return term
70
+
71
+
72
class SPDXLicenseGetter(Obo):
    """An ontology representation of the SPDX Licenses."""

    # the same prefix is assigned to both attributes
    bioversions_key = ontology = LICENSE_PREFIX
    # the properties that _get_term() puts on license terms
    typedefs = [see_also, IS_FSF, IS_OSI]
    root_terms = [ROOT.reference]

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms in the ontology.

        :param force: Accepted but not used by this implementation
        :returns: The terms produced by :func:`get_terms` for ``self._version_or_raise``
        """
        return get_terms(version=self._version_or_raise)
82
+
83
+
84
+ if __name__ == "__main__":
85
+ SPDXLicenseGetter.cli(["--obo", "--owl", "--rewrite"])
@@ -4,6 +4,7 @@ from collections.abc import Iterable
4
4
  from pathlib import Path
5
5
  from typing import cast
6
6
 
7
+ from pystow.utils import safe_open_reader
7
8
  from tqdm.auto import tqdm
8
9
 
9
10
  from pyobo import Obo, Reference
@@ -22,7 +23,6 @@ from pyobo.struct import (
22
23
  participates_in,
23
24
  )
24
25
  from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
25
- from pyobo.utils.io import open_reader
26
26
 
27
27
  PREFIX = "uniprot"
28
28
  BASE_URL = "https://rest.uniprot.org/uniprotkb/stream"
@@ -78,7 +78,7 @@ class UniProtGetter(Obo):
78
78
 
79
79
  def iter_terms(version: str | None = None) -> Iterable[Term]:
80
80
  """Iterate over UniProt Terms."""
81
- with open_reader(ensure(version=version)) as reader:
81
+ with safe_open_reader(ensure(version=version)) as reader:
82
82
  _ = next(reader) # header
83
83
  for (
84
84
  uniprot_id,
@@ -3,13 +3,14 @@
3
3
  import logging
4
4
  from collections.abc import Iterable
5
5
 
6
- from pystow.utils import DownloadError
6
+ import pystow
7
+ from pystow.utils import DownloadError, read_zipfile_rdf
7
8
  from tqdm import tqdm
8
9
 
9
10
  from .gmt_utils import parse_wikipathways_gmt
10
11
  from ..constants import SPECIES_REMAPPING
11
12
  from ..struct import Obo, Reference, Term, from_species
12
- from ..struct.typedef import has_participant
13
+ from ..struct.typedef import contributes_to_condition, has_depiction, has_participant, located_in
13
14
  from ..utils.path import ensure_path
14
15
 
15
16
  __all__ = [
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
20
21
 
21
22
  PREFIX = "wikipathways"
22
23
 
24
+ ROOT = Reference(prefix="pw", identifier="0000001", name="pathway")
23
25
  _PATHWAY_INFO = [
24
26
  ("Anopheles_gambiae", "7165"),
25
27
  ("Arabidopsis_thaliana", "3702"),
@@ -46,17 +48,27 @@ class WikiPathwaysGetter(Obo):
46
48
  """An ontology representation of WikiPathways' pathway database."""
47
49
 
48
50
  ontology = bioversions_key = PREFIX
49
- typedefs = [from_species, has_participant]
51
+ typedefs = [from_species, has_participant, contributes_to_condition, located_in, has_depiction]
52
+ root_terms = [ROOT]
50
53
 
51
54
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
52
55
  """Iterate over terms in the ontology."""
53
- return iter_terms(version=self._version_or_raise)
56
+ yield Term(reference=ROOT)
57
+ yield from iter_terms(version=self._version_or_raise)
54
58
 
55
59
 
56
- def iter_terms(version: str) -> Iterable[Term]:
60
+ PW_PREFIX = "http://purl.obolibrary.org/obo/PW_"
61
+ DOID_PREFIX = "http://purl.obolibrary.org/obo/DOID_"
62
+ CL_PREFIX = "http://purl.obolibrary.org/obo/CL_"
63
+
64
+
65
+ def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[Term]:
57
66
  """Get WikiPathways terms."""
58
- base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
67
+ archive_url = f"https://data.wikipathways.org/current/rdf/wikipathways-{version}-rdf-wp.zip"
68
+ archive = pystow.ensure(PREFIX, url=archive_url, version=version)
59
69
 
70
+ base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
71
+ pw_references = set()
60
72
  for species_code, taxonomy_id in tqdm(_PATHWAY_INFO, desc=f"[{PREFIX}]", unit="species"):
61
73
  url = f"{base_url}-{species_code}.gmt"
62
74
  try:
@@ -68,15 +80,88 @@ def iter_terms(version: str) -> Iterable[Term]:
68
80
  taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
69
81
 
70
82
  for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
71
- term = Term(reference=Reference(prefix=PREFIX, identifier=identifier, name=name))
83
+ graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
84
+ uri = f"https://identifiers.org/wikipathways/{identifier}"
85
+
86
+ definition: str | None = None
87
+ if include_descriptions:
88
+ # TODO deal with weird characters breaking OFN
89
+ description_results = list(
90
+ graph.query(
91
+ f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/dcterms:description ?p }} LIMIT 1"
92
+ )
93
+ )
94
+ if description_results:
95
+ definition = str(description_results[0][0]) # type:ignore[index]
96
+
97
+ term = Term(
98
+ reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
99
+ definition=definition,
100
+ )
72
101
  term.set_species(taxonomy_id, taxonomy_name)
102
+ term.annotate_uri(
103
+ has_depiction,
104
+ f"https://www.wikipathways.org/wikipathways-assets/pathways/{identifier}/{identifier}.svg",
105
+ )
73
106
  for ncbigene_id in genes:
74
107
  term.annotate_object(
75
108
  has_participant,
76
109
  Reference(prefix="ncbigene", identifier=ncbigene_id),
77
110
  )
111
+ # TODO switch query over to including chemicals from RDF SPARQL query
112
+ # TODO get description from SPARQL
113
+ parents = [ # type:ignore[misc]
114
+ p
115
+ for (p,) in graph.query(
116
+ f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:pathwayOntologyTag ?p }}"
117
+ )
118
+ ]
119
+ for parent in parents:
120
+ if parent.startswith(PW_PREFIX):
121
+ ref = Reference(prefix="pw", identifier=parent.removeprefix(PW_PREFIX))
122
+ pw_references.add(ref)
123
+ term.append_parent(ref)
124
+ if not parents:
125
+ tqdm.write(f"[{term.curie}] could not find parent")
126
+ term.append_parent(ROOT)
127
+
128
+ diseases = graph.query(
129
+ f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:diseaseOntologyTag ?p }}"
130
+ )
131
+ for (disease,) in diseases: # type:ignore[misc]
132
+ if disease.startswith(DOID_PREFIX):
133
+ term.annotate_object(
134
+ contributes_to_condition,
135
+ Reference(prefix="doid", identifier=disease.removeprefix(DOID_PREFIX)),
136
+ )
137
+
138
+ cells = graph.query(
139
+ f"SELECT ?p WHERE {{ <{uri}> pav:hasVersion/wp:cellTypeOntologyTag ?p }}"
140
+ )
141
+ for (cell,) in cells: # type:ignore[misc]
142
+ if cell.startswith(CL_PREFIX):
143
+ term.annotate_object(
144
+ located_in,
145
+ Reference(prefix="cl", identifier=cell.removeprefix(CL_PREFIX)),
146
+ )
147
+
78
148
  yield term
79
149
 
150
+ from ..api import get_ancestors
151
+ from ..getters import get_ontology
152
+
153
+ for pw_reference in list(pw_references):
154
+ pw_references.update(get_ancestors(pw_reference) or set())
155
+
156
+ for pw_term in get_ontology("pw"):
157
+ if pw_term.reference in pw_references:
158
+ yield Term(
159
+ reference=pw_term.reference,
160
+ definition=pw_term.definition,
161
+ # PW has issues in hierarchy - there are lots of leaves with no root
162
+ parents=pw_term.parents or [ROOT],
163
+ )
164
+
80
165
 
81
166
  if __name__ == "__main__":
82
167
  WikiPathwaysGetter.cli()
pyobo/struct/__init__.py CHANGED
@@ -16,6 +16,7 @@ from .struct import (
16
16
  SynonymTypeDef,
17
17
  Term,
18
18
  TypeDef,
19
+ build_ontology,
19
20
  make_ad_hoc_ontology,
20
21
  )
21
22
  from .struct_utils import Annotation, Stanza, StanzaType
@@ -57,6 +58,7 @@ __all__ = [
57
58
  "Term",
58
59
  "TypeDef",
59
60
  "_parse_str_or_curie_or_uri",
61
+ "build_ontology",
60
62
  "default_reference",
61
63
  "derives_from",
62
64
  "enables",
@@ -211,7 +211,16 @@ class LiteralBox(Box):
211
211
 
212
212
def to_funowl(self) -> str:
    """Represent this literal for functional OWL.

    The literal is first rendered with ``self.literal.n3``. When that rendering
    uses the triple-quoted long form with an ``xsd:string`` datatype, it is
    rewritten into a single-quoted string, since the OFN format does not appear
    to use triple quotes.

    :returns: The functional OWL text of the literal
    """
    import re

    rv = self.literal.n3(self._namespace_manager)
    long_prefix, long_suffix = '"""', '"""^^xsd:string'
    if rv.startswith(long_prefix) and rv.endswith(long_suffix):
        # strip the triple quotes and the datatype tag
        inner = rv.removeprefix(long_prefix).removesuffix(long_suffix)
        # Escape bare quotes only. Backslash escapes that are already present
        # (e.g. ``\"`` or ``\\``) are consumed as a unit and kept as they are;
        # blindly replacing every quote would turn ``\"`` into ``\\"``, i.e. an
        # escaped backslash followed by an unescaped, string-terminating quote.
        inner = re.sub(
            r'\\.|"',
            lambda match: '\\"' if match.group() == '"' else match.group(),
            inner,
            flags=re.DOTALL,
        )
        # stick back quotes and xsd tag
        rv = '"' + inner + '"^^xsd:string'
    return rv
215
224
 
216
225
  def to_funowl_args(self) -> str: # pragma: no cover
217
226
  """Get the inside of the functional OWL tag representing the literal (unused)."""
@@ -8,6 +8,7 @@ from collections.abc import Sequence
8
8
  from pathlib import Path
9
9
 
10
10
  from curies import Converter
11
+ from pystow.utils import safe_open
11
12
  from rdflib import OWL, RDF, Graph, term
12
13
 
13
14
  from pyobo.struct.functional.dsl import Annotation, Annotations, Axiom, Box
@@ -16,7 +17,6 @@ from pyobo.struct.functional.utils import (
16
17
  FunctionalOWLSerializable,
17
18
  list_to_funowl,
18
19
  )
19
- from pyobo.utils.io import safe_open
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -108,9 +108,9 @@ class Document:
108
108
  return graph
109
109
 
110
110
  def write_funowl(self, path: str | Path) -> None:
111
- """Write functional OWL to a file.."""
111
+ """Write functional OWL to a file."""
112
112
  path = Path(path).expanduser().resolve()
113
- with safe_open(path, read=False) as file:
113
+ with safe_open(path, operation="write") as file:
114
114
  file.write(self.to_funowl())
115
115
 
116
116
  def to_funowl(self) -> str:
@@ -18,6 +18,7 @@ from curies import ReferenceTuple
18
18
  from curies.preprocessing import BlocklistError
19
19
  from curies.vocabulary import SynonymScope
20
20
  from more_itertools import pairwise
21
+ from pystow.utils import safe_open
21
22
  from tqdm.auto import tqdm
22
23
 
23
24
  from .reader_utils import (
@@ -52,8 +53,7 @@ from ...identifier_utils import (
52
53
  get_rules,
53
54
  )
54
55
  from ...utils.cache import write_gzipped_graph
55
- from ...utils.io import safe_open
56
- from ...utils.misc import STATIC_VERSION_REWRITES, cleanup_version
56
+ from ...utils.misc import _prioritize_version
57
57
 
58
58
  __all__ = [
59
59
  "from_obo_path",
@@ -90,7 +90,7 @@ def from_obo_path(
90
90
  )
91
91
  else:
92
92
  logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
93
- with safe_open(path, read=True) as file:
93
+ with safe_open(path, operation="read") as file:
94
94
  graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
95
95
 
96
96
  if prefix:
@@ -157,7 +157,7 @@ def from_obonet(
157
157
  upgrade: bool = True,
158
158
  use_tqdm: bool = False,
159
159
  ) -> Obo:
160
- """Get all of the terms from a OBO graph."""
160
+ """Get all the terms from a OBO graph."""
161
161
  ontology_prefix_raw = graph.graph["ontology"]
162
162
  ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
163
163
  logger.info("[%s] extracting OBO using obonet", ontology_prefix)
@@ -168,8 +168,11 @@ def from_obonet(
168
168
 
169
169
  macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
170
170
 
171
- data_version = _clean_graph_version(
172
- graph, ontology_prefix=ontology_prefix, version=version, date=date
171
+ data_version = _prioritize_version(
172
+ data_version=graph.graph.get("data-version") or None,
173
+ ontology_prefix=ontology_prefix,
174
+ version=version,
175
+ date=date,
173
176
  )
174
177
  if data_version and "/" in data_version:
175
178
  raise ValueError(
@@ -533,17 +536,22 @@ def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool
533
536
  stanza.append_subset(reference)
534
537
 
535
538
 
539
+ # needed to parse OPMI
540
+ _BOOLEAN_TRUE_VALUES = {"true", "1", 1}
541
+ _BOOLEAN_FALSE_VALUES = {"false", "0", 0}
542
+
543
+
536
544
  def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
537
545
  value = data.get(tag)
538
546
  if value is None:
539
547
  return None
540
548
  if isinstance(value, list):
541
549
  value = value[0]
542
- if value == "false":
550
+ if value in _BOOLEAN_FALSE_VALUES:
543
551
  return False
544
- if value == "true":
552
+ if value in _BOOLEAN_TRUE_VALUES:
545
553
  return True
546
- raise ValueError(value)
554
+ raise ValueError(f"unhandled value for boolean: ({type(value)}) {value}")
547
555
 
548
556
 
549
557
  def _get_reference(
@@ -703,50 +711,6 @@ def _clean_graph_ontology(graph, prefix: str) -> None:
703
711
  graph.graph["ontology"] = prefix
704
712
 
705
713
 
706
- def _clean_graph_version(
707
- graph, ontology_prefix: str, version: str | None, date: datetime | None
708
- ) -> str | None:
709
- if ontology_prefix in STATIC_VERSION_REWRITES:
710
- return STATIC_VERSION_REWRITES[ontology_prefix]
711
-
712
- data_version: str | None = graph.graph.get("data-version") or None
713
- if version:
714
- clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
715
- if not data_version:
716
- logger.debug(
717
- "[%s] did not have a version, overriding with %s",
718
- ontology_prefix,
719
- clean_injected_version,
720
- )
721
- return clean_injected_version
722
-
723
- clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
724
- if clean_data_version != clean_injected_version:
725
- # in this case, we're going to trust the one that's passed
726
- # through explicitly more than the graph's content
727
- logger.debug(
728
- "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
729
- )
730
- return clean_injected_version
731
-
732
- if data_version:
733
- clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
734
- logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
735
- return clean_data_version
736
-
737
- if date is not None:
738
- derived_date_version = date.strftime("%Y-%m-%d")
739
- logger.debug(
740
- "[%s] does not report a version. falling back to date: %s",
741
- ontology_prefix,
742
- derived_date_version,
743
- )
744
- return derived_date_version
745
-
746
- logger.debug("[%s] does not report a version nor a date", ontology_prefix)
747
- return None
748
-
749
-
750
714
  def _iter_obo_graph(
751
715
  graph: nx.MultiDiGraph,
752
716
  *,
@@ -8,11 +8,11 @@ import curies
8
8
  import obographs as og
9
9
  from curies import Converter, ReferenceTuple
10
10
  from curies import vocabulary as v
11
+ from pystow.utils import safe_open
11
12
 
12
13
  from pyobo.identifier_utils.api import get_converter
13
14
  from pyobo.struct import Obo, OBOLiteral, Stanza, Term, TypeDef
14
15
  from pyobo.struct import typedef as tdv
15
- from pyobo.utils.io import safe_open
16
16
 
17
17
  __all__ = [
18
18
  "to_obograph",
@@ -25,7 +25,7 @@ def write_obograph(obo: Obo, path: str | Path, *, converter: Converter | None =
25
25
  """Write an ontology to a file as OBO Graph JSON."""
26
26
  path = Path(path).expanduser().resolve()
27
27
  raw_graph = to_obograph(obo, converter=converter)
28
- with safe_open(path, read=False) as file:
28
+ with safe_open(path, operation="write") as file:
29
29
  file.write(raw_graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
30
30
 
31
31
 
pyobo/struct/struct.py CHANGED
@@ -25,6 +25,7 @@ import ssslm
25
25
  from curies import Converter, ReferenceTuple
26
26
  from curies import vocabulary as _cv
27
27
  from more_click import force_option, verbose_option
28
+ from pystow.utils import safe_open
28
29
  from tqdm.auto import tqdm
29
30
  from typing_extensions import Self
30
31
 
@@ -70,7 +71,7 @@ from ..constants import (
70
71
  TARGET_PREFIX,
71
72
  )
72
73
  from ..utils.cache import write_gzipped_graph
73
- from ..utils.io import multidict, safe_open, write_iterable_tsv
74
+ from ..utils.io import multidict, write_iterable_tsv
74
75
  from ..utils.path import (
75
76
  CacheArtifact,
76
77
  get_cache_path,
@@ -87,6 +88,7 @@ __all__ = [
87
88
  "TypeDef",
88
89
  "abbreviation",
89
90
  "acronym",
91
+ "build_ontology",
90
92
  "make_ad_hoc_ontology",
91
93
  ]
92
94
 
@@ -746,13 +748,23 @@ class Obo:
746
748
  help="Re-process the data, but don't download it again.",
747
749
  )
748
750
  @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
751
+ @click.option("--obo", is_flag=True, help="Write OBO")
749
752
  @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
750
753
  @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
754
+ @click.option("--cache/--no-cache", is_flag=True, help="Write the cache", default=True)
751
755
  @click.option(
752
756
  "--version", help="Specify data version to get. Use this if bioversions is acting up."
753
757
  )
754
- def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
755
- rewrite = True
758
+ def _main(
759
+ force: bool,
760
+ obo: bool,
761
+ owl: bool,
762
+ ofn: bool,
763
+ ttl: bool,
764
+ version: str | None,
765
+ rewrite: bool,
766
+ cache: bool,
767
+ ) -> None:
756
768
  try:
757
769
  inst = cls(force=force, data_version=version)
758
770
  except Exception as e:
@@ -760,13 +772,14 @@ class Obo:
760
772
  sys.exit(1)
761
773
  inst.write_default(
762
774
  write_obograph=False,
763
- write_obo=False,
775
+ write_obo=obo,
764
776
  write_owl=owl,
765
777
  write_ofn=ofn,
766
778
  write_ttl=ttl,
767
779
  write_nodes=True,
768
780
  force=force or rewrite,
769
781
  use_tqdm=True,
782
+ write_cache=cache,
770
783
  )
771
784
 
772
785
  return _main
@@ -909,6 +922,8 @@ class Obo:
909
922
  end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
910
923
  case Reference():
911
924
  end = reference_escape(value, ontology_prefix=self.ontology)
925
+ case _:
926
+ raise TypeError(f"Invalid property value: {value}")
912
927
  yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
913
928
 
914
929
  def _iterate_property_pairs(self) -> Iterable[Annotation]:
@@ -925,10 +940,21 @@ class Obo:
925
940
  license_literal = OBOLiteral.string(license_spdx_id)
926
941
  yield Annotation(v.has_license, license_literal)
927
942
 
928
- # Description
929
943
  if description := bioregistry.get_description(self.ontology):
930
- description = obo_escape_slim(description.strip())
931
944
  yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
945
+ if homepage := bioregistry.get_homepage(self.ontology):
946
+ yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
947
+ if repository := bioregistry.get_repository(self.ontology):
948
+ yield Annotation(v.has_repository, OBOLiteral.uri(repository))
949
+ if logo := bioregistry.get_logo(self.ontology):
950
+ yield Annotation(v.has_logo, OBOLiteral.uri(logo))
951
+ if mailing_list := bioregistry.get_mailing_list(self.ontology):
952
+ yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
953
+ if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
954
+ yield Annotation(
955
+ v.has_maintainer,
956
+ Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
957
+ )
932
958
 
933
959
  # Root terms
934
960
  for root_term in self.root_terms or []:
@@ -973,7 +999,7 @@ class Obo:
973
999
  unit="line",
974
1000
  )
975
1001
  if isinstance(file, str | Path | os.PathLike):
976
- with safe_open(file, read=False) as fh:
1002
+ with safe_open(file, operation="write") as fh:
977
1003
  self._write_lines(it, fh)
978
1004
  else:
979
1005
  self._write_lines(it, file)
@@ -1149,7 +1175,7 @@ class Obo:
1149
1175
  metadata = self.get_metadata()
1150
1176
  for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
1151
1177
  logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
1152
- with safe_open(path, read=False) as file:
1178
+ with safe_open(path, operation="write") as file:
1153
1179
  json.dump(metadata, file, indent=2)
1154
1180
 
1155
1181
  def write_prefix_map(self) -> None:
@@ -2265,6 +2291,87 @@ class AdHocOntologyBase(Obo):
2265
2291
  """A base class for ad-hoc ontologies."""
2266
2292
 
2267
2293
 
2294
def build_ontology(
    prefix: str,
    *,
    terms: list[Term] | None = None,
    synonym_typedefs: list[SynonymTypeDef] | None = None,
    typedefs: list[TypeDef] | None = None,
    name: str | None = None,  # inferred
    version: str | None = None,
    idspaces: dict[str, str] | None = None,
    root_terms: list[Reference] | None = None,
    subsetdefs: list[tuple[Reference, str]] | None = None,
    properties: list[Annotation] | None = None,
    imports: list[str] | None = None,
    description: str | None = None,
    homepage: str | None = None,
    mailing_list: str | None = None,
    logo: str | None = None,
    repository: str | None = None,
) -> Obo:
    """Build an ontology from parts.

    :param prefix: The ontology's prefix
    :param terms: The terms of the ontology
    :param synonym_typedefs: Synonym type definitions
    :param typedefs: Type definitions. Extended (in a copy) with the typedef of
        each metadata field given below
    :param name: The ontology's name. Looked up with ``bioregistry.get_name``
        when not given
    :param version: The data version
    :param idspaces: The ID spaces
    :param root_terms: The root terms
    :param subsetdefs: The subset definitions
    :param properties: Ontology-level annotations. Extended (in a copy) with one
        annotation for each metadata field given below
    :param imports: The imports
    :param description: Added as a string annotation using ``has_description``
    :param homepage: Added as a URI annotation using ``has_homepage``
    :param mailing_list: Added as a string annotation using ``has_mailing_list``
    :param logo: Added as a URI annotation using ``has_depiction``
    :param repository: Added as a URI annotation using ``has_repository``
    :returns: The ontology produced by :func:`make_ad_hoc_ontology`
    """
    # imported inside the function, as the original did for each of these names
    from .typedef import (
        has_depiction,
        has_description,
        has_homepage,
        has_mailing_list,
        has_repository,
    )

    if name is None:
        name = bioregistry.get_name(prefix)
    # TODO auto-populate license and other properties

    # work on copies so the lists passed in by the caller are never mutated
    properties = list(properties) if properties is not None else []
    typedefs = list(typedefs) if typedefs is not None else []

    # (value, typedef, annotation constructor), in the order they are emitted
    metadata = [
        (description, has_description, Annotation.string),  # TODO get proper typedef
        (homepage, has_homepage, Annotation.uri),
        (logo, has_depiction, Annotation.uri),
        (mailing_list, has_mailing_list, Annotation.string),
        (repository, has_repository, Annotation.uri),
    ]
    for value, typedef, make_annotation in metadata:
        if not value:
            continue
        properties.append(make_annotation(typedef.reference, value))
        if typedef not in typedefs:
            typedefs.append(typedef)

    return make_ad_hoc_ontology(
        _ontology=prefix,
        _name=name,
        # _auto_generated_by
        _typedefs=typedefs,
        _synonym_typedefs=synonym_typedefs,
        # _date: datetime.datetime | None = None,
        _data_version=version,
        _idspaces=idspaces,
        _root_terms=root_terms,
        _subsetdefs=subsetdefs,
        _property_values=properties,
        _imports=imports,
        terms=terms,
    )
2373
+
2374
+
2268
2375
  def make_ad_hoc_ontology(
2269
2376
  _ontology: str,
2270
2377
  _name: str | None = None,
@@ -63,6 +63,16 @@ class Annotation(NamedTuple):
63
63
  """Return a literal property for a float."""
64
64
  return cls(predicate, OBOLiteral.float(value))
65
65
 
66
@classmethod
def uri(cls, predicate: Reference, uri: str) -> Self:
    """Return a literal property for a URI.

    :param predicate: The predicate of the annotation
    :param uri: The URI, wrapped with ``OBOLiteral.uri``
    :returns: An annotation pairing the predicate with the URI literal
    """
    return cls(predicate, OBOLiteral.uri(uri))
70
+
71
@classmethod
def string(cls, predicate: Reference, value: str, *, language: str | None = None) -> Self:
    """Return a literal property for a string.

    :param predicate: The predicate of the annotation
    :param value: The string value, wrapped with ``OBOLiteral.string``
    :param language: Optional language, passed through to ``OBOLiteral.string``
    :returns: An annotation pairing the predicate with the string literal
    """
    return cls(predicate, OBOLiteral.string(value, language=language))
75
+
66
76
  @staticmethod
67
77
  def _sort_key(x: Annotation):
68
78
  return x.predicate, _reference_or_literal_key(x.value)