pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +6 -0
  3. pyobo/api/__init__.py +3 -0
  4. pyobo/api/embedding.py +118 -0
  5. pyobo/api/utils.py +0 -10
  6. pyobo/cli/cli.py +1 -6
  7. pyobo/cli/database.py +7 -1
  8. pyobo/constants.py +23 -0
  9. pyobo/getters.py +52 -35
  10. pyobo/identifier_utils/api.py +3 -1
  11. pyobo/sources/__init__.py +14 -1
  12. pyobo/sources/chembl/__init__.py +6 -0
  13. pyobo/sources/chembl/chembl_cell.py +94 -0
  14. pyobo/sources/chembl/chembl_mechanism.py +81 -0
  15. pyobo/sources/chembl/chembl_tissue.py +70 -0
  16. pyobo/sources/clinicaltrials.py +32 -33
  17. pyobo/sources/complexportal.py +5 -1
  18. pyobo/sources/drugcentral.py +2 -1
  19. pyobo/sources/hgnc/hgnc.py +13 -6
  20. pyobo/sources/iana_media_type.py +100 -0
  21. pyobo/sources/mesh.py +82 -29
  22. pyobo/sources/reactome.py +10 -3
  23. pyobo/sources/spdx.py +89 -0
  24. pyobo/sources/uniprot/uniprot.py +2 -2
  25. pyobo/sources/wikipathways.py +92 -7
  26. pyobo/struct/__init__.py +2 -0
  27. pyobo/struct/functional/dsl.py +10 -1
  28. pyobo/struct/functional/ontology.py +3 -3
  29. pyobo/struct/obo/reader.py +17 -53
  30. pyobo/struct/obograph/export.py +2 -2
  31. pyobo/struct/struct.py +125 -8
  32. pyobo/struct/struct_utils.py +10 -0
  33. pyobo/struct/typedef.py +15 -3
  34. pyobo/struct/vocabulary.py +8 -0
  35. pyobo/utils/cache.py +4 -3
  36. pyobo/utils/io.py +18 -56
  37. pyobo/utils/misc.py +142 -1
  38. pyobo/utils/path.py +34 -2
  39. pyobo/version.py +1 -1
  40. {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
  41. {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
  42. {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
  43. {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
  44. {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/struct.py CHANGED
@@ -8,6 +8,7 @@ import json
8
8
  import logging
9
9
  import os
10
10
  import sys
11
+ import tempfile
11
12
  import warnings
12
13
  from collections import ChainMap, defaultdict
13
14
  from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
@@ -25,6 +26,7 @@ import ssslm
25
26
  from curies import Converter, ReferenceTuple
26
27
  from curies import vocabulary as _cv
27
28
  from more_click import force_option, verbose_option
29
+ from pystow.utils import safe_open
28
30
  from tqdm.auto import tqdm
29
31
  from typing_extensions import Self
30
32
 
@@ -70,7 +72,7 @@ from ..constants import (
70
72
  TARGET_PREFIX,
71
73
  )
72
74
  from ..utils.cache import write_gzipped_graph
73
- from ..utils.io import multidict, safe_open, write_iterable_tsv
75
+ from ..utils.io import multidict, write_iterable_tsv
74
76
  from ..utils.path import (
75
77
  CacheArtifact,
76
78
  get_cache_path,
@@ -87,6 +89,7 @@ __all__ = [
87
89
  "TypeDef",
88
90
  "abbreviation",
89
91
  "acronym",
92
+ "build_ontology",
90
93
  "make_ad_hoc_ontology",
91
94
  ]
92
95
 
@@ -746,13 +749,23 @@ class Obo:
746
749
  help="Re-process the data, but don't download it again.",
747
750
  )
748
751
  @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
752
+ @click.option("--obo", is_flag=True, help="Write OBO")
749
753
  @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
750
754
  @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
755
+ @click.option("--cache/--no-cache", is_flag=True, help="Write the cache", default=True)
751
756
  @click.option(
752
757
  "--version", help="Specify data version to get. Use this if bioversions is acting up."
753
758
  )
754
- def _main(force: bool, owl: bool, ofn: bool, ttl: bool, version: str | None, rewrite: bool):
755
- rewrite = True
759
+ def _main(
760
+ force: bool,
761
+ obo: bool,
762
+ owl: bool,
763
+ ofn: bool,
764
+ ttl: bool,
765
+ version: str | None,
766
+ rewrite: bool,
767
+ cache: bool,
768
+ ) -> None:
756
769
  try:
757
770
  inst = cls(force=force, data_version=version)
758
771
  except Exception as e:
@@ -760,13 +773,14 @@ class Obo:
760
773
  sys.exit(1)
761
774
  inst.write_default(
762
775
  write_obograph=False,
763
- write_obo=False,
776
+ write_obo=obo,
764
777
  write_owl=owl,
765
778
  write_ofn=ofn,
766
779
  write_ttl=ttl,
767
780
  write_nodes=True,
768
781
  force=force or rewrite,
769
782
  use_tqdm=True,
783
+ write_cache=cache,
770
784
  )
771
785
 
772
786
  return _main
@@ -909,6 +923,8 @@ class Obo:
909
923
  end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
910
924
  case Reference():
911
925
  end = reference_escape(value, ontology_prefix=self.ontology)
926
+ case _:
927
+ raise TypeError(f"Invalid property value: {value}")
912
928
  yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
913
929
 
914
930
  def _iterate_property_pairs(self) -> Iterable[Annotation]:
@@ -925,10 +941,21 @@ class Obo:
925
941
  license_literal = OBOLiteral.string(license_spdx_id)
926
942
  yield Annotation(v.has_license, license_literal)
927
943
 
928
- # Description
929
944
  if description := bioregistry.get_description(self.ontology):
930
- description = obo_escape_slim(description.strip())
931
945
  yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
946
+ if homepage := bioregistry.get_homepage(self.ontology):
947
+ yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
948
+ if repository := bioregistry.get_repository(self.ontology):
949
+ yield Annotation(v.has_repository, OBOLiteral.uri(repository))
950
+ if logo := bioregistry.get_logo(self.ontology):
951
+ yield Annotation(v.has_logo, OBOLiteral.uri(logo))
952
+ if mailing_list := bioregistry.get_mailing_list(self.ontology):
953
+ yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
954
+ if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
955
+ yield Annotation(
956
+ v.has_maintainer,
957
+ Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
958
+ )
932
959
 
933
960
  # Root terms
934
961
  for root_term in self.root_terms or []:
@@ -973,7 +1000,7 @@ class Obo:
973
1000
  unit="line",
974
1001
  )
975
1002
  if isinstance(file, str | Path | os.PathLike):
976
- with safe_open(file, read=False) as fh:
1003
+ with safe_open(file, operation="write") as fh:
977
1004
  self._write_lines(it, fh)
978
1005
  else:
979
1006
  self._write_lines(it, file)
@@ -995,6 +1022,15 @@ class Obo:
995
1022
  ofn = get_ofn_from_obo(self)
996
1023
  ofn.write_funowl(path)
997
1024
 
1025
+ def write_owl(self, path: str | Path) -> None:
1026
+ """Write OWL, by first outputting OFN then converting with ROBOT."""
1027
+ from bioontologies import robot
1028
+
1029
+ with tempfile.TemporaryDirectory() as directory:
1030
+ ofn_path = Path(directory).joinpath("tmp.ofn")
1031
+ self.write_ofn(ofn_path)
1032
+ robot.convert(ofn_path, path)
1033
+
998
1034
  def write_rdf(self, path: str | Path) -> None:
999
1035
  """Write as Turtle RDF."""
1000
1036
  from .functional.obo_to_functional import get_ofn_from_obo
@@ -1149,7 +1185,7 @@ class Obo:
1149
1185
  metadata = self.get_metadata()
1150
1186
  for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
1151
1187
  logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
1152
- with safe_open(path, read=False) as file:
1188
+ with safe_open(path, operation="write") as file:
1153
1189
  json.dump(metadata, file, indent=2)
1154
1190
 
1155
1191
  def write_prefix_map(self) -> None:
@@ -2265,6 +2301,87 @@ class AdHocOntologyBase(Obo):
2265
2301
  """A base class for ad-hoc ontologies."""
2266
2302
 
2267
2303
 
2304
+ def build_ontology(
2305
+ prefix: str,
2306
+ *,
2307
+ terms: list[Term] | None = None,
2308
+ synonym_typedefs: list[SynonymTypeDef] | None = None,
2309
+ typedefs: list[TypeDef] | None = None,
2310
+ name: str | None = None, # inferred
2311
+ version: str | None = None,
2312
+ idspaces: dict[str, str] | None = None,
2313
+ root_terms: list[Reference] | None = None,
2314
+ subsetdefs: list[tuple[Reference, str]] | None = None,
2315
+ properties: list[Annotation] | None = None,
2316
+ imports: list[str] | None = None,
2317
+ description: str | None = None,
2318
+ homepage: str | None = None,
2319
+ mailing_list: str | None = None,
2320
+ logo: str | None = None,
2321
+ repository: str | None = None,
2322
+ ) -> Obo:
2323
+ """Build an ontology from parts."""
2324
+ if name is None:
2325
+ name = bioregistry.get_name(prefix)
2326
+ # TODO auto-populate license and other properties
2327
+
2328
+ if properties is None:
2329
+ properties = []
2330
+ if typedefs is None:
2331
+ typedefs = []
2332
+
2333
+ if description:
2334
+ from .typedef import has_description
2335
+
2336
+ properties.append(Annotation.string(has_description.reference, description))
2337
+ if has_description not in typedefs:
2338
+ typedefs.append(has_description) # TODO get proper typedef
2339
+
2340
+ if homepage:
2341
+ from .typedef import has_homepage
2342
+
2343
+ properties.append(Annotation.uri(has_homepage.reference, homepage))
2344
+ if has_homepage not in typedefs:
2345
+ typedefs.append(has_homepage)
2346
+
2347
+ if logo:
2348
+ from .typedef import has_depiction
2349
+
2350
+ properties.append(Annotation.uri(has_depiction.reference, logo))
2351
+ if has_depiction not in typedefs:
2352
+ typedefs.append(has_depiction)
2353
+
2354
+ if mailing_list:
2355
+ from .typedef import has_mailing_list
2356
+
2357
+ properties.append(Annotation.string(has_mailing_list.reference, mailing_list))
2358
+ if has_mailing_list not in typedefs:
2359
+ typedefs.append(has_mailing_list)
2360
+
2361
+ if repository:
2362
+ from .typedef import has_repository
2363
+
2364
+ properties.append(Annotation.uri(has_repository.reference, repository))
2365
+ if has_repository not in typedefs:
2366
+ typedefs.append(has_repository)
2367
+
2368
+ return make_ad_hoc_ontology(
2369
+ _ontology=prefix,
2370
+ _name=name,
2371
+ # _auto_generated_by
2372
+ _typedefs=typedefs,
2373
+ _synonym_typedefs=synonym_typedefs,
2374
+ # _date: datetime.datetime | None = None,
2375
+ _data_version=version,
2376
+ _idspaces=idspaces,
2377
+ _root_terms=root_terms,
2378
+ _subsetdefs=subsetdefs,
2379
+ _property_values=properties,
2380
+ _imports=imports,
2381
+ terms=terms,
2382
+ )
2383
+
2384
+
2268
2385
  def make_ad_hoc_ontology(
2269
2386
  _ontology: str,
2270
2387
  _name: str | None = None,
@@ -63,6 +63,16 @@ class Annotation(NamedTuple):
63
63
  """Return a literal property for a float."""
64
64
  return cls(predicate, OBOLiteral.float(value))
65
65
 
66
+ @classmethod
67
+ def uri(cls, predicate: Reference, uri: str) -> Self:
68
+ """Return a literal property for a URI."""
69
+ return cls(predicate, OBOLiteral.uri(uri))
70
+
71
+ @classmethod
72
+ def string(cls, predicate: Reference, value: str, *, language: str | None = None) -> Self:
73
+ """Return a literal property for a float."""
74
+ return cls(predicate, OBOLiteral.string(value, language=language))
75
+
66
76
  @staticmethod
67
77
  def _sort_key(x: Annotation):
68
78
  return x.predicate, _reference_or_literal_key(x.value)
pyobo/struct/typedef.py CHANGED
@@ -15,7 +15,9 @@ __all__ = [
15
15
  "alternative_term",
16
16
  "broad_match",
17
17
  "close_match",
18
+ "contributes_to_condition",
18
19
  "default_typedefs",
20
+ "derives_from_organism",
19
21
  "editor_note",
20
22
  "enables",
21
23
  "exact_match",
@@ -24,10 +26,12 @@ __all__ = [
24
26
  "gene_product_member_of",
25
27
  "has_contributor",
26
28
  "has_dbxref",
29
+ "has_depiction",
27
30
  "has_end_date",
28
31
  "has_gene_product",
29
32
  "has_homepage",
30
33
  "has_inchi",
34
+ "has_mailbox",
31
35
  "has_mature",
32
36
  "has_member",
33
37
  "has_part",
@@ -103,12 +107,18 @@ has_component = TypeDef(
103
107
  derives_from = TypeDef(
104
108
  reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
105
109
  )
110
+ derives_from_organism = TypeDef(
111
+ reference=Reference(prefix="CLO", identifier="0037207", name="derives from organism")
112
+ )
106
113
  molecularly_interacts_with = TypeDef(
107
114
  reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
108
115
  )
109
116
  located_in = TypeDef(
110
117
  reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
111
118
  )
119
+ contributes_to_condition = TypeDef(
120
+ reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
121
+ )
112
122
  exact_match = TypeDef(reference=v.exact_match, is_metadata_tag=True)
113
123
  narrow_match = TypeDef(reference=v.narrow_match, is_metadata_tag=True)
114
124
  broad_match = TypeDef(reference=v.broad_match, is_metadata_tag=True)
@@ -257,9 +267,11 @@ has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v
257
267
 
258
268
  has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
259
269
 
260
- has_homepage = TypeDef(
261
- reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True
262
- )
270
+ has_homepage = TypeDef(reference=v.has_homepage, is_metadata_tag=True)
271
+ has_depiction = TypeDef(reference=v.has_depiction, is_metadata_tag=True)
272
+ has_mailbox = TypeDef(reference=v.has_mailbox, is_metadata_tag=True)
273
+ has_mailing_list = TypeDef(reference=v.has_mailing_list, is_metadata_tag=True)
274
+ has_repository = TypeDef(reference=v.has_repository, is_metadata_tag=True)
263
275
 
264
276
  has_category = TypeDef(
265
277
  reference=Reference(prefix="biolink", identifier="category", name="has category"),
@@ -90,6 +90,14 @@ has_description = _c(_v.has_description)
90
90
  has_license = _c(_v.has_license)
91
91
  has_title = _c(_v.has_title)
92
92
 
93
+ has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")
94
+ has_logo = Reference(prefix="foaf", identifier="logo", name="has logo")
95
+ has_mailbox = Reference(prefix="foaf", identifier="mbox", name="has mailbox")
96
+ has_depiction = Reference(prefix="foaf", identifier="depicted_by", name="depicted by")
97
+ has_repository = Reference(prefix="doap", identifier="repository", name="has repository")
98
+ has_mailing_list = Reference(prefix="doap", identifier="mailing-list", name="has mailing list")
99
+ has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
100
+
93
101
  has_part = Reference(prefix=BFO_PREFIX, identifier="0000051", name="has part")
94
102
  part_of = Reference(prefix=BFO_PREFIX, identifier="0000050", name="part of")
95
103
  orthologous = Reference(
pyobo/utils/cache.py CHANGED
@@ -12,8 +12,9 @@ from pystow.cache import CachedCollection as cached_collection # noqa:N813
12
12
  from pystow.cache import CachedDataFrame as cached_df # noqa:N813
13
13
  from pystow.cache import CachedJSON as cached_json # noqa:N813
14
14
  from pystow.cache import CachedPickle as cached_pickle # noqa:N813
15
+ from pystow.utils import safe_open
15
16
 
16
- from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv
17
+ from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
17
18
 
18
19
  __all__ = [
19
20
  "cached_collection",
@@ -69,13 +70,13 @@ NODE_LINK_STYLE = "links" # TODO update to "edges"
69
70
 
70
71
  def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
71
72
  """Read a graph that's gzipped nodelink."""
72
- with safe_open(path, read=True) as file:
73
+ with safe_open(path, operation="read") as file:
73
74
  return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
74
75
 
75
76
 
76
77
  def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
77
78
  """Write a graph as gzipped nodelink."""
78
- with safe_open(path, read=False) as file:
79
+ with safe_open(path, operation="write") as file:
79
80
  json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
80
81
 
81
82
 
pyobo/utils/io.py CHANGED
@@ -1,27 +1,24 @@
1
1
  """I/O utilities."""
2
2
 
3
3
  import collections.abc
4
- import contextlib
5
- import csv
6
4
  import gzip
7
5
  import logging
8
6
  from collections import defaultdict
9
7
  from collections.abc import Generator, Iterable, Mapping
10
8
  from contextlib import contextmanager
11
9
  from pathlib import Path
12
- from typing import Literal, TextIO, TypeVar
10
+ from typing import TypeVar, cast
13
11
 
14
12
  import pandas as pd
13
+ import pystow.utils
14
+ from pystow.utils import safe_open_reader, safe_open_writer
15
15
  from tqdm.auto import tqdm
16
16
 
17
17
  __all__ = [
18
- "get_reader",
19
18
  "multidict",
20
19
  "multisetdict",
21
20
  "open_map_tsv",
22
21
  "open_multimap_tsv",
23
- "open_reader",
24
- "safe_open",
25
22
  "safe_open_writer",
26
23
  "write_iterable_tsv",
27
24
  "write_map_tsv",
@@ -34,35 +31,22 @@ X = TypeVar("X")
34
31
  Y = TypeVar("Y")
35
32
 
36
33
 
37
- @contextmanager
38
- def open_reader(path: str | Path, sep: str = "\t"):
39
- """Open a file and get a reader for it."""
40
- path = Path(path)
41
- with safe_open(path, read=True) as file:
42
- yield get_reader(file, sep=sep)
43
-
44
-
45
- def get_reader(x, sep: str = "\t"):
46
- """Get a :func:`csv.reader` with PyOBO default settings."""
47
- return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
48
-
49
-
50
34
  def open_map_tsv(
51
35
  path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
52
36
  ) -> Mapping[str, str]:
53
37
  """Load a mapping TSV file into a dictionary."""
54
- with safe_open(path, read=True) as file:
38
+ rv = {}
39
+ with pystow.utils.safe_open_reader(path) as reader:
55
40
  if has_header:
56
- next(file) # throw away header
41
+ next(reader) # throw away header
57
42
  if use_tqdm:
58
- file = tqdm(file, desc=f"loading TSV from {path}")
59
- rv = {}
60
- for row in get_reader(file):
43
+ reader = tqdm(reader, desc=f"loading TSV from {path}")
44
+ for row in reader:
61
45
  if len(row) != 2:
62
46
  logger.warning("[%s] malformed row can not be put in dict: %s", path, row)
63
47
  continue
64
48
  rv[row[0]] = row[1]
65
- return rv
49
+ return rv
66
50
 
67
51
 
68
52
  def open_multimap_tsv(
@@ -72,24 +56,27 @@ def open_multimap_tsv(
72
56
  has_header: bool = True,
73
57
  ) -> Mapping[str, list[str]]:
74
58
  """Load a mapping TSV file that has multiple mappings for each."""
75
- return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header))
59
+ with _help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header) as file:
60
+ return multidict(file)
76
61
 
77
62
 
63
+ @contextmanager
78
64
  def _help_multimap_tsv(
79
65
  path: str | Path,
80
66
  *,
81
67
  use_tqdm: bool = False,
82
68
  has_header: bool = True,
83
- ) -> Iterable[tuple[str, str]]:
84
- with safe_open(path, read=True) as file:
69
+ ) -> Generator[Iterable[tuple[str, str]], None, None]:
70
+ with safe_open_reader(path) as reader:
85
71
  if has_header:
86
72
  try:
87
- next(file) # throw away header
73
+ next(reader) # throw away header
88
74
  except gzip.BadGzipFile as e:
89
75
  raise ValueError(f"could not open file {path}") from e
90
76
  if use_tqdm:
91
- file = tqdm(file, desc=f"loading TSV from {path}")
92
- yield from get_reader(file)
77
+ yield tqdm(reader, desc=f"loading TSV from {path}")
78
+ else:
79
+ yield cast(Iterable[tuple[str, str]], reader)
93
80
 
94
81
 
95
82
  def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
@@ -149,28 +136,3 @@ def write_iterable_tsv(
149
136
  if header is not None:
150
137
  writer.writerow(header)
151
138
  writer.writerows(it)
152
-
153
-
154
- @contextlib.contextmanager
155
- def safe_open(
156
- path: str | Path, read: bool, encoding: str | None = None
157
- ) -> Generator[TextIO, None, None]:
158
- """Safely open a file for reading or writing text."""
159
- path = Path(path).expanduser().resolve()
160
- mode: Literal["rt", "wt"] = "rt" if read else "wt"
161
- if path.suffix.endswith(".gz"):
162
- with gzip.open(path, mode=mode, encoding=encoding) as file:
163
- yield file
164
- else:
165
- with open(path, mode=mode) as file:
166
- yield file
167
-
168
-
169
- @contextlib.contextmanager
170
- def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"): # type:ignore
171
- """Open a CSV writer, wrapping :func:`csv.writer`."""
172
- if isinstance(f, str | Path):
173
- with safe_open(f, read=False) as file:
174
- yield csv.writer(file, delimiter=delimiter)
175
- else:
176
- yield csv.writer(f, delimiter=delimiter)
pyobo/utils/misc.py CHANGED
@@ -1,9 +1,17 @@
1
1
  """Miscellaneous utilities."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
6
+ from collections.abc import Callable, Iterable
4
7
  from datetime import datetime
5
8
 
9
+ import bioversions.utils
10
+
11
+ from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
12
+
6
13
  __all__ = [
14
+ "VERSION_GETTERS",
7
15
  "cleanup_version",
8
16
  ]
9
17
 
@@ -15,8 +23,11 @@ BIZARRE_LOGGED = set()
15
23
  VERSION_REWRITES = {
16
24
  "$Date: 2009/11/15 10:54:12 $": "2009-11-15", # for owl
17
25
  "http://www.w3.org/2006/time#2016": "2016", # for time
26
+ "https://purl.org/ontology/modalia#1.0.0": "1.0.0", # for dalia
27
+ }
28
+ STATIC_VERSION_REWRITES = {
29
+ "orth": "2",
18
30
  }
19
- STATIC_VERSION_REWRITES = {"orth": "2"}
20
31
  VERSION_PREFIXES = [
21
32
  "http://www.orpha.net/version",
22
33
  "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
@@ -27,17 +38,36 @@ VERSION_PREFIXES = [
27
38
  "http://purl.dataone.org/odo/SASAP/", # like in http://purl.dataone.org/odo/SASAP/0.3.1
28
39
  "http://purl.dataone.org/odo/SENSO/", # like in http://purl.dataone.org/odo/SENSO/0.1.0
29
40
  "https://purl.dataone.org/odo/ADCAD/",
41
+ "http://identifiers.org/combine.specifications/teddy.rel-",
42
+ "https://nfdi.fiz-karlsruhe.de/ontology/",
43
+ "http://www.w3.org/ns/prov-",
44
+ "https://raw.githubusercontent.com/enpadasi/Ontology-for-Nutritional-Studies/releases/download/v",
45
+ "http://purl.jp/bio/4/ontology/iobc/", # like http://purl.jp/bio/4/ontology/iobc/1.6.0
46
+ "http://w3id.org/nfdi4ing/metadata4ing/", # like http://w3id.org/nfdi4ing/metadata4ing/1.3.1
47
+ "http://www.semanticweb.com/OntoRxn/", # like http://www.semanticweb.com/OntoRxn/0.2.5
48
+ "https://w3id.org/lehrplan/ontology/", # like in https://w3id.org/lehrplan/ontology/1.0.0-4
49
+ "http://www.ebi.ac.uk/swo/version/", # http://www.ebi.ac.uk/swo/version/6.0
50
+ "https://w3id.org/emi/version/",
30
51
  ]
31
52
  VERSION_PREFIX_SPLITS = [
32
53
  "http://www.ebi.ac.uk/efo/releases/v",
33
54
  "http://www.ebi.ac.uk/swo/swo.owl/",
34
55
  "http://semanticscience.org/ontology/sio/v",
35
56
  "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
57
+ "http://nmrml.org/cv/v", # as in http://nmrml.org/cv/v1.1.0/nmrCV
58
+ "http://enanomapper.github.io/ontologies/releases/", # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
36
59
  ]
60
+ BAD = {
61
+ "http://purl.obolibrary.org/obo",
62
+ "http://www.bioassayontology.org/bao/bao_complete",
63
+ }
37
64
 
38
65
 
39
66
  def cleanup_version(data_version: str, prefix: str) -> str:
40
67
  """Clean the version information."""
68
+ # in case a literal string that wasn't parsed properly gets put in
69
+ data_version = data_version.strip('"')
70
+
41
71
  if data_version in VERSION_REWRITES:
42
72
  return VERSION_REWRITES[data_version]
43
73
 
@@ -74,3 +104,114 @@ def cleanup_version(data_version: str, prefix: str) -> str:
74
104
  logger.debug("[%s] bizarre version: %s", prefix, data_version)
75
105
  BIZARRE_LOGGED.add((prefix, data_version))
76
106
  return data_version
107
+
108
+
109
+ def _get_obo_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
110
+ rv = bioversions.utils.get_obo_version(url, max_lines=max_lines)
111
+ if rv is None:
112
+ return None
113
+ return cleanup_version(rv, prefix)
114
+
115
+
116
+ def _get_owl_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
117
+ rv = bioversions.utils.get_owl_xml_version(url, max_lines=max_lines)
118
+ if rv is None:
119
+ return None
120
+ return cleanup_version(rv, prefix)
121
+
122
+
123
+ def _get_obograph_json_version(prefix: str, url: str) -> str | None:
124
+ rv = bioversions.utils.get_obograph_json_version(url)
125
+ if rv is None:
126
+ return None
127
+ return cleanup_version(rv, prefix)
128
+
129
+
130
+ #: A mapping from data type to version getter function
131
+ VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
132
+ "obo": _get_obo_version,
133
+ "owl": _get_owl_version,
134
+ "json": _get_obograph_json_version,
135
+ }
136
+
137
+
138
+ def _prioritize_version(
139
+ data_version: str | None,
140
+ ontology_prefix: str,
141
+ version: str | None,
142
+ date: datetime | None,
143
+ ) -> str | None:
144
+ """Process version information coming from several sources and normalize them."""
145
+ if ontology_prefix in STATIC_VERSION_REWRITES:
146
+ return STATIC_VERSION_REWRITES[ontology_prefix]
147
+
148
+ if version:
149
+ if version in BAD:
150
+ logger.debug("[%s] had known bad version, returning None: ", ontology_prefix, version)
151
+ return None
152
+
153
+ clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
154
+ if not data_version:
155
+ logger.debug(
156
+ "[%s] did not have a version, overriding with %s",
157
+ ontology_prefix,
158
+ clean_injected_version,
159
+ )
160
+ return clean_injected_version
161
+
162
+ clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
163
+ if clean_data_version != clean_injected_version:
164
+ # in this case, we're going to trust the one that's passed
165
+ # through explicitly more than the graph's content
166
+ logger.debug(
167
+ "[%s] had version %s, overriding with %s",
168
+ ontology_prefix,
169
+ data_version,
170
+ version,
171
+ )
172
+ return clean_injected_version
173
+
174
+ if data_version:
175
+ if data_version in BAD:
176
+ logger.debug(
177
+ "[%s] had known bad version, returning None: ", ontology_prefix, data_version
178
+ )
179
+ return None
180
+
181
+ clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
182
+ logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
183
+ return clean_data_version
184
+
185
+ if date is not None:
186
+ derived_date_version = date.strftime("%Y-%m-%d")
187
+ logger.debug(
188
+ "[%s] does not report a version. falling back to date: %s",
189
+ ontology_prefix,
190
+ derived_date_version,
191
+ )
192
+ return derived_date_version
193
+
194
+ logger.debug("[%s] does not report a version nor a date", ontology_prefix)
195
+ return None
196
+
197
+
198
+ def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str]]:
199
+ # assume that all possible files that can be downloaded
200
+ # are in sync and have the same version
201
+ for ontology_format, get_url_func in ONTOLOGY_GETTERS:
202
+ url = get_url_func(prefix)
203
+ if url is None:
204
+ continue
205
+ yield ontology_format, url
206
+
207
+
208
+ def _get_version_from_artifact(prefix: str) -> str | None:
209
+ for ontology_format, url in _get_getter_urls(prefix):
210
+ # Try to peek into the file to get the version without fully downloading
211
+ get_version_func = VERSION_GETTERS.get(ontology_format)
212
+ if get_version_func is None:
213
+ continue
214
+ version = get_version_func(prefix, url)
215
+ if version:
216
+ return cleanup_version(version, prefix=prefix)
217
+ return None