pyobo 0.12.0__py3-none-any.whl → 0.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyobo/reader.py CHANGED
@@ -15,20 +15,19 @@ from typing import Any
 import bioregistry
 import networkx as nx
 from curies import ReferenceTuple
+from curies.preprocessing import BlocklistError
 from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
 from tqdm.auto import tqdm

 from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
 from .identifier_utils import (
-    BlacklistedError,
     NotCURIEError,
     ParseError,
     UnparsableIRIError,
     _is_valid_identifier,
     _parse_str_or_curie_or_uri_helper,
-    remap_prefix,
-    str_is_blacklisted,
+    get_rules,
 )
 from .reader_utils import (
     _chomp_axioms,
@@ -53,6 +52,7 @@ from .struct.struct_utils import Annotation, Stanza
 from .struct.typedef import comment as has_comment
 from .struct.typedef import default_typedefs, has_ontology_root_term
 from .utils.cache import write_gzipped_graph
+from .utils.io import safe_open
 from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version

 __all__ = [
@@ -76,13 +76,7 @@ def from_obo_path(
 ) -> Obo:
     """Get the OBO graph from a path."""
     path = Path(path).expanduser().resolve()
-    if path.suffix.endswith(".gz"):
-        import gzip
-
-        logger.info("[%s] parsing gzipped OBO with obonet from %s", prefix or "<unknown>", path)
-        with gzip.open(path, "rt") as file:
-            graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
-    elif path.suffix.endswith(".zip"):
+    if path.suffix.endswith(".zip"):
         import io
         import zipfile

@@ -95,7 +89,7 @@ def from_obo_path(
         )
     else:
         logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
-        with open(path) as file:
+        with safe_open(path, read=True) as file:
            graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)

     if prefix:
@@ -1262,7 +1256,7 @@ def _handle_prop(
     ):
         case Reference() as datatype_:
             datatype = datatype_
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case ParseError() as exc:
             if strict:
@@ -1304,7 +1298,7 @@ def _handle_prop(
     ):
         case Reference() as obj_reference:
             return Annotation(prop_reference, obj_reference)
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case UnparsableIRIError():
             return Annotation(prop_reference, OBOLiteral.uri(value))
@@ -1330,7 +1324,7 @@ def _handle_prop(
     ):
         case Reference() as obj_reference:
             return Annotation(prop_reference, obj_reference)
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case ParseError():
             if datatype:
@@ -1535,10 +1529,12 @@ def _parse_xref_line(
 ) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
     xref, _, rest = line.partition(" [")

-    if str_is_blacklisted(xref, ontology_prefix=ontology_prefix) or ":" not in xref:
+    rules = get_rules()
+
+    if rules.str_is_blocked(xref, context=ontology_prefix) or ":" not in xref:
         return None  # sometimes xref to self... weird

-    xref = remap_prefix(xref, ontology_prefix=ontology_prefix)
+    xref = rules.remap_prefix(xref, context=ontology_prefix)

     split_space = " " in xref
     if split_space:
@@ -1552,7 +1548,7 @@ def _parse_xref_line(
         xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
     )
     match xref_ref:
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case ParseError() as exc:
             if strict:
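Note on the xref handling above: the module-level `str_is_blacklisted` and `remap_prefix` helpers are replaced by a rules object obtained from `get_rules()`, whose `str_is_blocked` and `remap_prefix` methods take the ontology prefix as `context`. A minimal sketch of the new calling pattern, assuming only what the hunks above show (`normalize_xref` is a hypothetical helper, not part of pyobo):

    from pyobo.identifier_utils import get_rules

    def normalize_xref(xref: str, ontology_prefix: str) -> str | None:
        """Mirror the blocklist + remap steps from _parse_xref_line (illustrative only)."""
        rules = get_rules()
        # cross-references on the blocklist for this ontology are silently skipped
        if rules.str_is_blocked(xref, context=ontology_prefix):
            return None
        # rewrite deprecated or bespoke prefixes before CURIE parsing
        return rules.remap_prefix(xref, context=ontology_prefix)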
pyobo/sources/cgnc.py CHANGED
@@ -45,7 +45,15 @@ HEADER = [

 def get_terms(force: bool = False) -> Iterable[Term]:
     """Get CGNC terms."""
-    df = ensure_df(PREFIX, url=URL, name=f"{PREFIX}.tsv", force=force, header=0, names=HEADER)
+    df = ensure_df(
+        PREFIX,
+        url=URL,
+        name=f"{PREFIX}.tsv",
+        force=force,
+        header=0,
+        names=HEADER,
+        on_bad_lines="skip",
+    )
     for i, (cgnc_id, entrez_id, ensembl_id, name, synonym_1, synoynm_2, _, _) in enumerate(
         df.values
     ):
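The new `on_bad_lines="skip"` argument is presumably forwarded by `ensure_df` to `pandas.read_csv`, so malformed rows in the CGNC export are dropped instead of aborting the parse. A sketch of the underlying pandas behavior (the local file name is illustrative):

    import pandas as pd

    # With on_bad_lines="skip" (pandas >= 1.3), rows that do not have the expected
    # number of fields are dropped instead of raising a ParserError.
    df = pd.read_csv("cgnc.tsv", sep="\t", header=0, on_bad_lines="skip")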
pyobo/sources/credit.py CHANGED
@@ -7,7 +7,7 @@ from collections.abc import Iterable

 from more_itertools import chunked

-from pyobo.struct import Obo, Term
+from pyobo.struct import CHARLIE_TERM, HUMAN_TERM, Obo, Reference, Term, default_reference
 from pyobo.utils.path import ensure_path

 __all__ = [
@@ -16,6 +16,12 @@ __all__ = [

 url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
 PREFIX = "credit"
+ROOT = default_reference(prefix=PREFIX, identifier="contributor-role", name="contributor role")
+ROOT_TERM = (
+    Term(reference=ROOT)
+    .append_contributor(CHARLIE_TERM)
+    .append_xref(Reference(prefix="cro", identifier="0000000"))
+)


 class CreditGetter(Obo):
@@ -23,6 +29,7 @@ class CreditGetter(Obo):

     ontology = PREFIX
     static_version = "2022"
+    root_terms = [ROOT]

     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -34,14 +41,16 @@ def get_terms(force: bool = False) -> list[Term]:
     path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
     with open(path) as f:
         data = json.load(f)
-    terms = []
+    terms = [
+        CHARLIE_TERM,
+        HUMAN_TERM,
+        ROOT_TERM,
+    ]
     for x in data:
-        name = x["name"].removesuffix(".md").lower()
-
         pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
         with open(pp) as f:
             header, *rest = f.read().splitlines()
-        name = header = header.removeprefix("# Contributor Roles/")
+        name = header.removeprefix("# Contributor Roles/")
         dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
         identifier = (
             dd["Canonical URL"]
@@ -50,7 +59,9 @@ def get_terms(force: bool = False) -> list[Term]:
         )
         desc = dd["Short definition"]
         terms.append(
-            Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
+            Term.from_triple(
+                prefix=PREFIX, identifier=identifier, name=name, definition=desc
+            ).append_parent(ROOT)
         )

     return terms
pyobo/sources/flybase.py CHANGED
@@ -18,7 +18,7 @@ __all__ = [

 logger = logging.getLogger(__name__)

-BASE_URL = "http://ftp.flybase.net/releases"
+BASE_URL = "https://s3ftp.flybase.org/releases"
 PREFIX = "flybase"
 NAME = "FlyBase"

@@ -51,7 +51,7 @@ def _get_names(version: str, force: bool = False) -> pd.DataFrame:

 def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:
     """Get mapping from abbreviation column to NCBI taxonomy ID column."""
-    url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/species/organism_list_fb_{version}.tsv.gz"
+    url = f"{BASE_URL}/FB{version}/precomputed_files/species/organism_list_fb_{version}.tsv.gz"
     df = ensure_df(
         PREFIX, url=url, force=force, version=version, skiprows=4, header=None, usecols=[2, 4]
     )
@@ -60,7 +60,7 @@ def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:


 def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
-    url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/genes/automated_gene_summaries.tsv.gz"
+    url = f"{BASE_URL}/FB{version}/precomputed_files/genes/automated_gene_summaries.tsv.gz"
     df = ensure_df(
         PREFIX, url=url, force=force, version=version, skiprows=2, header=None, usecols=[0, 1]
     )
@@ -69,7 +69,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:

 def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
     url = (
-        f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
+        f"{BASE_URL}/FB{version}/precomputed_files/"
         f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
     )
     df = ensure_df(
@@ -86,7 +86,7 @@ def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[


 def _get_synonyms(version, force):
-    url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/synonyms/fb_synonym_fb_{version}.tsv.gz"
+    url = f"{BASE_URL}/FB{version}/precomputed_files/synonyms/fb_synonym_fb_{version}.tsv.gz"
     df = ensure_df(PREFIX, url=url, force=force, version=version, skiprows=4, usecols=[0, 2])
     return df  # TODO use this

pyobo/sources/omim_ps.py CHANGED
@@ -13,7 +13,7 @@ __all__ = [

 logger = logging.getLogger(__name__)
 PREFIX = "omim.ps"
-URL = "https://omim.org/phenotypicSeriesTitles/all"
+URL = "https://omim.org/phenotypicSeriesTitles/"


 class OMIMPSGetter(Obo):
@@ -26,13 +26,13 @@ class OMIMPSGetter(Obo):
     soup = get_soup(URL, user_agent="Mozilla/5.0")
     content = soup.find(id="mimContent")
     if content is None:
-        raise ValueError
+        raise ValueError("omim.ps failed - scraper could not find id='mimContent' in HTML")
     table = content.find("table")  # type:ignore[attr-defined]
     if table is None:
-        raise ValueError
+        raise ValueError("omim.ps failed - scraper could not find table in HTML")
     tbody = table.find("tbody")
     if tbody is None:
-        raise ValueError
+        raise ValueError("omim.ps failed - scraper could not find table body in HTML")
     for row in tbody.find_all("tr"):
         anchor = row.find("td").find("a")
         name = anchor.text.strip()
@@ -105,4 +105,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:


 if __name__ == "__main__":
-    PharmGKBGeneGetter().write_default(force=True, write_obo=True, use_tqdm=True)
+    PharmGKBGeneGetter.cli()
@@ -16,6 +16,7 @@ from pyobo.struct.functional.utils import (
     FunctionalOWLSerializable,
     list_to_funowl,
 )
+from pyobo.utils.io import safe_open

 __all__ = [
     "Document",
@@ -109,7 +110,8 @@ class Document:
     def write_funowl(self, path: str | Path) -> None:
         """Write functional OWL to a file.."""
         path = Path(path).expanduser().resolve()
-        path.write_text(self.to_funowl())
+        with safe_open(path, read=False) as file:
+            file.write(self.to_funowl())

     def to_funowl(self) -> str:
         """Get the document as a functional OWL string."""
pyobo/struct/reference.py CHANGED
@@ -14,9 +14,9 @@ import dateutil.parser
 import pytz
 from bioregistry import NormalizedNamableReference as Reference
 from curies import ReferenceTuple
+from curies.preprocessing import BlocklistError

 from ..identifier_utils import (
-    BlacklistedError,
     NotCURIEError,
     ParseError,
     UnparsableIRIError,
@@ -62,7 +62,7 @@ def _parse_str_or_curie_or_uri(
     match reference:
         case Reference():
             return reference
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case ParseError():
             if strict:
@@ -224,7 +224,7 @@ def _obo_parse_identifier(
     ):
         case Reference() as reference:
             return reference
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case NotCURIEError() as exc:
             # this means there's no colon `:`
@@ -272,7 +272,7 @@ def _parse_reference_or_uri_literal(
     ):
         case Reference() as reference:
             return reference
-        case BlacklistedError():
+        case BlocklistError():
             return None
         case UnparsableIRIError():
             # this means that it's definitely a URI,
pyobo/struct/struct.py CHANGED
@@ -70,7 +70,7 @@ from ..constants import (
     TARGET_PREFIX,
 )
 from ..utils.cache import write_gzipped_graph
-from ..utils.io import multidict, write_iterable_tsv
+from ..utils.io import multidict, safe_open, write_iterable_tsv
 from ..utils.path import (
     CacheArtifact,
     get_cache_path,
@@ -712,6 +712,13 @@ class Obo:
             raise ValueError(f"There is no version available for {self.ontology}")
         return self.data_version

+    @property
+    def _prefix_version(self) -> str:
+        """Get the prefix and version (for logging)."""
+        if self.data_version:
+            return f"{self.ontology} {self.data_version}"
+        return self.ontology
+
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in this ontology."""
         raise NotImplementedError
@@ -722,10 +729,11 @@ class Obo:

         return graph_from_obo(self)

-    def write_obograph(self, path: Path) -> None:
+    def write_obograph(self, path: str | Path) -> None:
         """Write OBO Graph json."""
         graph = self.get_graph()
-        path.write_text(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
+        with safe_open(path, read=False) as file:
+            file.write(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))

     @classmethod
     def cli(cls, *args, default_rewrite: bool = False) -> Any:
@@ -761,13 +769,12 @@ class Obo:
             click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
             sys.exit(1)
         inst.write_default(
-            write_obograph=True,
-            write_obo=True,
+            write_obograph=False,
+            write_obo=False,
             write_owl=owl,
             write_ofn=ofn,
             write_ttl=ttl,
             write_nodes=True,
-            write_edges=True,
             force=force or rewrite,
             use_tqdm=True,
         )
@@ -969,9 +976,14 @@ class Obo:
             emit_annotation_properties=emit_annotation_properties,
         )
         if use_tqdm:
-            it = tqdm(it, desc=f"[{self.ontology}] writing OBO", unit_scale=True, unit="line")
+            it = tqdm(
+                it,
+                desc=f"[{self._prefix_version}] writing OBO",
+                unit_scale=True,
+                unit="line",
+            )
         if isinstance(file, str | Path | os.PathLike):
-            with open(file, "w") as fh:
+            with safe_open(file, read=False) as fh:
                 self._write_lines(it, fh)
         else:
             self._write_lines(it, file)
@@ -1002,11 +1014,72 @@ class Obo:

     def write_nodes(self, path: str | Path) -> None:
         """Write a nodes TSV file."""
-        # TODO reimplement internally
-        self.get_graph().get_nodes_df().to_csv(path, sep="\t", index=False)
+        write_iterable_tsv(
+            path=path,
+            header=self.nodes_header,
+            it=self.iterate_node_rows(),
+        )
+
+    @property
+    def nodes_header(self) -> Sequence[str]:
+        """Get the header for nodes."""
+        return [
+            "curie:ID",
+            "name:string",
+            "synonyms:string[]",
+            "synonym_predicates:string[]",
+            "synonym_types:string[]",
+            "definition:string",
+            "deprecated:boolean",
+            "type:string",
+            "provenance:string[]",
+            "alts:string[]",
+            "replaced_by:string[]",
+            "mapping_objects:string[]",
+            "mapping_predicates:string[]",
+            "version:string",
+        ]
+
+    def _get_node_row(self, node: Term, sep: str, version: str) -> Sequence[str]:
+        synonym_predicate_curies, synonym_type_curies, synonyms = [], [], []
+        for synonym in node.synonyms:
+            synonym_predicate_curies.append(synonym.predicate.curie)
+            synonym_type_curies.append(synonym.type.curie if synonym.type else "")
+            synonyms.append(synonym.name)
+        mapping_predicate_curies, mapping_target_curies = [], []
+        for predicate, obj in node.get_mappings(include_xrefs=True, add_context=False):
+            mapping_predicate_curies.append(predicate.curie)
+            mapping_target_curies.append(obj.curie)
+        return (
+            node.curie,
+            node.name or "",
+            sep.join(synonyms),
+            sep.join(synonym_predicate_curies),
+            sep.join(synonym_type_curies),
+            node.definition or "",
+            "true" if node.is_obsolete else "false",
+            node.type,
+            sep.join(
+                reference.curie for reference in node.provenance if isinstance(reference, Reference)
+            ),
+            sep.join(alt_reference.curie for alt_reference in node.alt_ids),
+            sep.join(ref.curie for ref in node.get_replaced_by()),
+            sep.join(mapping_target_curies),
+            sep.join(mapping_predicate_curies),
+            version,
+        )
+
+    def iterate_node_rows(self, sep: str = ";") -> Iterable[Sequence[str]]:
+        """Get a nodes iterator appropriate for serialization."""
+        version = self.data_version or ""
+        for node in self.iter_terms():
+            if node.prefix != self.ontology:
+                continue
+            yield self._get_node_row(node, sep=sep, version=version)

     def write_edges(self, path: str | Path) -> None:
         """Write a edges TSV file."""
+        # note: this is actually taken care of as part of the cache configuration
         write_iterable_tsv(
             path=path,
             header=self.edges_header,
@@ -1025,15 +1098,15 @@ class Obo:

     @property
     def _obo_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo.gz")

     @property
     def _obograph_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json.gz")

     @property
     def _owl_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl.gz")

     @property
     def _obonet_gz_path(self) -> Path:
@@ -1041,7 +1114,7 @@ class Obo:

     @property
     def _ofn_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn.gz")

     @property
     def _ttl_path(self) -> Path:
@@ -1060,22 +1133,10 @@ class Obo:
                 [f"{self.ontology}_id", "taxonomy_id"],
                 self.iterate_id_species,
             ),
-            (
-                # TODO deprecate this in favor of literal mappings output
-                CacheArtifact.synonyms,
-                [f"{self.ontology}_id", "synonym"],
-                self.iterate_synonym_rows,
-            ),
             (CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
             (CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
             (CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
             (CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
-            (
-                # TODO deprecate this in favor of pair of literal and object properties
-                CacheArtifact.properties,
-                self.properties_header,
-                self._iter_property_rows,
-            ),
             (
                 CacheArtifact.object_properties,
                 self.object_properties_header,
@@ -1097,8 +1158,8 @@ class Obo:
         """Write the metadata JSON file."""
         metadata = self.get_metadata()
         for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
-            logger.debug("[%s v%s] caching metadata to %s", self.ontology, self.data_version, path)
-            with path.open("w") as file:
+            logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
+            with safe_open(path, read=False) as file:
                 json.dump(metadata, file, indent=2)

     def write_prefix_map(self) -> None:
@@ -1110,9 +1171,8 @@ class Obo:
         """Write cache parts."""
         typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
         logger.debug(
-            "[%s v%s] caching typedefs to %s",
-            self.ontology,
-            self.data_version,
+            "[%s] caching typedefs to %s",
+            self._prefix_version,
             typedefs_path,
         )
         typedef_df: pd.DataFrame = self.get_typedef_df()
@@ -1121,10 +1181,10 @@ class Obo:

         for cache_artifact, header, fn in self._get_cache_config():
             path = self._get_cache_path(cache_artifact)
-            if path.exists() and not force:
+            if path.is_file() and not force:
                 continue
             tqdm.write(
-                f"[{self.ontology} {self.data_version}] writing {cache_artifact.name} to {path}",
+                f"[{self._prefix_version}] writing {cache_artifact.name} to {path}",
             )
             write_iterable_tsv(
                 path=path,
@@ -1139,12 +1199,11 @@ class Obo:
             relations_path = get_relation_cache_path(
                 self.ontology, reference=relation, version=self.data_version
             )
-            if relations_path.exists() and not force:
+            if relations_path.is_file() and not force:
                 continue
             logger.debug(
-                "[%s v%s] caching relation %s ! %s",
-                self.ontology,
-                self.data_version,
+                "[%s] caching relation %s ! %s",
+                self._prefix_version,
                 relation.curie,
                 relation.name,
             )
@@ -1164,8 +1223,7 @@ class Obo:
         write_owl: bool = False,
         write_ofn: bool = False,
         write_ttl: bool = False,
-        write_nodes: bool = True,
-        write_edges: bool = True,
+        write_nodes: bool = False,
         obograph_use_internal: bool = False,
         write_cache: bool = True,
     ) -> None:
@@ -1174,15 +1232,15 @@ class Obo:
         self.write_prefix_map()
         if write_cache:
             self.write_cache(force=force)
-        if write_obo and (not self._obo_path.exists() or force):
-            tqdm.write(f"[{self.ontology}] writing OBO to {self._obo_path}")
+        if write_obo and (not self._obo_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OBO to {self._obo_path}")
             self.write_obo(self._obo_path, use_tqdm=use_tqdm)
-        if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.exists() or force):
-            tqdm.write(f"[{self.ontology}] writing OFN to {self._ofn_path}")
+        if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OFN to {self._ofn_path}")
             self.write_ofn(self._ofn_path)
-        if write_obograph and (not self._obograph_path.exists() or force):
+        if write_obograph and (not self._obograph_path.is_file() or force):
             if obograph_use_internal:
-                tqdm.write(f"[{self.ontology}] writing OBO Graph to {self._obograph_path}")
+                tqdm.write(f"[{self._prefix_version}] writing OBO Graph to {self._obograph_path}")
                 self.write_obograph(self._obograph_path)
             else:
                 import bioontologies.robot
@@ -1193,22 +1251,22 @@ class Obo:
                 bioontologies.robot.convert(
                     self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
                 )
-        if write_owl and (not self._owl_path.exists() or force):
-            tqdm.write(f"[{self.ontology}] writing OWL to {self._owl_path}")
+        if write_owl and (not self._owl_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OWL to {self._owl_path}")
             import bioontologies.robot

             bioontologies.robot.convert(
                 self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
             )
-        if write_ttl and (not self._ttl_path.exists() or force):
-            tqdm.write(f"[{self.ontology}] writing Turtle to {self._ttl_path}")
+        if write_ttl and (not self._ttl_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing Turtle to {self._ttl_path}")
             self.write_rdf(self._ttl_path)
-        if write_obonet and (not self._obonet_gz_path.exists() or force):
-            tqdm.write(f"[{self.ontology}] writing obonet to {self._obonet_gz_path}")
+        if write_obonet and (not self._obonet_gz_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing obonet to {self._obonet_gz_path}")
             self.write_obonet_gz(self._obonet_gz_path)
         if write_nodes:
             nodes_path = self._get_cache_path(CacheArtifact.nodes)
-            tqdm.write(f"[{self.ontology}] writing nodes TSV to {nodes_path}")
+            tqdm.write(f"[{self._prefix_version}] writing nodes TSV to {nodes_path}")
             self.write_nodes(nodes_path)

     @property
@@ -1335,9 +1393,8 @@ class Obo:
             rv.add_edge(_source, _target, key=_key)

         logger.info(
-            "[%s v%s] exported graph with %d nodes",
-            self.ontology,
-            self.data_version,
+            "[%s] exported graph with %d nodes",
+            self._prefix_version,
             rv.number_of_nodes(),
         )
         return rv
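The `nodes_header` introduced above appears to follow the `name:type` / `name:type[]` column convention used by `neo4j-admin database import`, with multi-valued fields joined by the `sep` argument of `iterate_node_rows` (";" by default). A sketch of how the export is presumably driven, using a getter from this diff and an illustrative output path:

    from pyobo.sources.credit import CreditGetter

    obo = CreditGetter()
    # header comes from nodes_header; one row per term via iterate_node_rows
    obo.write_nodes("credit_nodes.tsv")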
pyobo/utils/cache.py CHANGED
@@ -1,6 +1,5 @@
 """Utilities for caching files."""

-import gzip
 import json
 import logging
 from collections.abc import Iterable, Mapping
@@ -14,7 +13,7 @@ from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813

-from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
+from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv

 __all__ = [
     "cached_collection",
@@ -70,13 +69,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"

 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with gzip.open(path, "rt") as file:
+    with safe_open(path, read=True) as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)


 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with gzip.open(path, "wt") as file:
+    with safe_open(path, read=False) as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)

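Several hunks above replace `open` and `gzip.open` calls with `safe_open(path, read=...)` from `pyobo.utils.io`. Its implementation is not shown in this diff; based on how it is called (text-mode reads and writes on both plain and `.gz` paths), it presumably behaves like the following sketch (the function name is suffixed to make clear it is not the real helper):

    import gzip
    from pathlib import Path

    def safe_open_sketch(path: str | Path, read: bool):
        """Open a text file, transparently using gzip when the path ends in .gz (illustrative)."""
        path = Path(path)
        mode = "rt" if read else "wt"
        if path.suffix == ".gz":
            return gzip.open(path, mode)
        return open(path, mode)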