pyobo 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/api/properties.py +8 -12
- pyobo/api/xrefs.py +1 -2
- pyobo/cli/database.py +30 -2
- pyobo/cli/database_utils.py +5 -11
- pyobo/getters.py +18 -78
- pyobo/gilda_utils.py +3 -80
- pyobo/identifier_utils/__init__.py +2 -10
- pyobo/identifier_utils/api.py +21 -12
- pyobo/identifier_utils/preprocessing.json +74 -13
- pyobo/identifier_utils/preprocessing.py +5 -39
- pyobo/obographs.py +5 -1
- pyobo/reader.py +13 -17
- pyobo/sources/cgnc.py +9 -1
- pyobo/sources/flybase.py +5 -5
- pyobo/sources/omim_ps.py +4 -4
- pyobo/sources/pharmgkb/pharmgkb_gene.py +1 -1
- pyobo/struct/functional/ontology.py +3 -1
- pyobo/struct/reference.py +4 -4
- pyobo/struct/struct.py +112 -55
- pyobo/utils/cache.py +3 -4
- pyobo/utils/io.py +38 -14
- pyobo/utils/path.py +16 -19
- pyobo/version.py +1 -1
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/METADATA +71 -110
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/RECORD +29 -30
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/WHEEL +1 -1
- pyobo/identifier_utils/model.py +0 -130
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/struct.py
CHANGED
@@ -70,7 +70,7 @@ from ..constants import (
     TARGET_PREFIX,
 )
 from ..utils.cache import write_gzipped_graph
-from ..utils.io import multidict, write_iterable_tsv
+from ..utils.io import multidict, safe_open, write_iterable_tsv
 from ..utils.path import (
     CacheArtifact,
     get_cache_path,
@@ -712,6 +712,13 @@ class Obo:
             raise ValueError(f"There is no version available for {self.ontology}")
         return self.data_version
 
+    @property
+    def _prefix_version(self) -> str:
+        """Get the prefix and version (for logging)."""
+        if self.data_version:
+            return f"{self.ontology} {self.data_version}"
+        return self.ontology
+
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in this ontology."""
         raise NotImplementedError
@@ -722,10 +729,11 @@ class Obo:
 
         return graph_from_obo(self)
 
-    def write_obograph(self, path: Path) -> None:
+    def write_obograph(self, path: str | Path) -> None:
         """Write OBO Graph json."""
         graph = self.get_graph()
-        path
+        with safe_open(path, read=False) as file:
+            file.write(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
 
     @classmethod
     def cli(cls, *args, default_rewrite: bool = False) -> Any:
@@ -761,13 +769,12 @@ class Obo:
             click.secho(f"[{cls.ontology}] Got an exception during instantiation - {type(e)}")
             sys.exit(1)
         inst.write_default(
-            write_obograph=
-            write_obo=
+            write_obograph=False,
+            write_obo=False,
             write_owl=owl,
             write_ofn=ofn,
             write_ttl=ttl,
             write_nodes=True,
-            write_edges=True,
             force=force or rewrite,
             use_tqdm=True,
         )
@@ -969,9 +976,14 @@ class Obo:
             emit_annotation_properties=emit_annotation_properties,
         )
         if use_tqdm:
-            it = tqdm(
+            it = tqdm(
+                it,
+                desc=f"[{self._prefix_version}] writing OBO",
+                unit_scale=True,
+                unit="line",
+            )
         if isinstance(file, str | Path | os.PathLike):
-            with
+            with safe_open(file, read=False) as fh:
                 self._write_lines(it, fh)
         else:
             self._write_lines(it, file)
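Because write_obograph now routes through safe_open, whether the output is gzip-compressed follows from the path's suffix alone. A minimal sketch, assuming obo is an instantiated Obo subclass and the file names are hypothetical:

    from pathlib import Path

    # ".gz" suffix -> gzip-compressed JSON; any other suffix -> plain text JSON
    obo.write_obograph(Path("example.json.gz"))
    obo.write_obograph(Path("example.json"))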
@@ -1002,11 +1014,72 @@ class Obo:
 
     def write_nodes(self, path: str | Path) -> None:
         """Write a nodes TSV file."""
-
-
+        write_iterable_tsv(
+            path=path,
+            header=self.nodes_header,
+            it=self.iterate_edge_rows(),
+        )
+
+    @property
+    def nodes_header(self) -> Sequence[str]:
+        """Get the header for nodes."""
+        return [
+            "curie:ID",
+            "name:string",
+            "synonyms:string[]",
+            "synonym_predicates:string[]",
+            "synonym_types:string[]",
+            "definition:string",
+            "deprecated:boolean",
+            "type:string",
+            "provenance:string[]",
+            "alts:string[]",
+            "replaced_by:string[]",
+            "mapping_objects:string[]",
+            "mapping_predicates:string[]",
+            "version:string",
+        ]
+
+    def _get_node_row(self, node: Term, sep: str, version: str) -> Sequence[str]:
+        synonym_predicate_curies, synonym_type_curies, synonyms = [], [], []
+        for synonym in node.synonyms:
+            synonym_predicate_curies.append(synonym.predicate.curie)
+            synonym_type_curies.append(synonym.type.curie if synonym.type else "")
+            synonyms.append(synonym.name)
+        mapping_predicate_curies, mapping_target_curies = [], []
+        for predicate, obj in node.get_mappings(include_xrefs=True, add_context=False):
+            mapping_predicate_curies.append(predicate.curie)
+            mapping_target_curies.append(obj.curie)
+        return (
+            node.curie,
+            node.name or "",
+            sep.join(synonyms),
+            sep.join(synonym_predicate_curies),
+            sep.join(synonym_type_curies),
+            node.definition or "",
+            "true" if node.is_obsolete else "false",
+            node.type,
+            sep.join(
+                reference.curie for reference in node.provenance if isinstance(reference, Reference)
+            ),
+            sep.join(alt_reference.curie for alt_reference in node.alt_ids),
+            sep.join(ref.curie for ref in node.get_replaced_by()),
+            sep.join(mapping_target_curies),
+            sep.join(mapping_predicate_curies),
+            version,
+        )
+
+    def iterate_node_rows(self, sep: str = ";") -> Iterable[Sequence[str]]:
+        """Get a nodes iterator appropriate for serialization."""
+        version = self.data_version or ""
+        for node in self.iter_terms():
+            if node.prefix != self.ontology:
+                continue
+            yield self._get_node_row(node, sep=sep, version=version)
 
     def write_edges(self, path: str | Path) -> None:
         """Write a edges TSV file."""
+        # node, this is actually taken care of as part of the cache configuration
         write_iterable_tsv(
             path=path,
             header=self.edges_header,
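The new nodes_header follows the typed-column convention used by bulk graph loaders such as Neo4j (curie:ID, deprecated:boolean, list columns marked :string[]), and iterate_node_rows joins list values with ";" by default. A sketch of reading such a file back, assuming a hypothetical nodes.tsv.gz written by write_nodes:

    import pandas as pd

    # pandas infers gzip compression from the ".gz" suffix
    df = pd.read_csv("nodes.tsv.gz", sep="\t")

    # split a list-typed column back into Python lists
    synonyms = df["synonyms:string[]"].fillna("").str.split(";")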
@@ -1025,15 +1098,15 @@ class Obo:
 
     @property
     def _obo_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.obo.gz")
 
     @property
     def _obograph_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.json.gz")
 
     @property
     def _owl_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.owl.gz")
 
     @property
     def _obonet_gz_path(self) -> Path:
@@ -1041,7 +1114,7 @@ class Obo:
 
     @property
     def _ofn_path(self) -> Path:
-        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn")
+        return self._path(BUILD_SUBDIRECTORY_NAME, name=f"{self.ontology}.ofn.gz")
 
     @property
     def _ttl_path(self) -> Path:
@@ -1060,22 +1133,10 @@ class Obo:
                 [f"{self.ontology}_id", "taxonomy_id"],
                 self.iterate_id_species,
             ),
-            (
-                # TODO deprecate this in favor of literal mappings output
-                CacheArtifact.synonyms,
-                [f"{self.ontology}_id", "synonym"],
-                self.iterate_synonym_rows,
-            ),
             (CacheArtifact.alts, [f"{self.ontology}_id", "alt_id"], self.iterate_alt_rows),
             (CacheArtifact.mappings, SSSOM_DF_COLUMNS, self.iterate_mapping_rows),
             (CacheArtifact.relations, self.relations_header, self.iter_relation_rows),
             (CacheArtifact.edges, self.edges_header, self.iterate_edge_rows),
-            (
-                # TODO deprecate this in favor of pair of literal and object properties
-                CacheArtifact.properties,
-                self.properties_header,
-                self._iter_property_rows,
-            ),
             (
                 CacheArtifact.object_properties,
                 self.object_properties_header,
@@ -1097,8 +1158,8 @@ class Obo:
         """Write the metadata JSON file."""
         metadata = self.get_metadata()
         for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
-            logger.debug("[%s
-            with path
+            logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
+            with safe_open(path, read=False) as file:
                 json.dump(metadata, file, indent=2)
 
     def write_prefix_map(self) -> None:
@@ -1110,9 +1171,8 @@ class Obo:
         """Write cache parts."""
         typedefs_path = self._get_cache_path(CacheArtifact.typedefs)
         logger.debug(
-            "[%s
-            self.
-            self.data_version,
+            "[%s] caching typedefs to %s",
+            self._prefix_version,
             typedefs_path,
         )
         typedef_df: pd.DataFrame = self.get_typedef_df()
@@ -1121,10 +1181,10 @@ class Obo:
 
         for cache_artifact, header, fn in self._get_cache_config():
             path = self._get_cache_path(cache_artifact)
-            if path.
+            if path.is_file() and not force:
                 continue
             tqdm.write(
-                f"[{self.
+                f"[{self._prefix_version}] writing {cache_artifact.name} to {path}",
             )
             write_iterable_tsv(
                 path=path,
@@ -1139,12 +1199,11 @@ class Obo:
             relations_path = get_relation_cache_path(
                 self.ontology, reference=relation, version=self.data_version
             )
-            if relations_path.
+            if relations_path.is_file() and not force:
                 continue
             logger.debug(
-                "[%s
-                self.
-                self.data_version,
+                "[%s] caching relation %s ! %s",
+                self._prefix_version,
                 relation.curie,
                 relation.name,
             )
@@ -1164,8 +1223,7 @@ class Obo:
         write_owl: bool = False,
         write_ofn: bool = False,
         write_ttl: bool = False,
-        write_nodes: bool =
-        write_edges: bool = True,
+        write_nodes: bool = False,
         obograph_use_internal: bool = False,
         write_cache: bool = True,
     ) -> None:
@@ -1174,15 +1232,15 @@ class Obo:
         self.write_prefix_map()
         if write_cache:
             self.write_cache(force=force)
-        if write_obo and (not self._obo_path.
-            tqdm.write(f"[{self.
+        if write_obo and (not self._obo_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OBO to {self._obo_path}")
             self.write_obo(self._obo_path, use_tqdm=use_tqdm)
-        if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.
-            tqdm.write(f"[{self.
+        if (write_ofn or write_owl or write_obograph) and (not self._ofn_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OFN to {self._ofn_path}")
             self.write_ofn(self._ofn_path)
-        if write_obograph and (not self._obograph_path.
+        if write_obograph and (not self._obograph_path.is_file() or force):
             if obograph_use_internal:
-                tqdm.write(f"[{self.
+                tqdm.write(f"[{self._prefix_version}] writing OBO Graph to {self._obograph_path}")
                 self.write_obograph(self._obograph_path)
             else:
                 import bioontologies.robot
@@ -1193,22 +1251,22 @@ class Obo:
             bioontologies.robot.convert(
                 self._ofn_path, self._obograph_path, debug=True, merge=False, reason=False
             )
-        if write_owl and (not self._owl_path.
-            tqdm.write(f"[{self.
+        if write_owl and (not self._owl_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing OWL to {self._owl_path}")
             import bioontologies.robot
 
             bioontologies.robot.convert(
                 self._ofn_path, self._owl_path, debug=True, merge=False, reason=False
             )
-        if write_ttl and (not self._ttl_path.
-            tqdm.write(f"[{self.
+        if write_ttl and (not self._ttl_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing Turtle to {self._ttl_path}")
             self.write_rdf(self._ttl_path)
-        if write_obonet and (not self._obonet_gz_path.
-            tqdm.write(f"[{self.
+        if write_obonet and (not self._obonet_gz_path.is_file() or force):
+            tqdm.write(f"[{self._prefix_version}] writing obonet to {self._obonet_gz_path}")
             self.write_obonet_gz(self._obonet_gz_path)
         if write_nodes:
             nodes_path = self._get_cache_path(CacheArtifact.nodes)
-            tqdm.write(f"[{self.
+            tqdm.write(f"[{self._prefix_version}] writing nodes TSV to {nodes_path}")
             self.write_nodes(nodes_path)
 
     @property
@@ -1335,9 +1393,8 @@ class Obo:
                 rv.add_edge(_source, _target, key=_key)
 
         logger.info(
-            "[%s
-            self.
-            self.data_version,
+            "[%s] exported graph with %d nodes",
+            self._prefix_version,
             rv.number_of_nodes(),
         )
         return rv
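Taken together, these hunks drop the separate write_edges flag (per the new comment, edges are covered by the cache configuration), make the nodes TSV opt-in for direct callers of write_default, and give every build artifact a .gz suffix. A sketch of a direct call under the new defaults, again assuming obo is an instantiated Obo subclass:

    # nodes are opt-in now; edges come out via the cache artifacts
    obo.write_default(write_nodes=True, force=True)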
pyobo/utils/cache.py
CHANGED
@@ -1,6 +1,5 @@
 """Utilities for caching files."""
 
-import gzip
 import json
 import logging
 from collections.abc import Iterable, Mapping
@@ -14,7 +13,7 @@ from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
 
-from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
+from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv
 
 __all__ = [
     "cached_collection",
@@ -70,13 +69,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"
 
 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with
+    with safe_open(path, read=True) as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with
+    with safe_open(path, read=False) as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
 
 
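With safe_open in place, both nodelink helpers share the same suffix-driven gzip logic. A round-trip sketch with a throwaway graph and a hypothetical path:

    import networkx as nx

    from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

    graph = nx.MultiDiGraph()
    graph.add_edge("a", "b", key="part_of")

    # gzipped because of the ".gz" suffix
    write_gzipped_graph(graph, "graph.json.gz")
    assert get_gzipped_graph("graph.json.gz").number_of_edges() == 1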
pyobo/utils/io.py
CHANGED
@@ -1,26 +1,28 @@
 """I/O utilities."""
 
 import collections.abc
+import contextlib
 import csv
 import gzip
 import logging
 from collections import defaultdict
-from collections.abc import Iterable, Mapping
+from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import TypeVar
+from typing import Literal, TextIO, TypeVar
 
 import pandas as pd
 from tqdm.auto import tqdm
 
 __all__ = [
     "get_reader",
-    "get_writer",
     "multidict",
     "multisetdict",
     "open_map_tsv",
     "open_multimap_tsv",
     "open_reader",
+    "safe_open",
+    "safe_open_writer",
     "write_iterable_tsv",
     "write_map_tsv",
     "write_multimap_tsv",
@@ -36,7 +38,7 @@ Y = TypeVar("Y")
 def open_reader(path: str | Path, sep: str = "\t"):
     """Open a file and get a reader for it."""
     path = Path(path)
-    with
+    with safe_open(path, read=True) as file:
         yield get_reader(file, sep=sep)
 
 
@@ -45,16 +47,11 @@ def get_reader(x, sep: str = "\t"):
     return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
 
 
-def get_writer(x, sep: str = "\t"):
-    """Get a :func:`csv.writer` with PyOBO default settings."""
-    return csv.writer(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
     path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-    with
+    with safe_open(path, read=True) as file:
         if has_header:
             next(file)  # throw away header
         if use_tqdm:
@@ -84,9 +81,12 @@ def _help_multimap_tsv(
     use_tqdm: bool = False,
     has_header: bool = True,
 ) -> Iterable[tuple[str, str]]:
-    with
+    with safe_open(path, read=True) as file:
         if has_header:
-            next(file)  # throw away header
+            try:
+                next(file)  # throw away header
+            except gzip.BadGzipFile as e:
+                raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
             file = tqdm(file, desc=f"loading TSV from {path}")
         yield from get_reader(file)
@@ -145,8 +145,32 @@ def write_iterable_tsv(
     """Write a mapping dictionary to a TSV file."""
     it = (row for row in it if all(cell is not None for cell in row))
     it = sorted(it)
-    with
-        writer = get_writer(file, sep=sep)
+    with safe_open_writer(path, delimiter=sep) as writer:
         if header is not None:
             writer.writerow(header)
         writer.writerows(it)
+
+
+@contextlib.contextmanager
+def safe_open(
+    path: str | Path, read: bool, encoding: str | None = None
+) -> Generator[TextIO, None, None]:
+    """Safely open a file for reading or writing text."""
+    path = Path(path).expanduser().resolve()
+    mode: Literal["rt", "wt"] = "rt" if read else "wt"
+    if path.suffix.endswith(".gz"):
+        with gzip.open(path, mode=mode, encoding=encoding) as file:
+            yield file
+    else:
+        with open(path, mode=mode) as file:
+            yield file
+
+
+@contextlib.contextmanager
+def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
+    """Open a CSV writer, wrapping :func:`csv.writer`."""
+    if isinstance(f, str | Path):
+        with safe_open(f, read=False) as file:
+            yield csv.writer(file, delimiter=delimiter)
+    else:
+        yield csv.writer(f, delimiter=delimiter)
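safe_open dispatches on the .gz suffix, and safe_open_writer accepts either a path (opened via safe_open) or an already-open text handle. A usage sketch with hypothetical file names:

    from pyobo.utils.io import safe_open, safe_open_writer

    # written through gzip because of the ".gz" suffix; delimiter defaults to tab
    with safe_open_writer("rows.tsv.gz") as writer:
        writer.writerow(("curie", "name"))
        writer.writerow(("hgnc:5", "A1BG"))

    # read back transparently
    with safe_open("rows.tsv.gz", read=True) as file:
        print(file.read())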
pyobo/utils/path.py
CHANGED
@@ -99,25 +99,22 @@ def ensure_df(
 class CacheArtifact(enum.Enum):
     """An enumeration for."""
 
-    names = "names.tsv"
-    definitions = "definitions.tsv"
-    species = "species.tsv"
-
-
-
-
-
-
-
-
-
-
-
-
-
-    nodes = "nodes.tsv"
-    edges = "edges.tsv"
+    names = "names.tsv.gz"
+    definitions = "definitions.tsv.gz"
+    species = "species.tsv.gz"
+    mappings = "mappings.tsv.gz"
+    relations = "relations.tsv.gz"
+    alts = "alt_ids.tsv.gz"
+    typedefs = "typedefs.tsv.gz"
+    literal_mappings = "literal_mappings.tsv.gz"
+    references = "references.tsv.gz"
+    obsoletes = "obsolete.tsv.gz"
+
+    literal_properties = "literal_properties.tsv.gz"
+    object_properties = "object_properties.tsv.gz"
+
+    nodes = "nodes.tsv.gz"
+    edges = "edges.tsv.gz"
 
     prefixes = "prefixes.json"
     metadata = "metadata.json"