pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/cli/database.py +7 -1
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/identifier_utils/api.py +3 -1
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/drugcentral.py +2 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +89 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +125 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +142 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/struct.py
CHANGED
@@ -8,6 +8,7 @@ import json
 import logging
 import os
 import sys
+import tempfile
 import warnings
 from collections import ChainMap, defaultdict
 from collections.abc import Callable, Collection, Iterable, Iterator, Mapping, Sequence
@@ -25,6 +26,7 @@ import ssslm
 from curies import Converter, ReferenceTuple
 from curies import vocabulary as _cv
 from more_click import force_option, verbose_option
+from pystow.utils import safe_open
 from tqdm.auto import tqdm
 from typing_extensions import Self
 
@@ -70,7 +72,7 @@ from ..constants import (
     TARGET_PREFIX,
 )
 from ..utils.cache import write_gzipped_graph
-from ..utils.io import multidict,
+from ..utils.io import multidict, write_iterable_tsv
 from ..utils.path import (
     CacheArtifact,
     get_cache_path,
@@ -87,6 +89,7 @@ __all__ = [
     "TypeDef",
     "abbreviation",
     "acronym",
+    "build_ontology",
     "make_ad_hoc_ontology",
 ]
 
@@ -746,13 +749,23 @@ class Obo:
             help="Re-process the data, but don't download it again.",
         )
         @click.option("--owl", is_flag=True, help="Write OWL via ROBOT")
+        @click.option("--obo", is_flag=True, help="Write OBO")
         @click.option("--ofn", is_flag=True, help="Write Functional OWL (OFN)")
         @click.option("--ttl", is_flag=True, help="Write turtle RDF via OFN")
+        @click.option("--cache/--no-cache", is_flag=True, help="Write the cache", default=True)
         @click.option(
             "--version", help="Specify data version to get. Use this if bioversions is acting up."
         )
-        def _main(
-
+        def _main(
+            force: bool,
+            obo: bool,
+            owl: bool,
+            ofn: bool,
+            ttl: bool,
+            version: str | None,
+            rewrite: bool,
+            cache: bool,
+        ) -> None:
             try:
                 inst = cls(force=force, data_version=version)
             except Exception as e:
@@ -760,13 +773,14 @@ class Obo:
                 sys.exit(1)
             inst.write_default(
                 write_obograph=False,
-                write_obo=
+                write_obo=obo,
                 write_owl=owl,
                 write_ofn=ofn,
                 write_ttl=ttl,
                 write_nodes=True,
                 force=force or rewrite,
                 use_tqdm=True,
+                write_cache=cache,
             )
 
         return _main
@@ -909,6 +923,8 @@ class Obo:
                     end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
                 case Reference():
                     end = reference_escape(value, ontology_prefix=self.ontology)
+                case _:
+                    raise TypeError(f"Invalid property value: {value}")
             yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
 
     def _iterate_property_pairs(self) -> Iterable[Annotation]:
@@ -925,10 +941,21 @@ class Obo:
             license_literal = OBOLiteral.string(license_spdx_id)
             yield Annotation(v.has_license, license_literal)
 
-        # Description
         if description := bioregistry.get_description(self.ontology):
-            description = obo_escape_slim(description.strip())
             yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
+        if homepage := bioregistry.get_homepage(self.ontology):
+            yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
+        if repository := bioregistry.get_repository(self.ontology):
+            yield Annotation(v.has_repository, OBOLiteral.uri(repository))
+        if logo := bioregistry.get_logo(self.ontology):
+            yield Annotation(v.has_logo, OBOLiteral.uri(logo))
+        if mailing_list := bioregistry.get_mailing_list(self.ontology):
+            yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
+        if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
+            yield Annotation(
+                v.has_maintainer,
+                Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
+            )
 
         # Root terms
         for root_term in self.root_terms or []:
@@ -973,7 +1000,7 @@ class Obo:
             unit="line",
         )
         if isinstance(file, str | Path | os.PathLike):
-            with safe_open(file,
+            with safe_open(file, operation="write") as fh:
                 self._write_lines(it, fh)
         else:
             self._write_lines(it, file)
@@ -995,6 +1022,15 @@ class Obo:
         ofn = get_ofn_from_obo(self)
         ofn.write_funowl(path)
 
+    def write_owl(self, path: str | Path) -> None:
+        """Write OWL, by first outputting OFN then converting with ROBOT."""
+        from bioontologies import robot
+
+        with tempfile.TemporaryDirectory() as directory:
+            ofn_path = Path(directory).joinpath("tmp.ofn")
+            self.write_ofn(ofn_path)
+            robot.convert(ofn_path, path)
+
     def write_rdf(self, path: str | Path) -> None:
         """Write as Turtle RDF."""
         from .functional.obo_to_functional import get_ofn_from_obo
@@ -1149,7 +1185,7 @@ class Obo:
         metadata = self.get_metadata()
         for path in (self._root_metadata_path, self._get_cache_path(CacheArtifact.metadata)):
            logger.debug("[%s] caching metadata to %s", self._prefix_version, path)
-            with safe_open(path,
+            with safe_open(path, operation="write") as file:
                json.dump(metadata, file, indent=2)
 
     def write_prefix_map(self) -> None:
@@ -2265,6 +2301,87 @@ class AdHocOntologyBase(Obo):
     """A base class for ad-hoc ontologies."""
 
 
+def build_ontology(
+    prefix: str,
+    *,
+    terms: list[Term] | None = None,
+    synonym_typedefs: list[SynonymTypeDef] | None = None,
+    typedefs: list[TypeDef] | None = None,
+    name: str | None = None,  # inferred
+    version: str | None = None,
+    idspaces: dict[str, str] | None = None,
+    root_terms: list[Reference] | None = None,
+    subsetdefs: list[tuple[Reference, str]] | None = None,
+    properties: list[Annotation] | None = None,
+    imports: list[str] | None = None,
+    description: str | None = None,
+    homepage: str | None = None,
+    mailing_list: str | None = None,
+    logo: str | None = None,
+    repository: str | None = None,
+) -> Obo:
+    """Build an ontology from parts."""
+    if name is None:
+        name = bioregistry.get_name(prefix)
+    # TODO auto-populate license and other properties
+
+    if properties is None:
+        properties = []
+    if typedefs is None:
+        typedefs = []
+
+    if description:
+        from .typedef import has_description
+
+        properties.append(Annotation.string(has_description.reference, description))
+        if has_description not in typedefs:
+            typedefs.append(has_description)  # TODO get proper typedef
+
+    if homepage:
+        from .typedef import has_homepage
+
+        properties.append(Annotation.uri(has_homepage.reference, homepage))
+        if has_homepage not in typedefs:
+            typedefs.append(has_homepage)
+
+    if logo:
+        from .typedef import has_depiction
+
+        properties.append(Annotation.uri(has_depiction.reference, logo))
+        if has_depiction not in typedefs:
+            typedefs.append(has_depiction)
+
+    if mailing_list:
+        from .typedef import has_mailing_list
+
+        properties.append(Annotation.string(has_mailing_list.reference, mailing_list))
+        if has_mailing_list not in typedefs:
+            typedefs.append(has_mailing_list)
+
+    if repository:
+        from .typedef import has_repository
+
+        properties.append(Annotation.uri(has_repository.reference, repository))
+        if has_repository not in typedefs:
+            typedefs.append(has_repository)
+
+    return make_ad_hoc_ontology(
+        _ontology=prefix,
+        _name=name,
+        # _auto_generated_by
+        _typedefs=typedefs,
+        _synonym_typedefs=synonym_typedefs,
+        # _date: datetime.datetime | None = None,
+        _data_version=version,
+        _idspaces=idspaces,
+        _root_terms=root_terms,
+        _subsetdefs=subsetdefs,
+        _property_values=properties,
+        _imports=imports,
+        terms=terms,
+    )
+
+
 def make_ad_hoc_ontology(
     _ontology: str,
     _name: str | None = None,
pyobo/struct/struct_utils.py
CHANGED
@@ -63,6 +63,16 @@ class Annotation(NamedTuple):
         """Return a literal property for a float."""
         return cls(predicate, OBOLiteral.float(value))
 
+    @classmethod
+    def uri(cls, predicate: Reference, uri: str) -> Self:
+        """Return a literal property for a URI."""
+        return cls(predicate, OBOLiteral.uri(uri))
+
+    @classmethod
+    def string(cls, predicate: Reference, value: str, *, language: str | None = None) -> Self:
+        """Return a literal property for a float."""
+        return cls(predicate, OBOLiteral.string(value, language=language))
+
     @staticmethod
     def _sort_key(x: Annotation):
         return x.predicate, _reference_or_literal_key(x.value)
pyobo/struct/typedef.py
CHANGED
@@ -15,7 +15,9 @@ __all__ = [
     "alternative_term",
     "broad_match",
     "close_match",
+    "contributes_to_condition",
     "default_typedefs",
+    "derives_from_organism",
     "editor_note",
     "enables",
     "exact_match",
@@ -24,10 +26,12 @@ __all__ = [
     "gene_product_member_of",
     "has_contributor",
     "has_dbxref",
+    "has_depiction",
     "has_end_date",
     "has_gene_product",
     "has_homepage",
     "has_inchi",
+    "has_mailbox",
     "has_mature",
     "has_member",
     "has_part",
@@ -103,12 +107,18 @@ has_component = TypeDef(
 derives_from = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
 )
+derives_from_organism = TypeDef(
+    reference=Reference(prefix="CLO", identifier="0037207", name="derives from organism")
+)
 molecularly_interacts_with = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
 )
 located_in = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
 )
+contributes_to_condition = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
+)
 exact_match = TypeDef(reference=v.exact_match, is_metadata_tag=True)
 narrow_match = TypeDef(reference=v.narrow_match, is_metadata_tag=True)
 broad_match = TypeDef(reference=v.broad_match, is_metadata_tag=True)
@@ -257,9 +267,11 @@ has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v
 
 has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)
 
-has_homepage = TypeDef(
-
-)
+has_homepage = TypeDef(reference=v.has_homepage, is_metadata_tag=True)
+has_depiction = TypeDef(reference=v.has_depiction, is_metadata_tag=True)
+has_mailbox = TypeDef(reference=v.has_mailbox, is_metadata_tag=True)
+has_mailing_list = TypeDef(reference=v.has_mailing_list, is_metadata_tag=True)
+has_repository = TypeDef(reference=v.has_repository, is_metadata_tag=True)
 
 has_category = TypeDef(
     reference=Reference(prefix="biolink", identifier="category", name="has category"),
pyobo/struct/vocabulary.py
CHANGED
@@ -90,6 +90,14 @@ has_description = _c(_v.has_description)
 has_license = _c(_v.has_license)
 has_title = _c(_v.has_title)
 
+has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")
+has_logo = Reference(prefix="foaf", identifier="logo", name="has logo")
+has_mailbox = Reference(prefix="foaf", identifier="mbox", name="has mailbox")
+has_depiction = Reference(prefix="foaf", identifier="depicted_by", name="depicted by")
+has_repository = Reference(prefix="doap", identifier="repository", name="has repository")
+has_mailing_list = Reference(prefix="doap", identifier="mailing-list", name="has mailing list")
+has_maintainer = Reference(prefix="doap", identifier="maintainer", name="has maintainer")
+
 has_part = Reference(prefix=BFO_PREFIX, identifier="0000051", name="has part")
 part_of = Reference(prefix=BFO_PREFIX, identifier="0000050", name="part of")
 orthologous = Reference(
pyobo/utils/cache.py
CHANGED
@@ -12,8 +12,9 @@ from pystow.cache import CachedCollection as cached_collection  # noqa:N813
 from pystow.cache import CachedDataFrame as cached_df  # noqa:N813
 from pystow.cache import CachedJSON as cached_json  # noqa:N813
 from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
+from pystow.utils import safe_open
 
-from .io import open_map_tsv, open_multimap_tsv,
+from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
     "cached_collection",
@@ -69,13 +70,13 @@ NODE_LINK_STYLE = "links"  # TODO update to "edges"
 
 def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
-    with safe_open(path, read
+    with safe_open(path, operation="read") as file:
         return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
 def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
-    with safe_open(path,
+    with safe_open(path, operation="write") as file:
         json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
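
A round-trip sketch for the node-link graph helpers, which now delegate gzip handling to `pystow.utils.safe_open`; the filename is arbitrary:

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

graph = nx.MultiDiGraph()
graph.add_edge("a", "b", key="part_of")
write_gzipped_graph(graph, "graph.json.gz")
assert set(get_gzipped_graph("graph.json.gz").edges(keys=True)) == {("a", "b", "part_of")}
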
pyobo/utils/io.py
CHANGED
@@ -1,27 +1,24 @@
 """I/O utilities."""
 
 import collections.abc
-import contextlib
-import csv
 import gzip
 import logging
 from collections import defaultdict
 from collections.abc import Generator, Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import
+from typing import TypeVar, cast
 
 import pandas as pd
+import pystow.utils
+from pystow.utils import safe_open_reader, safe_open_writer
 from tqdm.auto import tqdm
 
 __all__ = [
-    "get_reader",
     "multidict",
     "multisetdict",
     "open_map_tsv",
     "open_multimap_tsv",
-    "open_reader",
-    "safe_open",
     "safe_open_writer",
     "write_iterable_tsv",
     "write_map_tsv",
@@ -34,35 +31,22 @@ X = TypeVar("X")
 Y = TypeVar("Y")
 
 
-@contextmanager
-def open_reader(path: str | Path, sep: str = "\t"):
-    """Open a file and get a reader for it."""
-    path = Path(path)
-    with safe_open(path, read=True) as file:
-        yield get_reader(file, sep=sep)
-
-
-def get_reader(x, sep: str = "\t"):
-    """Get a :func:`csv.reader` with PyOBO default settings."""
-    return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
-
-
 def open_map_tsv(
     path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
-
+    rv = {}
+    with pystow.utils.safe_open_reader(path) as reader:
         if has_header:
-            next(
+            next(reader)  # throw away header
         if use_tqdm:
-
-
-        for row in get_reader(file):
+            reader = tqdm(reader, desc=f"loading TSV from {path}")
+        for row in reader:
             if len(row) != 2:
                 logger.warning("[%s] malformed row can not be put in dict: %s", path, row)
                 continue
             rv[row[0]] = row[1]
-
+    return rv
 
 
 def open_multimap_tsv(
@@ -72,24 +56,27 @@ def open_multimap_tsv(
     has_header: bool = True,
 ) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
-
+    with _help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header) as file:
+        return multidict(file)
 
 
+@contextmanager
 def _help_multimap_tsv(
     path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[tuple[str, str]]:
-    with
+) -> Generator[Iterable[tuple[str, str]], None, None]:
+    with safe_open_reader(path) as reader:
         if has_header:
             try:
-                next(
+                next(reader)  # throw away header
             except gzip.BadGzipFile as e:
                 raise ValueError(f"could not open file {path}") from e
         if use_tqdm:
-
-
+            yield tqdm(reader, desc=f"loading TSV from {path}")
+        else:
+            yield cast(Iterable[tuple[str, str]], reader)
 
 
 def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
@@ -149,28 +136,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-@contextlib.contextmanager
-def safe_open(
-    path: str | Path, read: bool, encoding: str | None = None
-) -> Generator[TextIO, None, None]:
-    """Safely open a file for reading or writing text."""
-    path = Path(path).expanduser().resolve()
-    mode: Literal["rt", "wt"] = "rt" if read else "wt"
-    if path.suffix.endswith(".gz"):
-        with gzip.open(path, mode=mode, encoding=encoding) as file:
-            yield file
-    else:
-        with open(path, mode=mode) as file:
-            yield file
-
-
-@contextlib.contextmanager
-def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"):  # type:ignore
-    """Open a CSV writer, wrapping :func:`csv.writer`."""
-    if isinstance(f, str | Path):
-        with safe_open(f, read=False) as file:
-            yield csv.writer(file, delimiter=delimiter)
-    else:
-        yield csv.writer(f, delimiter=delimiter)
pyobo/utils/misc.py
CHANGED
@@ -1,9 +1,17 @@
 """Miscellaneous utilities."""
 
+from __future__ import annotations
+
 import logging
+from collections.abc import Callable, Iterable
 from datetime import datetime
 
+import bioversions.utils
+
+from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
+
 __all__ = [
+    "VERSION_GETTERS",
     "cleanup_version",
 ]
 
@@ -15,8 +23,11 @@ BIZARRE_LOGGED = set()
 VERSION_REWRITES = {
     "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
     "http://www.w3.org/2006/time#2016": "2016",  # for time
+    "https://purl.org/ontology/modalia#1.0.0": "1.0.0",  # for dalia
+}
+STATIC_VERSION_REWRITES = {
+    "orth": "2",
 }
-STATIC_VERSION_REWRITES = {"orth": "2"}
 VERSION_PREFIXES = [
     "http://www.orpha.net/version",
     "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
@@ -27,17 +38,36 @@ VERSION_PREFIXES = [
     "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
     "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
     "https://purl.dataone.org/odo/ADCAD/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://nfdi.fiz-karlsruhe.de/ontology/",
+    "http://www.w3.org/ns/prov-",
+    "https://raw.githubusercontent.com/enpadasi/Ontology-for-Nutritional-Studies/releases/download/v",
+    "http://purl.jp/bio/4/ontology/iobc/",  # like http://purl.jp/bio/4/ontology/iobc/1.6.0
+    "http://w3id.org/nfdi4ing/metadata4ing/",  # like http://w3id.org/nfdi4ing/metadata4ing/1.3.1
+    "http://www.semanticweb.com/OntoRxn/",  # like http://www.semanticweb.com/OntoRxn/0.2.5
+    "https://w3id.org/lehrplan/ontology/",  # like in https://w3id.org/lehrplan/ontology/1.0.0-4
+    "http://www.ebi.ac.uk/swo/version/",  # http://www.ebi.ac.uk/swo/version/6.0
+    "https://w3id.org/emi/version/",
 ]
 VERSION_PREFIX_SPLITS = [
     "http://www.ebi.ac.uk/efo/releases/v",
     "http://www.ebi.ac.uk/swo/swo.owl/",
     "http://semanticscience.org/ontology/sio/v",
     "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+    "http://nmrml.org/cv/v",  # as in http://nmrml.org/cv/v1.1.0/nmrCV
+    "http://enanomapper.github.io/ontologies/releases/",  # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
 ]
+BAD = {
+    "http://purl.obolibrary.org/obo",
+    "http://www.bioassayontology.org/bao/bao_complete",
+}
 
 
 def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
+    # in case a literal string that wasn't parsed properly gets put in
+    data_version = data_version.strip('"')
+
     if data_version in VERSION_REWRITES:
         return VERSION_REWRITES[data_version]
 
@@ -74,3 +104,114 @@ def cleanup_version(data_version: str, prefix: str) -> str:
         logger.debug("[%s] bizarre version: %s", prefix, data_version)
         BIZARRE_LOGGED.add((prefix, data_version))
     return data_version
+
+
+def _get_obo_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_obo_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_owl_version(prefix: str, url: str, *, max_lines: int = 200) -> str | None:
+    rv = bioversions.utils.get_owl_xml_version(url, max_lines=max_lines)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+def _get_obograph_json_version(prefix: str, url: str) -> str | None:
+    rv = bioversions.utils.get_obograph_json_version(url)
+    if rv is None:
+        return None
+    return cleanup_version(rv, prefix)
+
+
+#: A mapping from data type to gersion getter function
+VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
+    "obo": _get_obo_version,
+    "owl": _get_owl_version,
+    "json": _get_obograph_json_version,
+}
+
+
+def _prioritize_version(
+    data_version: str | None,
+    ontology_prefix: str,
+    version: str | None,
+    date: datetime | None,
+) -> str | None:
+    """Process version information coming from several sources and normalize them."""
+    if ontology_prefix in STATIC_VERSION_REWRITES:
+        return STATIC_VERSION_REWRITES[ontology_prefix]
+
+    if version:
+        if version in BAD:
+            logger.debug("[%s] had known bad version, returning None: ", ontology_prefix, version)
+            return None
+
+        clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
+        if not data_version:
+            logger.debug(
+                "[%s] did not have a version, overriding with %s",
+                ontology_prefix,
+                clean_injected_version,
+            )
+            return clean_injected_version
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        if clean_data_version != clean_injected_version:
+            # in this case, we're going to trust the one that's passed
+            # through explicitly more than the graph's content
+            logger.debug(
+                "[%s] had version %s, overriding with %s",
+                ontology_prefix,
+                data_version,
+                version,
+            )
+        return clean_injected_version
+
+    if data_version:
+        if data_version in BAD:
+            logger.debug(
+                "[%s] had known bad version, returning None: ", ontology_prefix, data_version
+            )
+            return None
+
+        clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
+        logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
+        return clean_data_version
+
+    if date is not None:
+        derived_date_version = date.strftime("%Y-%m-%d")
+        logger.debug(
+            "[%s] does not report a version. falling back to date: %s",
+            ontology_prefix,
+            derived_date_version,
+        )
+        return derived_date_version
+
+    logger.debug("[%s] does not report a version nor a date", ontology_prefix)
+    return None
+
+
+def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str]]:
+    # assume that all possible files that can be downloaded
+    # are in sync and have the same version
+    for ontology_format, get_url_func in ONTOLOGY_GETTERS:
+        url = get_url_func(prefix)
+        if url is None:
+            continue
+        yield ontology_format, url
+
+
+def _get_version_from_artifact(prefix: str) -> str | None:
+    for ontology_format, url in _get_getter_urls(prefix):
+        # Try to peak into the file to get the version without fully downloading
+        get_version_func = VERSION_GETTERS.get(ontology_format)
+        if get_version_func is None:
+            continue
+        version = get_version_func(prefix, url)
+        if version:
+            return cleanup_version(version, prefix=prefix)
+    return None