pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
"""Utilities for caching files."""
|
|
2
2
|
|
|
3
|
-
import gzip
|
|
4
3
|
import json
|
|
5
4
|
import logging
|
|
6
|
-
import os
|
|
7
5
|
from collections.abc import Iterable, Mapping
|
|
8
6
|
from pathlib import Path
|
|
9
|
-
from typing import Generic, TypeVar
|
|
7
|
+
from typing import Generic, TypeVar
|
|
10
8
|
|
|
11
9
|
import networkx as nx
|
|
12
10
|
from pystow.cache import Cached
|
|
@@ -15,18 +13,18 @@ from pystow.cache import CachedDataFrame as cached_df # noqa:N813
|
|
|
15
13
|
from pystow.cache import CachedJSON as cached_json # noqa:N813
|
|
16
14
|
from pystow.cache import CachedPickle as cached_pickle # noqa:N813
|
|
17
15
|
|
|
18
|
-
from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
|
|
16
|
+
from .io import open_map_tsv, open_multimap_tsv, safe_open, write_map_tsv, write_multimap_tsv
|
|
19
17
|
|
|
20
18
|
__all__ = [
|
|
21
|
-
# from pystow
|
|
22
|
-
"cached_json",
|
|
23
19
|
"cached_collection",
|
|
24
20
|
"cached_df",
|
|
25
|
-
"cached_pickle",
|
|
26
21
|
# implemented here
|
|
27
22
|
"cached_graph",
|
|
23
|
+
# from pystow
|
|
24
|
+
"cached_json",
|
|
28
25
|
"cached_mapping",
|
|
29
26
|
"cached_multidict",
|
|
27
|
+
"cached_pickle",
|
|
30
28
|
]
|
|
31
29
|
|
|
32
30
|
logger = logging.getLogger(__name__)
|
|
@@ -39,14 +37,15 @@ class _CachedMapping(Cached[X], Generic[X]):
|
|
|
39
37
|
|
|
40
38
|
def __init__(
|
|
41
39
|
self,
|
|
42
|
-
path:
|
|
40
|
+
path: str | Path,
|
|
43
41
|
header: Iterable[str],
|
|
44
42
|
*,
|
|
45
43
|
use_tqdm: bool = False,
|
|
46
44
|
force: bool = False,
|
|
45
|
+
cache: bool = True,
|
|
47
46
|
):
|
|
48
47
|
"""Initialize the mapping cache."""
|
|
49
|
-
super().__init__(path=path, force=force)
|
|
48
|
+
super().__init__(path=path, cache=cache, force=force)
|
|
50
49
|
self.header = header
|
|
51
50
|
self.use_tqdm = use_tqdm
|
|
52
51
|
|
|
@@ -65,17 +64,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
|
|
|
65
64
|
|
|
66
65
|
cached_mapping = CachedMapping
|
|
67
66
|
|
|
67
|
+
NODE_LINK_STYLE = "links" # TODO update to "edges"
|
|
68
|
+
|
|
68
69
|
|
|
69
|
-
def get_gzipped_graph(path:
|
|
70
|
+
def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
|
|
70
71
|
"""Read a graph that's gzipped nodelink."""
|
|
71
|
-
with
|
|
72
|
-
return nx.node_link_graph(json.load(file))
|
|
72
|
+
with safe_open(path, read=True) as file:
|
|
73
|
+
return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
|
|
73
74
|
|
|
74
75
|
|
|
75
|
-
def write_gzipped_graph(graph: nx.MultiDiGraph, path:
|
|
76
|
+
def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
|
|
76
77
|
"""Write a graph as gzipped nodelink."""
|
|
77
|
-
with
|
|
78
|
-
json.dump(nx.node_link_data(graph), file)
|
|
78
|
+
with safe_open(path, read=False) as file:
|
|
79
|
+
json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
|
|
79
80
|
|
|
80
81
|
|
|
81
82
|
class CachedGraph(Cached[nx.MultiDiGraph]):
|
pyobo/utils/io.py
CHANGED
|
@@ -1,33 +1,31 @@
|
|
|
1
1
|
"""I/O utilities."""
|
|
2
2
|
|
|
3
3
|
import collections.abc
|
|
4
|
+
import contextlib
|
|
4
5
|
import csv
|
|
5
6
|
import gzip
|
|
6
7
|
import logging
|
|
7
|
-
import time
|
|
8
8
|
from collections import defaultdict
|
|
9
|
-
from collections.abc import Iterable, Mapping
|
|
9
|
+
from collections.abc import Generator, Iterable, Mapping
|
|
10
10
|
from contextlib import contextmanager
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import
|
|
13
|
-
from xml.etree.ElementTree import Element
|
|
12
|
+
from typing import Literal, TextIO, TypeVar
|
|
14
13
|
|
|
15
14
|
import pandas as pd
|
|
16
|
-
from lxml import etree
|
|
17
15
|
from tqdm.auto import tqdm
|
|
18
16
|
|
|
19
17
|
__all__ = [
|
|
20
|
-
"
|
|
21
|
-
"open_multimap_tsv",
|
|
18
|
+
"get_reader",
|
|
22
19
|
"multidict",
|
|
23
20
|
"multisetdict",
|
|
21
|
+
"open_map_tsv",
|
|
22
|
+
"open_multimap_tsv",
|
|
23
|
+
"open_reader",
|
|
24
|
+
"safe_open",
|
|
25
|
+
"safe_open_writer",
|
|
26
|
+
"write_iterable_tsv",
|
|
24
27
|
"write_map_tsv",
|
|
25
28
|
"write_multimap_tsv",
|
|
26
|
-
"write_iterable_tsv",
|
|
27
|
-
"parse_xml_gz",
|
|
28
|
-
"get_writer",
|
|
29
|
-
"open_reader",
|
|
30
|
-
"get_reader",
|
|
31
29
|
]
|
|
32
30
|
|
|
33
31
|
logger = logging.getLogger(__name__)
|
|
@@ -37,10 +35,10 @@ Y = TypeVar("Y")
|
|
|
37
35
|
|
|
38
36
|
|
|
39
37
|
@contextmanager
|
|
40
|
-
def open_reader(path:
|
|
38
|
+
def open_reader(path: str | Path, sep: str = "\t"):
|
|
41
39
|
"""Open a file and get a reader for it."""
|
|
42
40
|
path = Path(path)
|
|
43
|
-
with
|
|
41
|
+
with safe_open(path, read=True) as file:
|
|
44
42
|
yield get_reader(file, sep=sep)
|
|
45
43
|
|
|
46
44
|
|
|
@@ -49,16 +47,11 @@ def get_reader(x, sep: str = "\t"):
|
|
|
49
47
|
return csv.reader(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
|
|
50
48
|
|
|
51
49
|
|
|
52
|
-
def get_writer(x, sep: str = "\t"):
|
|
53
|
-
"""Get a :func:`csv.writer` with PyOBO default settings."""
|
|
54
|
-
return csv.writer(x, delimiter=sep, quoting=csv.QUOTE_MINIMAL)
|
|
55
|
-
|
|
56
|
-
|
|
57
50
|
def open_map_tsv(
|
|
58
|
-
path:
|
|
51
|
+
path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
|
|
59
52
|
) -> Mapping[str, str]:
|
|
60
53
|
"""Load a mapping TSV file into a dictionary."""
|
|
61
|
-
with
|
|
54
|
+
with safe_open(path, read=True) as file:
|
|
62
55
|
if has_header:
|
|
63
56
|
next(file) # throw away header
|
|
64
57
|
if use_tqdm:
|
|
@@ -73,7 +66,7 @@ def open_map_tsv(
|
|
|
73
66
|
|
|
74
67
|
|
|
75
68
|
def open_multimap_tsv(
|
|
76
|
-
path:
|
|
69
|
+
path: str | Path,
|
|
77
70
|
*,
|
|
78
71
|
use_tqdm: bool = False,
|
|
79
72
|
has_header: bool = True,
|
|
@@ -83,14 +76,17 @@ def open_multimap_tsv(
|
|
|
83
76
|
|
|
84
77
|
|
|
85
78
|
def _help_multimap_tsv(
|
|
86
|
-
path:
|
|
79
|
+
path: str | Path,
|
|
87
80
|
*,
|
|
88
81
|
use_tqdm: bool = False,
|
|
89
82
|
has_header: bool = True,
|
|
90
83
|
) -> Iterable[tuple[str, str]]:
|
|
91
|
-
with
|
|
84
|
+
with safe_open(path, read=True) as file:
|
|
92
85
|
if has_header:
|
|
93
|
-
|
|
86
|
+
try:
|
|
87
|
+
next(file) # throw away header
|
|
88
|
+
except gzip.BadGzipFile as e:
|
|
89
|
+
raise ValueError(f"could not open file {path}") from e
|
|
94
90
|
if use_tqdm:
|
|
95
91
|
file = tqdm(file, desc=f"loading TSV from {path}")
|
|
96
92
|
yield from get_reader(file)
|
|
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
|
|
|
115
111
|
|
|
116
112
|
def write_map_tsv(
|
|
117
113
|
*,
|
|
118
|
-
path:
|
|
119
|
-
header:
|
|
120
|
-
rv:
|
|
114
|
+
path: str | Path,
|
|
115
|
+
header: Iterable[str] | None = None,
|
|
116
|
+
rv: Iterable[tuple[str, str]] | Mapping[str, str],
|
|
121
117
|
sep: str = "\t",
|
|
122
118
|
) -> None:
|
|
123
119
|
"""Write a mapping dictionary to a TSV file."""
|
|
@@ -129,7 +125,7 @@ def write_map_tsv(
|
|
|
129
125
|
|
|
130
126
|
def write_multimap_tsv(
|
|
131
127
|
*,
|
|
132
|
-
path:
|
|
128
|
+
path: str | Path,
|
|
133
129
|
header: Iterable[str],
|
|
134
130
|
rv: Mapping[str, list[str]],
|
|
135
131
|
sep: str = "\t",
|
|
@@ -141,26 +137,40 @@ def write_multimap_tsv(
|
|
|
141
137
|
|
|
142
138
|
def write_iterable_tsv(
|
|
143
139
|
*,
|
|
144
|
-
path:
|
|
145
|
-
header:
|
|
140
|
+
path: str | Path,
|
|
141
|
+
header: Iterable[str] | None = None,
|
|
146
142
|
it: Iterable[tuple[str, ...]],
|
|
147
143
|
sep: str = "\t",
|
|
148
144
|
) -> None:
|
|
149
145
|
"""Write an iterable of string tuples to a TSV file, dropping rows with missing cells and sorting the rest."""
|
|
150
146
|
it = (row for row in it if all(cell is not None for cell in row))
|
|
151
147
|
it = sorted(it)
|
|
152
|
-
with
|
|
153
|
-
writer = get_writer(file, sep=sep)
|
|
148
|
+
with safe_open_writer(path, delimiter=sep) as writer:
|
|
154
149
|
if header is not None:
|
|
155
150
|
writer.writerow(header)
|
|
156
151
|
writer.writerows(it)
|
|
157
152
|
|
|
158
153
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
path =
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
154
|
+
@contextlib.contextmanager
|
|
155
|
+
def safe_open(
|
|
156
|
+
path: str | Path, read: bool, encoding: str | None = None
|
|
157
|
+
) -> Generator[TextIO, None, None]:
|
|
158
|
+
"""Safely open a file for reading or writing text."""
|
|
159
|
+
path = Path(path).expanduser().resolve()
|
|
160
|
+
mode: Literal["rt", "wt"] = "rt" if read else "wt"
|
|
161
|
+
if path.suffix.endswith(".gz"):
|
|
162
|
+
with gzip.open(path, mode=mode, encoding=encoding) as file:
|
|
163
|
+
yield file
|
|
164
|
+
else:
|
|
165
|
+
with open(path, mode=mode) as file:
|
|
166
|
+
yield file
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@contextlib.contextmanager
|
|
170
|
+
def safe_open_writer(f: str | Path | TextIO, *, delimiter: str = "\t"): # type:ignore
|
|
171
|
+
"""Open a CSV writer, wrapping :func:`csv.writer`."""
|
|
172
|
+
if isinstance(f, str | Path):
|
|
173
|
+
with safe_open(f, read=False) as file:
|
|
174
|
+
yield csv.writer(file, delimiter=delimiter)
|
|
175
|
+
else:
|
|
176
|
+
yield csv.writer(f, delimiter=delimiter)
|
pyobo/utils/iter.py
CHANGED
|
@@ -8,8 +8,8 @@ from typing import TypeVar
|
|
|
8
8
|
from more_itertools import peekable
|
|
9
9
|
|
|
10
10
|
__all__ = [
|
|
11
|
-
"iterate_together",
|
|
12
11
|
"iterate_gzips_together",
|
|
12
|
+
"iterate_together",
|
|
13
13
|
]
|
|
14
14
|
|
|
15
15
|
X = TypeVar("X")
|
|
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
|
|
|
20
20
|
def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
|
|
21
21
|
"""Iterate over two gzipped files together."""
|
|
22
22
|
with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
yield from iterate_together(
|
|
23
|
+
a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
|
|
24
|
+
b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
|
|
25
|
+
yield from iterate_together(a_reader, b_reader) # type:ignore
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def iterate_together(
|
|
@@ -38,7 +38,7 @@ def iterate_together(
|
|
|
38
38
|
- Each key in the index is present within both files
|
|
39
39
|
"""
|
|
40
40
|
b_peekable = peekable(b)
|
|
41
|
-
b_index
|
|
41
|
+
b_index: X | type[_Done] = b_peekable.peek()[0]
|
|
42
42
|
|
|
43
43
|
for a_index, a_value in a:
|
|
44
44
|
zs = []
|
pyobo/utils/misc.py
CHANGED
|
@@ -1,79 +1,67 @@
|
|
|
1
1
|
"""Miscellaneous utilities."""
|
|
2
2
|
|
|
3
|
-
import gzip
|
|
4
3
|
import logging
|
|
5
|
-
import os
|
|
6
4
|
from datetime import datetime
|
|
7
|
-
from subprocess import check_output
|
|
8
|
-
from typing import Optional
|
|
9
5
|
|
|
10
6
|
__all__ = [
|
|
11
|
-
"obo_to_obograph",
|
|
12
|
-
"obo_to_owl",
|
|
13
7
|
"cleanup_version",
|
|
14
8
|
]
|
|
15
9
|
|
|
16
|
-
|
|
17
10
|
logger = logging.getLogger(__name__)
|
|
18
11
|
|
|
19
|
-
|
|
20
|
-
def obo_to_obograph(obo_path, obograph_path) -> None:
|
|
21
|
-
"""Convert an OBO file to OBO Graph file with pronto."""
|
|
22
|
-
import pronto
|
|
23
|
-
|
|
24
|
-
ontology = pronto.Ontology(obo_path)
|
|
25
|
-
with gzip.open(obograph_path, "wb") as file:
|
|
26
|
-
ontology.dump(file, format="json")
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
|
|
30
|
-
"""Convert an OBO file to an OWL file with ROBOT."""
|
|
31
|
-
args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
|
|
32
|
-
ret = check_output( # noqa:S603
|
|
33
|
-
args,
|
|
34
|
-
cwd=os.path.dirname(__file__),
|
|
35
|
-
)
|
|
36
|
-
return ret.decode()
|
|
37
|
-
|
|
38
|
-
|
|
39
12
|
BIZARRE_LOGGED = set()
|
|
40
13
|
|
|
14
|
+
#: Rewrites for mostly static resources that have weird quirks
|
|
15
|
+
VERSION_REWRITES = {
|
|
16
|
+
"$Date: 2009/11/15 10:54:12 $": "2009-11-15", # for owl
|
|
17
|
+
"http://www.w3.org/2006/time#2016": "2016", # for time
|
|
18
|
+
}
|
|
19
|
+
STATIC_VERSION_REWRITES = {"orth": "2"}
|
|
20
|
+
VERSION_PREFIXES = [
|
|
21
|
+
"http://www.orpha.net/version",
|
|
22
|
+
"https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
|
|
23
|
+
"http://humanbehaviourchange.org/ontology/bcio.owl/",
|
|
24
|
+
"http://purl.org/pav/",
|
|
25
|
+
"http://identifiers.org/combine.specifications/teddy.rel-",
|
|
26
|
+
"https://purl.dataone.org/odo/MOSAIC/",
|
|
27
|
+
"http://purl.dataone.org/odo/SASAP/", # like in http://purl.dataone.org/odo/SASAP/0.3.1
|
|
28
|
+
"http://purl.dataone.org/odo/SENSO/", # like in http://purl.dataone.org/odo/SENSO/0.1.0
|
|
29
|
+
"https://purl.dataone.org/odo/ADCAD/",
|
|
30
|
+
]
|
|
31
|
+
VERSION_PREFIX_SPLITS = [
|
|
32
|
+
"http://www.ebi.ac.uk/efo/releases/v",
|
|
33
|
+
"http://www.ebi.ac.uk/swo/swo.owl/",
|
|
34
|
+
"http://semanticscience.org/ontology/sio/v",
|
|
35
|
+
"http://ontology.neuinfo.org/NIF/ttl/nif/version/",
|
|
36
|
+
]
|
|
37
|
+
|
|
41
38
|
|
|
42
|
-
def cleanup_version(data_version: str, prefix: str) ->
|
|
39
|
+
def cleanup_version(data_version: str, prefix: str) -> str:
|
|
43
40
|
"""Clean the version information."""
|
|
44
|
-
if data_version
|
|
45
|
-
|
|
41
|
+
if data_version in VERSION_REWRITES:
|
|
42
|
+
return VERSION_REWRITES[data_version]
|
|
43
|
+
|
|
44
|
+
data_version = data_version.removesuffix(".owl")
|
|
46
45
|
if data_version.endswith(prefix):
|
|
47
46
|
data_version = data_version[: -len(prefix)]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
return "2"
|
|
47
|
+
data_version = data_version.removesuffix("/")
|
|
48
|
+
|
|
49
|
+
data_version = data_version.removeprefix("releases/")
|
|
50
|
+
data_version = data_version.removeprefix("release/")
|
|
53
51
|
|
|
54
|
-
|
|
55
|
-
"http://www.orpha.net/version",
|
|
56
|
-
"https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
|
|
57
|
-
"http://humanbehaviourchange.org/ontology/bcio.owl/",
|
|
58
|
-
"http://purl.org/pav/",
|
|
59
|
-
"http://identifiers.org/combine.specifications/teddy.rel-",
|
|
60
|
-
]
|
|
61
|
-
for version_prefix in version_prefixes:
|
|
52
|
+
for version_prefix in VERSION_PREFIXES:
|
|
62
53
|
if data_version.startswith(version_prefix):
|
|
63
|
-
return data_version
|
|
54
|
+
return data_version.removeprefix(version_prefix)
|
|
64
55
|
|
|
65
|
-
|
|
66
|
-
"http://www.ebi.ac.uk/efo/releases/v",
|
|
67
|
-
"http://www.ebi.ac.uk/swo/swo.owl/",
|
|
68
|
-
"http://semanticscience.org/ontology/sio/v",
|
|
69
|
-
"http://ontology.neuinfo.org/NIF/ttl/nif/version/",
|
|
70
|
-
]
|
|
71
|
-
for version_prefix_split in version_prefixes_split:
|
|
56
|
+
for version_prefix_split in VERSION_PREFIX_SPLITS:
|
|
72
57
|
if data_version.startswith(version_prefix_split):
|
|
73
|
-
return data_version
|
|
58
|
+
return data_version.removeprefix(version_prefix_split).split("/")[0]
|
|
74
59
|
|
|
60
|
+
# use a heuristic to determine if the version is one of
|
|
61
|
+
# consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
|
|
75
62
|
if data_version.replace(".", "").isnumeric():
|
|
76
|
-
return data_version
|
|
63
|
+
return data_version
|
|
64
|
+
|
|
77
65
|
for v in reversed(data_version.split("/")):
|
|
78
66
|
v = v.strip()
|
|
79
67
|
try:
|
pyobo/utils/ndex_utils.py
CHANGED
|
File without changes
|
pyobo/utils/path.py
CHANGED
|
@@ -1,60 +1,42 @@
|
|
|
1
1
|
"""Utilities for building paths."""
|
|
2
2
|
|
|
3
|
+
import enum
|
|
3
4
|
import logging
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Literal
|
|
6
7
|
|
|
7
8
|
import pandas as pd
|
|
8
|
-
import
|
|
9
|
-
from pystow
|
|
9
|
+
from curies import Reference
|
|
10
|
+
from pystow import VersionHint
|
|
10
11
|
|
|
11
|
-
from
|
|
12
|
-
from ..constants import RAW_MODULE
|
|
12
|
+
from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
|
|
13
13
|
|
|
14
14
|
__all__ = [
|
|
15
|
-
"
|
|
16
|
-
"prefix_directory_join",
|
|
17
|
-
"prefix_cache_join",
|
|
18
|
-
"get_prefix_obo_path",
|
|
19
|
-
"ensure_path",
|
|
15
|
+
"CacheArtifact",
|
|
20
16
|
"ensure_df",
|
|
21
|
-
"
|
|
17
|
+
"ensure_path",
|
|
18
|
+
"get_cache_path",
|
|
19
|
+
"get_relation_cache_path",
|
|
20
|
+
"prefix_directory_join",
|
|
22
21
|
]
|
|
23
22
|
|
|
24
23
|
logger = logging.getLogger(__name__)
|
|
25
24
|
|
|
26
|
-
VersionHint = Union[None, str, Callable[[], Optional[str]]]
|
|
27
|
-
|
|
28
|
-
requests_ftp.monkeypatch_session()
|
|
29
|
-
|
|
30
25
|
|
|
31
26
|
def prefix_directory_join(
|
|
32
27
|
prefix: str,
|
|
33
28
|
*parts: str,
|
|
34
|
-
name:
|
|
29
|
+
name: str | None = None,
|
|
35
30
|
version: VersionHint = None,
|
|
36
31
|
ensure_exists: bool = True,
|
|
37
32
|
) -> Path:
|
|
38
33
|
"""Join in the prefix directory."""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
version
|
|
44
|
-
|
|
45
|
-
elif not isinstance(version, str):
|
|
46
|
-
raise TypeError(f"Invalid type: {version} ({type(version)})")
|
|
47
|
-
if version is None:
|
|
48
|
-
raise AssertionError
|
|
49
|
-
version = cleanup_version(version, prefix=prefix)
|
|
50
|
-
if version is not None and "/" in version:
|
|
51
|
-
raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
|
|
52
|
-
return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
|
|
56
|
-
"""Get the canonical path to the OBO file."""
|
|
57
|
-
return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
|
|
34
|
+
return RAW_MODULE.module(prefix).join(
|
|
35
|
+
*parts,
|
|
36
|
+
name=name,
|
|
37
|
+
ensure_exists=ensure_exists,
|
|
38
|
+
version=version,
|
|
39
|
+
)
|
|
58
40
|
|
|
59
41
|
|
|
60
42
|
def ensure_path(
|
|
@@ -62,36 +44,29 @@ def ensure_path(
|
|
|
62
44
|
*parts: str,
|
|
63
45
|
url: str,
|
|
64
46
|
version: VersionHint = None,
|
|
65
|
-
name:
|
|
47
|
+
name: str | None = None,
|
|
66
48
|
force: bool = False,
|
|
67
|
-
error_on_missing: bool = False,
|
|
68
49
|
backend: Literal["requests", "urllib"] = "urllib",
|
|
69
50
|
verify: bool = True,
|
|
70
|
-
|
|
51
|
+
**download_kwargs: Any,
|
|
52
|
+
) -> Path:
|
|
71
53
|
"""Download a file if it doesn't exist."""
|
|
72
|
-
if name is None:
|
|
73
|
-
name = name_from_url(url)
|
|
74
|
-
|
|
75
|
-
path = prefix_directory_join(prefix, *parts, name=name, version=version)
|
|
76
|
-
|
|
77
|
-
if not path.exists() and error_on_missing:
|
|
78
|
-
raise FileNotFoundError
|
|
79
|
-
|
|
80
|
-
kwargs: dict[str, Any]
|
|
81
54
|
if verify:
|
|
82
|
-
|
|
55
|
+
download_kwargs = {"backend": backend}
|
|
83
56
|
else:
|
|
84
57
|
if backend != "requests":
|
|
85
58
|
logger.warning("using requests since verify=False")
|
|
86
|
-
|
|
59
|
+
download_kwargs = {"backend": "requests", "verify": False}
|
|
87
60
|
|
|
88
|
-
|
|
61
|
+
path = RAW_MODULE.module(prefix).ensure(
|
|
62
|
+
*parts,
|
|
89
63
|
url=url,
|
|
90
|
-
|
|
64
|
+
name=name,
|
|
91
65
|
force=force,
|
|
92
|
-
|
|
66
|
+
version=version,
|
|
67
|
+
download_kwargs=download_kwargs,
|
|
93
68
|
)
|
|
94
|
-
return path
|
|
69
|
+
return path
|
|
95
70
|
|
|
96
71
|
|
|
97
72
|
def ensure_df(
|
|
@@ -99,7 +74,7 @@ def ensure_df(
|
|
|
99
74
|
*parts: str,
|
|
100
75
|
url: str,
|
|
101
76
|
version: VersionHint = None,
|
|
102
|
-
name:
|
|
77
|
+
name: str | None = None,
|
|
103
78
|
force: bool = False,
|
|
104
79
|
sep: str = "\t",
|
|
105
80
|
dtype=str,
|
|
@@ -121,21 +96,49 @@ def ensure_df(
|
|
|
121
96
|
return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
|
|
122
97
|
|
|
123
98
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
99
|
+
class CacheArtifact(enum.Enum):
|
|
100
|
+
"""An enumeration for the file names of cached artifacts."""
|
|
101
|
+
|
|
102
|
+
names = "names.tsv.gz"
|
|
103
|
+
definitions = "definitions.tsv.gz"
|
|
104
|
+
species = "species.tsv.gz"
|
|
105
|
+
mappings = "mappings.tsv.gz"
|
|
106
|
+
relations = "relations.tsv.gz"
|
|
107
|
+
alts = "alt_ids.tsv.gz"
|
|
108
|
+
typedefs = "typedefs.tsv.gz"
|
|
109
|
+
literal_mappings = "literal_mappings.tsv.gz"
|
|
110
|
+
references = "references.tsv.gz"
|
|
111
|
+
obsoletes = "obsolete.tsv.gz"
|
|
112
|
+
|
|
113
|
+
literal_properties = "literal_properties.tsv.gz"
|
|
114
|
+
object_properties = "object_properties.tsv.gz"
|
|
137
115
|
|
|
116
|
+
nodes = "nodes.tsv.gz"
|
|
117
|
+
edges = "edges.tsv.gz"
|
|
138
118
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
119
|
+
prefixes = "prefixes.json"
|
|
120
|
+
metadata = "metadata.json"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_cache_path(
|
|
124
|
+
ontology: str,
|
|
125
|
+
name: CacheArtifact,
|
|
126
|
+
*,
|
|
127
|
+
version: str | None = None,
|
|
128
|
+
) -> Path:
|
|
129
|
+
"""Get a cache path."""
|
|
130
|
+
return prefix_directory_join(
|
|
131
|
+
ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def get_relation_cache_path(
|
|
136
|
+
ontology: str,
|
|
137
|
+
reference: Reference,
|
|
138
|
+
*,
|
|
139
|
+
version: str | None = None,
|
|
140
|
+
) -> Path:
|
|
141
|
+
"""Get a relation cache path."""
|
|
142
|
+
return prefix_directory_join(
|
|
143
|
+
ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
|
|
144
|
+
)
|
pyobo/version.py
CHANGED
|
@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
|
|
|
8
8
|
|
|
9
9
|
__all__ = [
|
|
10
10
|
"VERSION",
|
|
11
|
-
"get_version",
|
|
12
11
|
"get_git_hash",
|
|
12
|
+
"get_version",
|
|
13
13
|
]
|
|
14
14
|
|
|
15
|
-
VERSION = "0.11.2"
|
|
15
|
+
VERSION = "0.12.1"
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def get_git_hash() -> str:
|
|
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
|
|
|
30
30
|
return ret.strip().decode("utf-8")[:8]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
def get_version(with_git_hash: bool = False):
|
|
33
|
+
def get_version(with_git_hash: bool = False) -> str:
|
|
34
34
|
"""Get the PyOBO version string, optionally including a git hash."""
|
|
35
35
|
return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
|
|
36
36
|
|