pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/utils/cache.py CHANGED

@@ -3,10 +3,9 @@
 import gzip
 import json
 import logging
-import os
 from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic, TypeVar, Union
+from typing import Generic, TypeVar
 
 import networkx as nx
 from pystow.cache import Cached
@@ -18,15 +17,15 @@ from pystow.cache import CachedPickle as cached_pickle  # noqa:N813
 from .io import open_map_tsv, open_multimap_tsv, write_map_tsv, write_multimap_tsv
 
 __all__ = [
-    # from pystow
-    "cached_json",
     "cached_collection",
     "cached_df",
-    "cached_pickle",
     # implemented here
     "cached_graph",
+    # from pystow
+    "cached_json",
     "cached_mapping",
     "cached_multidict",
+    "cached_pickle",
 ]
 
 logger = logging.getLogger(__name__)
@@ -39,14 +38,15 @@ class _CachedMapping(Cached[X], Generic[X]):
 
     def __init__(
         self,
-        path: Union[str, Path],
+        path: str | Path,
         header: Iterable[str],
         *,
         use_tqdm: bool = False,
         force: bool = False,
+        cache: bool = True,
     ):
         """Initialize the mapping cache."""
-        super().__init__(path=path, force=force)
+        super().__init__(path=path, cache=cache, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
 
@@ -65,17 +65,19 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
 
 cached_mapping = CachedMapping
 
+NODE_LINK_STYLE = "links"  # TODO update to "edges"
+
 
-def get_gzipped_graph(path: Union[str, Path]) -> nx.MultiDiGraph:
+def get_gzipped_graph(path: str | Path) -> nx.MultiDiGraph:
     """Read a graph that's gzipped nodelink."""
     with gzip.open(path, "rt") as file:
-        return nx.node_link_graph(json.load(file))
+        return nx.node_link_graph(json.load(file), edges=NODE_LINK_STYLE)
 
 
-def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
+def write_gzipped_graph(graph: nx.MultiDiGraph, path: str | Path) -> None:
     """Write a graph as gzipped nodelink."""
     with gzip.open(path, "wt") as file:
-        json.dump(nx.node_link_data(graph), file)
+        json.dump(nx.node_link_data(graph, edges=NODE_LINK_STYLE), file)
 
 
 class CachedGraph(Cached[nx.MultiDiGraph]):
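
Note: the new NODE_LINK_STYLE constant pins the JSON key that networkx uses for edge lists in node-link data, keeping reads and writes symmetric across networkx's ongoing "links" → "edges" rename. A minimal round-trip sketch (filename illustrative, assuming pyobo 0.12.0 and a networkx version that accepts the edges= keyword):

import networkx as nx

from pyobo.utils.cache import get_gzipped_graph, write_gzipped_graph

graph = nx.MultiDiGraph()
graph.add_edge("a", "b", key="part_of")

write_gzipped_graph(graph, "example.json.gz")     # serializes the edge list under "links"
roundtrip = get_gzipped_graph("example.json.gz")  # reads back with the same key
assert set(graph.edges) == set(roundtrip.edges)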
pyobo/utils/io.py CHANGED

@@ -4,30 +4,26 @@ import collections.abc
 import csv
 import gzip
 import logging
-import time
 from collections import defaultdict
 from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional, TypeVar, Union
-from xml.etree.ElementTree import Element
+from typing import TypeVar
 
 import pandas as pd
-from lxml import etree
 from tqdm.auto import tqdm
 
 __all__ = [
-    "open_map_tsv",
-    "open_multimap_tsv",
+    "get_reader",
+    "get_writer",
     "multidict",
     "multisetdict",
+    "open_map_tsv",
+    "open_multimap_tsv",
+    "open_reader",
+    "write_iterable_tsv",
     "write_map_tsv",
     "write_multimap_tsv",
-    "write_iterable_tsv",
-    "parse_xml_gz",
-    "get_writer",
-    "open_reader",
-    "get_reader",
 ]
 
 logger = logging.getLogger(__name__)
@@ -37,7 +33,7 @@ Y = TypeVar("Y")
 
 
 @contextmanager
-def open_reader(path: Union[str, Path], sep: str = "\t"):
+def open_reader(path: str | Path, sep: str = "\t"):
     """Open a file and get a reader for it."""
     path = Path(path)
     with gzip.open(path, "rt") if path.suffix == ".gz" else open(path) as file:
@@ -55,7 +51,7 @@ def get_writer(x, sep: str = "\t"):
 
 
 def open_map_tsv(
-    path: Union[str, Path], *, use_tqdm: bool = False, has_header: bool = True
+    path: str | Path, *, use_tqdm: bool = False, has_header: bool = True
 ) -> Mapping[str, str]:
     """Load a mapping TSV file into a dictionary."""
     with open(path) as file:
@@ -73,7 +69,7 @@ def open_map_tsv(
 
 
 def open_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -83,7 +79,7 @@ def open_multimap_tsv(
 
 
 def _help_multimap_tsv(
-    path: Union[str, Path],
+    path: str | Path,
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
@@ -115,9 +111,9 @@ def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
 
 def write_map_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
+    path: str | Path,
+    header: Iterable[str] | None = None,
+    rv: Iterable[tuple[str, str]] | Mapping[str, str],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -129,7 +125,7 @@ def write_map_tsv(
 
 def write_multimap_tsv(
     *,
-    path: Union[str, Path],
+    path: str | Path,
     header: Iterable[str],
     rv: Mapping[str, list[str]],
     sep: str = "\t",
@@ -141,8 +137,8 @@ def write_multimap_tsv(
 
 def write_iterable_tsv(
     *,
-    path: Union[str, Path],
-    header: Optional[Iterable[str]] = None,
+    path: str | Path,
+    header: Iterable[str] | None = None,
     it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
@@ -154,13 +150,3 @@ def write_iterable_tsv(
     if header is not None:
         writer.writerow(header)
     writer.writerows(it)
-
-
-def parse_xml_gz(path: Union[str, Path]) -> Element:
-    """Parse an XML file from a path to a GZIP file."""
-    path = Path(path).resolve()
-    t = time.time()
-    logger.info("parsing xml from %s", path)
-    tree = etree.parse(path.as_posix())  # type:ignore
-    logger.info("parsed xml in %.2f seconds", time.time() - t)
-    return tree.getroot()
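
Note: parse_xml_gz is gone (along with the lxml and time imports), so this module now only handles delimited files, and the helpers accept str | Path. A small usage sketch of the round trip between write_map_tsv and open_map_tsv (file name and contents are illustrative):

from pyobo.utils.io import open_map_tsv, write_map_tsv

write_map_tsv(
    path="names.tsv",
    header=["chebi_id", "name"],
    rv={"138488": "alsterpaullone"},
)
assert open_map_tsv("names.tsv") == {"138488": "alsterpaullone"}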
pyobo/utils/iter.py CHANGED

@@ -8,8 +8,8 @@ from typing import TypeVar
 from more_itertools import peekable
 
 __all__ = [
-    "iterate_together",
     "iterate_gzips_together",
+    "iterate_together",
 ]
 
 X = TypeVar("X")
@@ -20,9 +20,9 @@ Y = TypeVar("Y")
 def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
-
-
-        yield from iterate_together(
+        a_reader = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        b_reader = csv.reader(b, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+        yield from iterate_together(a_reader, b_reader)  # type:ignore
 
 
 def iterate_together(
@@ -38,7 +38,7 @@ def iterate_together(
     - Each key in the index is present within both files
     """
     b_peekable = peekable(b)
-    b_index = b_peekable.peek()[0]
+    b_index: X | type[_Done] = b_peekable.peek()[0]
 
     for a_index, a_value in a:
         zs = []
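
Note: iterate_gzips_together now builds both csv.reader objects on named lines before delegating to iterate_together, which merge-joins two iterables sorted by a shared key. A sketch on plain lists (output shape inferred from the Iterable[tuple[str, str, list[str]]] annotation above):

from pyobo.utils.iter import iterate_together

a = [("1", "a"), ("2", "b")]
b = [("1", "x"), ("1", "y"), ("2", "z")]
for row in iterate_together(iter(a), iter(b)):
    print(row)  # expected: ("1", "a", ["x", "y"]) then ("2", "b", ["z"])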
pyobo/utils/misc.py CHANGED

@@ -1,79 +1,67 @@
 """Miscellaneous utilities."""
 
-import gzip
 import logging
-import os
 from datetime import datetime
-from subprocess import check_output
-from typing import Optional
 
 __all__ = [
-    "obo_to_obograph",
-    "obo_to_owl",
     "cleanup_version",
 ]
 
-
 logger = logging.getLogger(__name__)
 
-
-def obo_to_obograph(obo_path, obograph_path) -> None:
-    """Convert an OBO file to OBO Graph file with pronto."""
-    import pronto
-
-    ontology = pronto.Ontology(obo_path)
-    with gzip.open(obograph_path, "wb") as file:
-        ontology.dump(file, format="json")
-
-
-def obo_to_owl(obo_path, owl_path, owl_format: str = "ofn"):
-    """Convert an OBO file to an OWL file with ROBOT."""
-    args = ["robot", "convert", "-i", obo_path, "-o", owl_path, "--format", owl_format]
-    ret = check_output(  # noqa:S603
-        args,
-        cwd=os.path.dirname(__file__),
-    )
-    return ret.decode()
-
-
 BIZARRE_LOGGED = set()
 
+#: Rewrites for mostly static resources that have weird quirks
+VERSION_REWRITES = {
+    "$Date: 2009/11/15 10:54:12 $": "2009-11-15",  # for owl
+    "http://www.w3.org/2006/time#2016": "2016",  # for time
+}
+STATIC_VERSION_REWRITES = {"orth": "2"}
+VERSION_PREFIXES = [
+    "http://www.orpha.net/version",
+    "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
+    "http://humanbehaviourchange.org/ontology/bcio.owl/",
+    "http://purl.org/pav/",
+    "http://identifiers.org/combine.specifications/teddy.rel-",
+    "https://purl.dataone.org/odo/MOSAIC/",
+    "http://purl.dataone.org/odo/SASAP/",  # like in http://purl.dataone.org/odo/SASAP/0.3.1
+    "http://purl.dataone.org/odo/SENSO/",  # like in http://purl.dataone.org/odo/SENSO/0.1.0
+    "https://purl.dataone.org/odo/ADCAD/",
+]
+VERSION_PREFIX_SPLITS = [
+    "http://www.ebi.ac.uk/efo/releases/v",
+    "http://www.ebi.ac.uk/swo/swo.owl/",
+    "http://semanticscience.org/ontology/sio/v",
+    "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
+]
+
 
-def cleanup_version(data_version: str, prefix: str) -> Optional[str]:
+def cleanup_version(data_version: str, prefix: str) -> str:
     """Clean the version information."""
-    if data_version
-
+    if data_version in VERSION_REWRITES:
+        return VERSION_REWRITES[data_version]
+
+    data_version = data_version.removesuffix(".owl")
     if data_version.endswith(prefix):
         data_version = data_version[: -len(prefix)]
-
-
-
-
-        return "2"
+    data_version = data_version.removesuffix("/")
+
+    data_version = data_version.removeprefix("releases/")
+    data_version = data_version.removeprefix("release/")
 
-    version_prefixes = [
-        "http://www.orpha.net/version",
-        "https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_",
-        "http://humanbehaviourchange.org/ontology/bcio.owl/",
-        "http://purl.org/pav/",
-        "http://identifiers.org/combine.specifications/teddy.rel-",
-    ]
-    for version_prefix in version_prefixes:
+    for version_prefix in VERSION_PREFIXES:
         if data_version.startswith(version_prefix):
-            return data_version
+            return data_version.removeprefix(version_prefix)
 
-    version_prefixes_split = [
-        "http://www.ebi.ac.uk/efo/releases/v",
-        "http://www.ebi.ac.uk/swo/swo.owl/",
-        "http://semanticscience.org/ontology/sio/v",
-        "http://ontology.neuinfo.org/NIF/ttl/nif/version/",
-    ]
-    for version_prefix_split in version_prefixes_split:
+    for version_prefix_split in VERSION_PREFIX_SPLITS:
         if data_version.startswith(version_prefix_split):
-            return data_version
+            return data_version.removeprefix(version_prefix_split).split("/")[0]
 
+    # use a heuristic to determine if the version is one of
+    # consecutive, major.minor, or semantic versioning (i.e., major.minor.patch)
     if data_version.replace(".", "").isnumeric():
-        return data_version
+        return data_version
+
     for v in reversed(data_version.split("/")):
         v = v.strip()
         try:
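
Note: the version-cleaning tables formerly built inside cleanup_version are now module-level constants, and prefix matches strip the prefix instead of returning the raw IRI. Two calls traced through the new logic (inputs taken from the constants above):

from pyobo.utils.misc import cleanup_version

# direct hit in VERSION_REWRITES
assert cleanup_version("$Date: 2009/11/15 10:54:12 $", prefix="owl") == "2009-11-15"

# the ".owl" suffix, ontology prefix, and trailing slash are stripped, then the
# EFO entry in VERSION_PREFIX_SPLITS isolates the version segment
assert (
    cleanup_version("http://www.ebi.ac.uk/efo/releases/v3.62.0/efo.owl", prefix="efo")
    == "3.62.0"
)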
pyobo/utils/ndex_utils.py CHANGED

File without changes
pyobo/utils/path.py CHANGED

@@ -1,60 +1,42 @@
 """Utilities for building paths."""
 
+import enum
 import logging
 from pathlib import Path
-from typing import Any, Callable, Literal, Optional, Union
+from typing import Any, Literal
 
 import pandas as pd
-import requests_ftp
-from pystow.utils import name_from_url
+from curies import Reference
+from pystow import VersionHint
 
-from .misc import cleanup_version
-from ..constants import RAW_MODULE
+from ..constants import CACHE_SUBDIRECTORY_NAME, RAW_MODULE, RELATION_SUBDIRECTORY_NAME
 
 __all__ = [
-    "
-    "prefix_directory_join",
-    "prefix_cache_join",
-    "get_prefix_obo_path",
-    "ensure_path",
+    "CacheArtifact",
     "ensure_df",
-    "
+    "ensure_path",
+    "get_cache_path",
+    "get_relation_cache_path",
+    "prefix_directory_join",
 ]
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], Optional[str]]]
-
-requests_ftp.monkeypatch_session()
-
 
 def prefix_directory_join(
     prefix: str,
     *parts: str,
-    name: Optional[str] = None,
+    name: str | None = None,
     version: VersionHint = None,
     ensure_exists: bool = True,
 ) -> Path:
     """Join in the prefix directory."""
-
-
-
-
-        version
-
-    elif not isinstance(version, str):
-        raise TypeError(f"Invalid type: {version} ({type(version)})")
-    if version is None:
-        raise AssertionError
-    version = cleanup_version(version, prefix=prefix)
-    if version is not None and "/" in version:
-        raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
-    return RAW_MODULE.join(prefix, version, *parts, name=name, ensure_exists=ensure_exists)
-
-
-def get_prefix_obo_path(prefix: str, version: VersionHint = None, ext: str = "obo") -> Path:
-    """Get the canonical path to the OBO file."""
-    return prefix_directory_join(prefix, name=f"{prefix}.{ext}", version=version)
+    return RAW_MODULE.module(prefix).join(
+        *parts,
+        name=name,
+        ensure_exists=ensure_exists,
+        version=version,
+    )
 
 
 def ensure_path(
@@ -62,36 +44,29 @@ def ensure_path(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
-    error_on_missing: bool = False,
     backend: Literal["requests", "urllib"] = "urllib",
     verify: bool = True,
-
+    **download_kwargs: Any,
+) -> Path:
     """Download a file if it doesn't exist."""
-    if name is None:
-        name = name_from_url(url)
-
-    path = prefix_directory_join(prefix, *parts, name=name, version=version)
-
-    if not path.exists() and error_on_missing:
-        raise FileNotFoundError
-
-    kwargs: dict[str, Any]
     if verify:
-        kwargs = {"backend": backend}
+        download_kwargs = {"backend": backend}
     else:
         if backend != "requests":
             logger.warning("using requests since verify=False")
-        kwargs = {"backend": "requests", "verify": False}
+        download_kwargs = {"backend": "requests", "verify": False}
 
-
+    path = RAW_MODULE.module(prefix).ensure(
+        *parts,
         url=url,
-
+        name=name,
         force=force,
-
+        version=version,
+        download_kwargs=download_kwargs,
     )
-    return path
+    return path
 
 
 def ensure_df(
@@ -99,7 +74,7 @@ def ensure_df(
     *parts: str,
     url: str,
     version: VersionHint = None,
-    name: Optional[str] = None,
+    name: str | None = None,
     force: bool = False,
     sep: str = "\t",
     dtype=str,
@@ -121,21 +96,52 @@
     return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+class CacheArtifact(enum.Enum):
+    """An enumeration for."""
+
+    names = "names.tsv"
+    definitions = "definitions.tsv"
+    species = "species.tsv"
+    synonyms = "synonyms.tsv"  # deprecated
+    xrefs = "xrefs.tsv"  # deprecated
+    mappings = "mappings.tsv"
+    relations = "relations.tsv"
+    alts = "alt_ids.tsv"
+    typedefs = "typedefs.tsv"
+    literal_mappings = "literal_mappings.tsv"
+    references = "references.tsv"
+    obsoletes = "obsolete.tsv"
+
+    properties = "properties.tsv"  # deprecated
+    literal_properties = "literal_properties.tsv"
+    object_properties = "object_properties.tsv"
 
+    nodes = "nodes.tsv"
+    edges = "edges.tsv"
 
-
-
-
+    prefixes = "prefixes.json"
+    metadata = "metadata.json"
+
+
+def get_cache_path(
+    ontology: str,
+    name: CacheArtifact,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a cache path."""
+    return prefix_directory_join(
+        ontology, CACHE_SUBDIRECTORY_NAME, name=name.value, version=version
+    )
+
+
+def get_relation_cache_path(
+    ontology: str,
+    reference: Reference,
+    *,
+    version: str | None = None,
+) -> Path:
+    """Get a relation cache path."""
+    return prefix_directory_join(
+        ontology, RELATION_SUBDIRECTORY_NAME, name=f"{reference.curie}.tsv", version=version
+    )
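
Note: CacheArtifact enumerates the per-ontology cache filenames, and the two new functions resolve them under the cache and relation subdirectories via prefix_directory_join. A hypothetical lookup (the version string is illustrative):

from curies import Reference

from pyobo.utils.path import CacheArtifact, get_cache_path, get_relation_cache_path

names_path = get_cache_path("chebi", CacheArtifact.names, version="235")
part_of_path = get_relation_cache_path(
    "chebi", Reference(prefix="bfo", identifier="0000050"), version="235"
)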
pyobo/version.py CHANGED

@@ -8,11 +8,11 @@ from subprocess import CalledProcessError, check_output
 
 __all__ = [
     "VERSION",
-    "get_version",
     "get_git_hash",
+    "get_version",
 ]
 
-VERSION = "0.11.1"
+VERSION = "0.12.0"
 
 
 def get_git_hash() -> str:
@@ -30,7 +30,7 @@ def get_git_hash() -> str:
     return ret.strip().decode("utf-8")[:8]
 
 
-def get_version(with_git_hash: bool = False):
+def get_version(with_git_hash: bool = False) -> str:
     """Get the PyOBO version string, including a git hash."""
     return f"{VERSION}-{get_git_hash()}" if with_git_hash else VERSION
 
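
Note: get_version now carries an explicit return annotation. Usage (the hash suffix appears only when PyOBO runs from a git checkout):

from pyobo.version import get_version

get_version()                    # '0.12.0'
get_version(with_git_hash=True)  # e.g., '0.12.0-abcd1234'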