pyobo 0.10.12__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +31 -32
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +61 -5
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +0 -2
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -33
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +9 -10
- pyobo/identifier_utils.py +10 -10
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +0 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +2 -3
- pyobo/sources/biogrid.py +4 -4
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +1 -1
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +1 -1
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +6 -7
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +1 -3
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +2 -3
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +2 -3
- pyobo/sources/rhea.py +7 -8
- pyobo/sources/ror.py +3 -2
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +4 -4
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +102 -96
- pyobo/struct/typedef.py +9 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +4 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +3 -4
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +0 -2
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -5
- pyobo/xrefdb/sources/wikidata.py +2 -4
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo-0.10.12.dist-info/METADATA +0 -499
- pyobo-0.10.12.dist-info/RECORD +0 -169
- pyobo-0.10.12.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/utils/cache.py
CHANGED
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for caching files."""

 import gzip
 import json
 import logging
 import os
+from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic,
+from typing import Generic, TypeVar, Union

 import networkx as nx
 from pystow.cache import Cached
@@ -46,6 +45,7 @@ class _CachedMapping(Cached[X], Generic[X]):
         use_tqdm: bool = False,
         force: bool = False,
     ):
+        """Initialize the mapping cache."""
         super().__init__(path=path, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
@@ -55,9 +55,11 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
     """A cache for simple mappings."""

     def load(self) -> Mapping[str, str]:
+        """Load a TSV file."""
         return open_map_tsv(self.path, use_tqdm=self.use_tqdm)

     def dump(self, rv: Mapping[str, str]) -> None:
+        """Write a TSV file."""
         write_map_tsv(path=self.path, header=self.header, rv=rv)


@@ -77,23 +79,29 @@ def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:


 class CachedGraph(Cached[nx.MultiDiGraph]):
+    """A cache for multidigraphs."""
+
     def load(self) -> nx.MultiDiGraph:
+        """Load a graph file."""
         return get_gzipped_graph(self.path)

     def dump(self, rv: nx.MultiDiGraph) -> None:
+        """Write a graph file."""
         write_gzipped_graph(rv, self.path)


 cached_graph = CachedGraph


-class CachedMultidict(_CachedMapping[Mapping[str,
+class CachedMultidict(_CachedMapping[Mapping[str, list[str]]]):
     """A cache for complex mappings."""

-    def load(self) -> Mapping[str,
+    def load(self) -> Mapping[str, list[str]]:
+        """Load a TSV file representing a multimap."""
         return open_multimap_tsv(self.path, use_tqdm=self.use_tqdm)

-    def dump(self, rv: Mapping[str,
+    def dump(self, rv: Mapping[str, list[str]]) -> None:
+        """Write a TSV file representing a multimap."""
         write_multimap_tsv(path=self.path, header=self.header, rv=rv)
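Note: these cache classes follow the decorator protocol of pystow.cache.Cached, so typical usage is unchanged by this release. A minimal sketch with a hypothetical path and data, not taken from the diff:

    from pathlib import Path

    from pyobo.utils.cache import CachedMapping

    @CachedMapping(path=Path("names.tsv"), header=["identifier", "name"])
    def get_names() -> dict[str, str]:
        # The first call computes the mapping and writes the TSV;
        # subsequent calls load it straight from disk.
        return {"0000001": "alpha", "0000002": "beta"}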
pyobo/utils/io.py
CHANGED
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """I/O utilities."""

 import collections.abc
@@ -8,9 +6,10 @@ import gzip
 import logging
 import time
 from collections import defaultdict
+from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import
+from typing import Optional, TypeVar, Union
 from xml.etree.ElementTree import Element

 import pandas as pd
@@ -78,7 +77,7 @@ def open_multimap_tsv(
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Mapping[str,
+) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
     return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header))

@@ -88,7 +87,7 @@ def _help_multimap_tsv(
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[
+) -> Iterable[tuple[str, str]]:
     with open(path) as file:
         if has_header:
             next(file)  # throw away header
@@ -97,7 +96,7 @@ def _help_multimap_tsv(
         yield from get_reader(file)


-def multidict(pairs: Iterable[
+def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
     """Accumulate a multidict from a list of pairs."""
     rv = defaultdict(list)
     for key, value in pairs:
@@ -105,7 +104,7 @@ def multidict(pairs: Iterable[Tuple[X, Y]]) -> Mapping[X, List[Y]]:
     return dict(rv)


-def multisetdict(pairs: Iterable[
+def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
     """Accumulate a multisetdict from a list of pairs."""
     rv = defaultdict(set)
     for key, value in pairs:
@@ -118,7 +117,7 @@ def write_map_tsv(
     *,
     path: Union[str, Path],
     header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[
+    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -132,7 +131,7 @@ def write_multimap_tsv(
     *,
     path: Union[str, Path],
     header: Iterable[str],
-    rv: Mapping[str,
+    rv: Mapping[str, list[str]],
     sep: str = "\t",
 ) -> None:
     """Write a multiple mapping dictionary to a TSV file."""
@@ -144,7 +143,7 @@ def write_iterable_tsv(
     *,
     path: Union[str, Path],
     header: Optional[Iterable[str]] = None,
-    it: Iterable[
+    it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
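The multidict and multisetdict helpers group a stream of pairs by key; only their annotations changed here (builtin generics instead of typing.Tuple/List). A worked example with illustrative data:

    from pyobo.utils.io import multidict, multisetdict

    pairs = [("a", "1"), ("a", "2"), ("b", "3"), ("a", "2")]
    # multidict keeps duplicates in order; multisetdict deduplicates.
    assert multidict(pairs) == {"a": ["1", "2", "2"], "b": ["3"]}
    assert multisetdict(pairs) == {"a": {"1", "2"}, "b": {"3"}}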
pyobo/utils/iter.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Tools for iterating over things."""

 import csv
 import gzip
-from
+from collections.abc import Iterable
+from typing import TypeVar

 from more_itertools import peekable

@@ -18,7 +17,7 @@ Z = TypeVar("Z")
 Y = TypeVar("Y")


-def iterate_gzips_together(a_path, b_path) -> Iterable[
+def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
         a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
@@ -27,8 +26,8 @@ def iterate_gzips_together(a_path, b_path) -> Iterable[Tuple[str, str, List[str]


 def iterate_together(
-    a: Iterable[
-) -> Iterable[
+    a: Iterable[tuple[X, Y]], b: Iterable[tuple[X, Z]]
+) -> Iterable[tuple[X, Y, list[Z]]]:
     """Iterate over two sorted lists that have the same keys.

     The lists have to have the following invariants:
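The new signature shows that iterate_together pairs a one-to-one iterable with a grouped one-to-many iterable over shared, sorted keys. A sketch of the intended behavior with hypothetical data:

    from pyobo.utils.iter import iterate_together

    a = [("k1", "v1"), ("k2", "v2")]
    b = [("k1", "x1"), ("k1", "x2"), ("k2", "x3")]
    # expected: [("k1", "v1", ["x1", "x2"]), ("k2", "v2", ["x3"])]
    rows = list(iterate_together(a, b))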
pyobo/utils/misc.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Miscellaneous utilities."""

 import gzip
 import logging
 import os
 from datetime import datetime
-from subprocess import check_output
+from subprocess import check_output
 from typing import Optional

 __all__ = [
pyobo/utils/ndex_utils.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for caching files from NDEx."""

 import json
 import os
-from
+from collections.abc import Iterable, Mapping
+from typing import Any

 import requests
 from tqdm.auto import tqdm
@@ -21,7 +20,7 @@ __all__ = [
 NDEX_BASE_URL = "http://public.ndexbio.org/v2"
 NETWORK_ENDPOINT = f"{NDEX_BASE_URL}/network"
 NETWORKSET_ENDPOINT = f"{NDEX_BASE_URL}/networkset"
-CX =
+CX = list[Mapping[str, Any]]


 def iterate_aspect(cx: CX, aspect: str) -> Iterable[Any]:
@@ -47,7 +46,7 @@ def ensure_ndex_network(prefix: str, uuid: str, force: bool = False) -> CX:

 def ensure_ndex_network_set(
     prefix: str, uuid: str, use_tqdm: bool = False, force: bool = False
-) -> Iterable[
+) -> Iterable[tuple[str, CX]]:
     """Ensure the list of networks that goes with NCI PID on NDEx."""
     it = _help_ensure_ndex_network_set(prefix, uuid, force=force)
     if use_tqdm:
@@ -56,7 +55,7 @@ def ensure_ndex_network_set(
         yield network_uuid, ensure_ndex_network(prefix, network_uuid, force=force)


-def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) ->
+def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -> list[str]:
     """Ensure the list of networks that goes with NCI PID on NDEx."""
     networkset_path = prefix_directory_join(prefix, name="networks.txt")
     if os.path.exists(networkset_path) and not force:
@@ -69,5 +68,5 @@ def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -
     network_uuids = res_json["networks"]
     with open(networkset_path, "w") as file:
         for network_uuid in sorted(network_uuids):
-            print(network_uuid, file=file)
+            print(network_uuid, file=file)
     return network_uuids
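With CX now aliased to list[Mapping[str, Any]], downloading a network set reads roughly as follows; the prefix and UUID are placeholders, not values from this diff:

    from pyobo.utils.ndex_utils import ensure_ndex_network_set

    # Yields each member network's UUID with its CX document, cached on disk.
    for network_uuid, cx in ensure_ndex_network_set(
        "pid.pathway", "<networkset-uuid>", use_tqdm=True
    ):
        ...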
pyobo/utils/path.py
CHANGED
@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for building paths."""

 import logging
 from pathlib import Path
-from typing import Any, Callable,
+from typing import Any, Callable, Literal, Optional, Union

 import pandas as pd
 import requests_ftp
@@ -46,7 +44,8 @@ def prefix_directory_join(
         logger.info("[%s] got version %s", prefix, version)
     elif not isinstance(version, str):
         raise TypeError(f"Invalid type: {version} ({type(version)})")
-
+    if version is None:
+        raise AssertionError
     version = cleanup_version(version, prefix=prefix)
     if version is not None and "/" in version:
         raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
@@ -78,7 +77,7 @@ def ensure_path(
     if not path.exists() and error_on_missing:
         raise FileNotFoundError

-    kwargs:
+    kwargs: dict[str, Any]
     if verify:
         kwargs = {"backend": backend}
     else:
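The new `if version is None: raise AssertionError` guard only narrows the type before the cleanup_version call; callers are unaffected. A typical call, with an illustrative prefix and version:

    from pyobo.utils.path import prefix_directory_join

    # Resolves a versioned cache path, e.g. .../pyobo/raw/chebi/231/names.tsv
    path = prefix_directory_join("chebi", name="names.tsv", version="231")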
pyobo/version.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Version information for PyOBO.

 Run with ``python -m pyobo.version``
 """

 import os
-from subprocess import CalledProcessError, check_output
+from subprocess import CalledProcessError, check_output

 __all__ = [
     "VERSION",
@@ -14,14 +12,14 @@ __all__ = [
     "get_git_hash",
 ]

-VERSION = "0.10.12"
+VERSION = "0.11.0"


 def get_git_hash() -> str:
     """Get the PyOBO git hash."""
     with open(os.devnull, "w") as devnull:
         try:
-            ret = check_output(
+            ret = check_output(
                 ["git", "rev-parse", "HEAD"],
                 cwd=os.path.dirname(__file__),
                 stderr=devnull,
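The version bump is observable at runtime:

    from pyobo.version import VERSION, get_git_hash

    assert VERSION == "0.11.0"
    get_git_hash()  # shells out to `git rev-parse HEAD` in the package directory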
pyobo/xrefdb/__init__.py
CHANGED
pyobo/xrefdb/canonicalizer.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Tools for canonicalizing a CURIE based on a priority list."""

+from collections.abc import Iterable, Mapping
 from dataclasses import dataclass, field
 from functools import lru_cache
-from typing import
+from typing import Optional

 import networkx as nx
 import pandas as pd
@@ -34,7 +33,7 @@ class Canonicalizer:
     graph: nx.Graph

     #: A list of prefixes. The ones with the lower index are higher priority
-    priority: Optional[
+    priority: Optional[list[str]] = None

     #: Longest length paths allowed
     cutoff: int = 5
@@ -54,7 +53,7 @@ class Canonicalizer:
     def _get_priority_dict(self, curie: str) -> Mapping[str, int]:
         return dict(self._iterate_priority_targets(curie))

-    def _iterate_priority_targets(self, curie: str) -> Iterable[
+    def _iterate_priority_targets(self, curie: str) -> Iterable[tuple[str, int]]:
         for target in nx.single_source_shortest_path(self.graph, curie, cutoff=self.cutoff):
             priority = self._key(target)
             if priority is not None:
@@ -79,20 +78,20 @@ class Canonicalizer:
         return cls._get_default_helper(priority=priority)

     @classmethod
-    @lru_cache
-    def _get_default_helper(cls, priority: Optional[
+    @lru_cache
+    def _get_default_helper(cls, priority: Optional[tuple[str, ...]] = None) -> "Canonicalizer":
         """Help get the default canonicalizer."""
         graph = cls._get_default_graph()
         return cls(graph=graph, priority=list(priority) if priority else None)

     @staticmethod
-    @lru_cache
+    @lru_cache
     def _get_default_graph() -> nx.Graph:
         df = resource_utils.ensure_inspector_javert_df()
         graph = get_graph_from_xref_df(df)
         return graph

-    def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[
+    def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[tuple[str, str]]:
         """Iterate over the canonical mapping from all nodes to their canonical CURIEs."""
         nodes = self.graph.nodes()
         if use_tqdm:
@@ -114,13 +113,13 @@ class Canonicalizer:
         self,
         curie: str,
         cutoff: Optional[int] = None,
-    ) -> Optional[Mapping[str,
+    ) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
         """Get all shortest paths between given entity and its equivalent entities."""
         return single_source_shortest_path(graph=self.graph, curie=curie, cutoff=cutoff)

     def all_shortest_paths(
         self, source_curie: str, target_curie: str
-    ) ->
+    ) -> list[list[Mapping[str, str]]]:
         """Get all shortest paths between the two entities."""
         return all_shortest_paths(
             graph=self.graph, source_curie=source_curie, target_curie=target_curie
@@ -134,11 +133,14 @@ class Canonicalizer:

 def all_shortest_paths(
     graph: nx.Graph, source_curie: str, target_curie: str
-) ->
+) -> list[list[Mapping[str, str]]]:
     """Get all shortest paths between the two CURIEs."""
     _paths = nx.all_shortest_paths(graph, source=source_curie, target=target_curie)
     return [
-        [
+        [
+            {"source": s, "target": t, "provenance": graph[s][t]["source"]}
+            for s, t in pairwise(_path)
+        ]
         for _path in _paths
     ]

@@ -147,7 +149,7 @@ def single_source_shortest_path(
     graph: nx.Graph,
     curie: str,
     cutoff: Optional[int] = None,
-) -> Optional[Mapping[str,
+) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
     """Get the shortest path from the CURIE to all elements of its equivalence class.

     Things that didn't work:
@@ -156,7 +158,9 @@ def single_source_shortest_path(
     ------------
     .. code-block:: python

-        for curies in tqdm(
+        for curies in tqdm(
+            nx.connected_components(graph), desc="filling connected components", unit_scale=True
+        ):
             for c1, c2 in itt.combinations(curies, r=2):
                 if not graph.has_edge(c1, c2):
                     graph.add_edge(c1, c2, inferred=True)
@@ -165,7 +169,9 @@ def single_source_shortest_path(
     ------------
     .. code-block:: python

-        for curie in tqdm(
+        for curie in tqdm(
+            graph, total=graph.number_of_nodes(), desc="mapping connected components", unit_scale=True
+        ):
             for incident_curie in nx.node_connected_component(graph, curie):
                 if not graph.has_edge(curie, incident_curie):
                     graph.add_edge(curie, incident_curie, inferred=True)
@@ -177,13 +183,16 @@ def single_source_shortest_path(
         return None
     rv = nx.single_source_shortest_path(graph, curie, cutoff=cutoff)
     return {
-        k: [
+        k: [
+            {"source": s, "target": t, "provenance": graph[s][t]["provenance"]}
+            for s, t in pairwise(v)
+        ]
         for k, v in rv.items()
         if k != curie  # don't map to self
     }


-def get_equivalent(curie: str, cutoff: Optional[int] = None) ->
+def get_equivalent(curie: str, cutoff: Optional[int] = None) -> set[str]:
     """Get equivalent CURIEs."""
     canonicalizer = Canonicalizer.get_default()
     r = canonicalizer.single_source_shortest_path(curie=curie, cutoff=cutoff)
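Per the new annotation, get_equivalent returns a plain set of CURIEs. A usage sketch; the CURIE is a placeholder, and the default canonicalizer first loads the prebuilt Inspector Javert xref graph, which can be a large download:

    from pyobo.xrefdb.canonicalizer import get_equivalent

    # Walks the xref graph out to the cutoff and collects equivalent CURIEs.
    equivalents: set[str] = get_equivalent("hgnc:5", cutoff=3)  # placeholder CURIE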
pyobo/xrefdb/priority.py
CHANGED
pyobo/xrefdb/sources/__init__.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Sources of xrefs not from OBO."""

 import logging
+from collections.abc import Iterable, Mapping
 from functools import lru_cache
-from typing import Callable,
+from typing import Callable, Optional

 import pandas as pd
 from class_resolver import FunctionResolver
@@ -22,7 +21,7 @@ logger = logging.getLogger(__name__)
 XrefGetter = Callable[[], pd.DataFrame]


-@lru_cache
+@lru_cache
 def _get_xref_plugins() -> Mapping[str, XrefGetter]:
     resolver: FunctionResolver[XrefGetter] = FunctionResolver.from_entrypoint("pyobo.xrefs")
     return resolver.lookup_dict
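The lookup is entry-point driven: any zero-argument callable returning a pandas DataFrame registered under the pyobo.xrefs entry point group is discovered by FunctionResolver. An illustrative, hypothetical plugin:

    import pandas as pd

    def get_my_xrefs_df() -> pd.DataFrame:
        # Register this under the "pyobo.xrefs" entry point group; columns
        # should follow pyobo.constants.XREF_COLUMNS.
        return pd.DataFrame()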
pyobo/xrefdb/sources/cbms2019.py
CHANGED
pyobo/xrefdb/sources/chembl.py
CHANGED
pyobo/xrefdb/sources/compath.py
CHANGED
pyobo/xrefdb/sources/famplex.py
CHANGED
@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
-
 """Get FamPlex xrefs."""

 import logging
+from collections.abc import Mapping
 from functools import lru_cache
-from typing import Mapping, Tuple

 import bioregistry
 import pandas as pd
@@ -50,8 +48,8 @@ def get_famplex_xrefs_df(force: bool = False) -> pd.DataFrame:
     return df


-@lru_cache
-def get_remapping(force: bool = False) -> Mapping[
+@lru_cache
+def get_remapping(force: bool = False) -> Mapping[tuple[str, str], tuple[str, str, str]]:
     """Get a mapping from database/identifier pairs to famplex identifiers."""
     df = _get_famplex_df(force=force)
     rv = {}
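After the change, get_remapping is keyed and valued with builtin tuples: a (database, identifier) pair maps to a three-part FamPlex reference. Illustrative access only; the key here is hypothetical:

    from pyobo.xrefdb.sources.famplex import get_remapping

    remapping = get_remapping()
    triple = remapping.get(("HGNC", "1234"))  # tuple[str, str, str] or None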
pyobo/xrefdb/sources/gilda.py
CHANGED
pyobo/xrefdb/sources/intact.py
CHANGED
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
-
 """Get the xrefs from IntAct."""

-from
+from collections.abc import Mapping

 import pandas as pd

@@ -49,7 +47,8 @@ def get_complexportal_mapping() -> Mapping[str, str]:
     .. code-block:: python

         from pyobo import get_filtered_xrefs
-
+
+        intact_complexportal_mapping = get_filtered_xrefs("intact", "complexportal")
     """

     @cached_mapping(
@@ -87,7 +86,8 @@ def get_reactome_mapping() -> Mapping[str, str]:
     .. code-block:: python

         from pyobo import get_filtered_xrefs
-
+
+        intact_complexportal_mapping = get_filtered_xrefs("intact", "reactome")
     """

     @cached_mapping(
pyobo/xrefdb/sources/ncit.py
CHANGED
pyobo/xrefdb/sources/pubchem.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Get xrefs from PubChem Compound to MeSH."""

 from typing import Optional

 import pandas as pd

-from ...api.utils import
+from ...api.utils import safe_get_version
 from ...constants import XREF_COLUMNS
 from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id

@@ -18,8 +16,7 @@ __all__ = [
 def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
     """Get PubChem Compound-MeSH xrefs."""
     if version is None:
-        version =
-        assert version is not None
+        version = safe_get_version("pubchem")
     cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
     return pd.DataFrame(
         [
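The inline lookup plus assert is replaced by the new safe_get_version helper from pyobo.api.utils, which by its name is expected to return a string rather than None (hence the dropped assert). The equivalent standalone call:

    from pyobo.api.utils import safe_get_version

    version = safe_get_version("pubchem")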
pyobo/xrefdb/sources/wikidata.py
CHANGED
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Get Wikidata xrefs.

 Run with ``python -m pyobo.xrefdb.sources.wikidata``.
@@ -7,7 +5,7 @@ Run with ``python -m pyobo.xrefdb.sources.wikidata``.

 import json
 import logging
-from
+from collections.abc import Iterable

 import bioregistry
 import click
@@ -68,7 +66,7 @@ def get_wikidata_df(prefix: str, wikidata_property: str) -> pd.DataFrame:

 def iter_wikidata_mappings(
     wikidata_property: str, *, cache: bool = True
-) -> Iterable[
+) -> Iterable[tuple[str, str]]:
     """Iterate over Wikidata xrefs."""
     path = WIKIDATA_MAPPING_DIRECTORY.join(name=f"{wikidata_property}.json")
     if path.exists() and cache:
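Usage under the new tuple annotation; P683 (the Wikidata property for ChEBI IDs) is chosen as an example and does not appear in this diff:

    from pyobo.xrefdb.sources.wikidata import iter_wikidata_mappings

    # Yields pairs of linked identifiers for the given Wikidata property.
    for left_id, right_id in iter_wikidata_mappings("P683"):
        ...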
|