pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/struct/typedef.py
CHANGED
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
-
 """Default typedefs, references, and other structures."""
 
+from collections.abc import Iterable
 from dataclasses import dataclass, field
-from typing import Dict, Iterable, List, Optional, Tuple, Union
+from typing import Optional, Union
 
 from .reference import Reference, Referenced
 from ..identifier_utils import normalize_curie
@@ -42,9 +41,13 @@ __all__ = [
     "has_participant",
     "exact_match",
     "has_dbxref",
+    "located_in",
+    "has_successor",
+    "has_predecessor",
     # Properties
     "has_inchi",
     "has_smiles",
+    "has_homepage",
 ]
 
 
@@ -67,11 +70,11 @@ class TypeDef(Referenced):
     is_symmetric: Optional[bool] = None
     domain: Optional[Reference] = None
     range: Optional[Reference] = None
-    parents: List[Reference] = field(default_factory=list)
-    xrefs: List[Reference] = field(default_factory=list)
+    parents: list[Reference] = field(default_factory=list)
+    xrefs: list[Reference] = field(default_factory=list)
     inverse: Optional[Reference] = None
     created_by: Optional[str] = None
-    holds_over_chain: Optional[List[Reference]] = None
+    holds_over_chain: Optional[list[Reference]] = None
     #: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are
     #: used to record object metadata. Object metadata is additional information about an object
     #: that is useful to track, but does not impact the definition of the object or how it should
@@ -79,7 +82,7 @@ class TypeDef(Referenced):
     #: structured notes about a term, for example.
     is_metadata_tag: Optional[bool] = None
 
-    def __hash__(self) -> int:
+    def __hash__(self) -> int:
        return hash((self.__class__, self.prefix, self.identifier))
 
     def iterate_obo_lines(self) -> Iterable[str]:
@@ -136,10 +139,10 @@
         return cls.from_triple(prefix=prefix, identifier=identifier, name=name)
 
 
-RelationHint = Union[Reference, TypeDef, Tuple[str, str], str]
+RelationHint = Union[Reference, TypeDef, tuple[str, str], str]
 
 
-def get_reference_tuple(relation: RelationHint) -> Tuple[str, str]:
+def get_reference_tuple(relation: RelationHint) -> tuple[str, str]:
     """Get tuple for typedef/reference."""
     if isinstance(relation, (Reference, TypeDef)):
         return relation.prefix, relation.identifier
@@ -323,6 +326,9 @@ enabled_by = TypeDef(reference=_enabled_by_reference, inverse=_enables_reference
 has_input = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002233", name="has input")
 has_output = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002234", name="has output")
 
+has_successor = TypeDef.from_triple(prefix="BFO", identifier="0000063", name="has successor")
+has_predecessor = TypeDef.from_triple(prefix="BFO", identifier="0000062", name="has predecessor")
+
 """ChEBI"""
 
 is_conjugate_base_of = TypeDef(
@@ -355,8 +361,11 @@ has_inchi = TypeDef(
     reference=Reference(prefix="debio", identifier="0000020", name="has InChI"),
 )
 
+has_homepage = TypeDef(
+    reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True
+)
 
-default_typedefs: Dict[Tuple[str, str], TypeDef] = {
+default_typedefs: dict[tuple[str, str], TypeDef] = {
     v.pair: v for k, v in locals().items() if isinstance(v, TypeDef)
 }
 
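The new has_successor/has_predecessor BFO typedefs and the has_homepage metadata tag are picked up automatically by the locals() scan that builds default_typedefs. A minimal lookup sketch, assuming TypeDef.pair is the (prefix, identifier) tuple implied by the v.pair construction above:

    from pyobo.struct.typedef import default_typedefs, has_successor

    # keys are assumed to be (prefix, identifier) pairs, per the v.pair construction
    assert default_typedefs["BFO", "0000063"] is has_successor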
pyobo/struct/utils.py
CHANGED
pyobo/utils/__init__.py
CHANGED
pyobo/utils/cache.py
CHANGED
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for caching files."""
 
 import gzip
 import json
 import logging
 import os
+from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Generic, Iterable, Mapping, TypeVar, Union
+from typing import Generic, TypeVar, Union
 
 import networkx as nx
 from pystow.cache import Cached
@@ -46,6 +45,7 @@ class _CachedMapping(Cached[X], Generic[X]):
         use_tqdm: bool = False,
         force: bool = False,
     ):
+        """Initialize the mapping cache."""
         super().__init__(path=path, force=force)
         self.header = header
         self.use_tqdm = use_tqdm
@@ -55,9 +55,11 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]):
     """A cache for simple mappings."""
 
     def load(self) -> Mapping[str, str]:
+        """Load a TSV file."""
         return open_map_tsv(self.path, use_tqdm=self.use_tqdm)
 
     def dump(self, rv: Mapping[str, str]) -> None:
+        """Write a TSV file."""
         write_map_tsv(path=self.path, header=self.header, rv=rv)
 
 
@@ -77,23 +79,29 @@ def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None:
 
 
 class CachedGraph(Cached[nx.MultiDiGraph]):
+    """A cache for multidigraphs."""
+
     def load(self) -> nx.MultiDiGraph:
+        """Load a graph file."""
         return get_gzipped_graph(self.path)
 
     def dump(self, rv: nx.MultiDiGraph) -> None:
+        """Write a graph file."""
         write_gzipped_graph(rv, self.path)
 
 
 cached_graph = CachedGraph
 
 
-class CachedMultidict(_CachedMapping[Mapping[str, List[str]]]):
+class CachedMultidict(_CachedMapping[Mapping[str, list[str]]]):
     """A cache for complex mappings."""
 
-    def load(self) -> Mapping[str, List[str]]:
+    def load(self) -> Mapping[str, list[str]]:
+        """Load a TSV file representing a multimap."""
         return open_multimap_tsv(self.path, use_tqdm=self.use_tqdm)
 
-    def dump(self, rv: Mapping[str, List[str]]) -> None:
+    def dump(self, rv: Mapping[str, list[str]]) -> None:
+        """Write a TSV file representing a multimap."""
         write_multimap_tsv(path=self.path, header=self.header, rv=rv)
 
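These classes build on pystow.cache.Cached, whose instances act as decorators: the wrapped function only runs when the file at the given path is missing (or when force=True); otherwise the result is loaded from disk. A hypothetical sketch, assuming the decorator protocol from pystow:

    from pyobo.utils.cache import CachedMapping

    @CachedMapping(path="symbols.tsv", header=["identifier", "symbol"])
    def get_symbols() -> dict[str, str]:
        # expensive computation; runs only when symbols.tsv is absent
        return {"1234": "ABC1"}

    symbols = get_symbols()  # later calls load the cached TSV instead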
pyobo/utils/io.py
CHANGED
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """I/O utilities."""
 
 import collections.abc
@@ -8,9 +6,10 @@ import gzip
 import logging
 import time
 from collections import defaultdict
+from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple, TypeVar, Union
+from typing import Optional, TypeVar, Union
 from xml.etree.ElementTree import Element
 
 import pandas as pd
@@ -78,7 +77,7 @@ def open_multimap_tsv(
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Mapping[str, List[str]]:
+) -> Mapping[str, list[str]]:
     """Load a mapping TSV file that has multiple mappings for each."""
     return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header))
 
@@ -88,7 +87,7 @@ def _help_multimap_tsv(
     *,
     use_tqdm: bool = False,
     has_header: bool = True,
-) -> Iterable[Tuple[str, str]]:
+) -> Iterable[tuple[str, str]]:
     with open(path) as file:
         if has_header:
             next(file)  # throw away header
@@ -97,7 +96,7 @@ def _help_multimap_tsv(
         yield from get_reader(file)
 
 
-def multidict(pairs: Iterable[Tuple[X, Y]]) -> Mapping[X, List[Y]]:
+def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]:
     """Accumulate a multidict from a list of pairs."""
     rv = defaultdict(list)
     for key, value in pairs:
@@ -105,7 +104,7 @@ def multisetdict(pairs: Iterable[Tuple[X, Y]]) -> Mapping[X, List[Y]]:
     return dict(rv)
 
 
-def multisetdict(pairs: Iterable[Tuple[X, Y]]) -> Dict[X, Set[Y]]:
+def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]:
     """Accumulate a multisetdict from a list of pairs."""
     rv = defaultdict(set)
     for key, value in pairs:
@@ -118,7 +117,7 @@ def write_map_tsv(
     *,
     path: Union[str, Path],
     header: Optional[Iterable[str]] = None,
-    rv: Union[Iterable[Tuple[str, str]], Mapping[str, str]],
+    rv: Union[Iterable[tuple[str, str]], Mapping[str, str]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
@@ -132,7 +131,7 @@ def write_multimap_tsv(
     *,
     path: Union[str, Path],
     header: Iterable[str],
-    rv: Mapping[str, List[str]],
+    rv: Mapping[str, list[str]],
     sep: str = "\t",
 ) -> None:
     """Write a multiple mapping dictionary to a TSV file."""
@@ -144,7 +143,7 @@ def write_iterable_tsv(
     *,
     path: Union[str, Path],
     header: Optional[Iterable[str]] = None,
-    it: Iterable[Tuple[str, ...]],
+    it: Iterable[tuple[str, ...]],
     sep: str = "\t",
 ) -> None:
     """Write a mapping dictionary to a TSV file."""
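The multidict and multisetdict helpers now advertise builtin generics, but their behavior is unchanged. For example:

    from pyobo.utils.io import multidict, multisetdict

    pairs = [("hgnc", "5"), ("hgnc", "6"), ("mgi", "7")]
    multidict(pairs)     # {'hgnc': ['5', '6'], 'mgi': ['7']}
    multisetdict(pairs)  # {'hgnc': {'5', '6'}, 'mgi': {'7'}}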
pyobo/utils/iter.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Tools for iterating over things."""
 
 import csv
 import gzip
-from typing import Iterable, List, Tuple, TypeVar
+from collections.abc import Iterable
+from typing import TypeVar
 
 from more_itertools import peekable
 
@@ -18,7 +17,7 @@ Z = TypeVar("Z")
 Y = TypeVar("Y")
 
 
-def iterate_gzips_together(a_path, b_path) -> Iterable[Tuple[str, str, List[str]]]:
+def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]:
     """Iterate over two gzipped files together."""
     with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b:
         a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
@@ -27,8 +26,8 @@ def iterate_gzips_together(a_path, b_path) -> Iterable[Tuple[str, str, List[str]
 
 
 def iterate_together(
-    a: Iterable[Tuple[X, Y]], b: Iterable[Tuple[X, Z]]
-) -> Iterable[Tuple[X, Y, List[Z]]]:
+    a: Iterable[tuple[X, Y]], b: Iterable[tuple[X, Z]]
+) -> Iterable[tuple[X, Y, list[Z]]]:
     """Iterate over two sorted lists that have the same keys.
 
     The lists have to have the following invariants:
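iterate_together merges a key/name iterable with a sorted key/value iterable, grouping all values for each key. A sketch of the expected behavior under the documented invariants (sorted inputs sharing the same keys):

    from pyobo.utils.iter import iterate_together

    a = [("k1", "name1"), ("k2", "name2")]
    b = [("k1", "v1"), ("k1", "v2"), ("k2", "v3")]
    # expected: [('k1', 'name1', ['v1', 'v2']), ('k2', 'name2', ['v3'])]
    print(list(iterate_together(a, b)))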
pyobo/utils/misc.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Miscellaneous utilities."""
 
 import gzip
 import logging
 import os
 from datetime import datetime
-from subprocess import check_output
+from subprocess import check_output
 from typing import Optional
 
 __all__ = [
pyobo/utils/ndex_utils.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for caching files from NDEx."""
 
 import json
 import os
-from typing import Any, Iterable, List, Mapping, Tuple
+from collections.abc import Iterable, Mapping
+from typing import Any
 
 import requests
 from tqdm.auto import tqdm
@@ -21,7 +20,7 @@ __all__ = [
 NDEX_BASE_URL = "http://public.ndexbio.org/v2"
 NETWORK_ENDPOINT = f"{NDEX_BASE_URL}/network"
 NETWORKSET_ENDPOINT = f"{NDEX_BASE_URL}/networkset"
-CX = List[Mapping[str, Any]]
+CX = list[Mapping[str, Any]]
 
 
 def iterate_aspect(cx: CX, aspect: str) -> Iterable[Any]:
@@ -47,7 +46,7 @@ def ensure_ndex_network(prefix: str, uuid: str, force: bool = False) -> CX:
 
 def ensure_ndex_network_set(
     prefix: str, uuid: str, use_tqdm: bool = False, force: bool = False
-) -> Iterable[Tuple[str, CX]]:
+) -> Iterable[tuple[str, CX]]:
     """Ensure the list of networks that goes with NCI PID on NDEx."""
     it = _help_ensure_ndex_network_set(prefix, uuid, force=force)
     if use_tqdm:
@@ -56,7 +55,7 @@ def ensure_ndex_network_set(
         yield network_uuid, ensure_ndex_network(prefix, network_uuid, force=force)
 
 
-def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -> List[str]:
+def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -> list[str]:
     """Ensure the list of networks that goes with NCI PID on NDEx."""
     networkset_path = prefix_directory_join(prefix, name="networks.txt")
     if os.path.exists(networkset_path) and not force:
@@ -69,5 +68,5 @@ def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -
     network_uuids = res_json["networks"]
     with open(networkset_path, "w") as file:
         for network_uuid in sorted(network_uuids):
-            print(network_uuid, file=file)
+            print(network_uuid, file=file)
     return network_uuids
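ensure_ndex_network_set downloads the list of network UUIDs in an NDEx network set, caches it to networks.txt, and yields each UUID together with its CX document. A hedged usage sketch; the prefix and UUID below are placeholders, not a real network set:

    from pyobo.utils.ndex_utils import ensure_ndex_network_set

    # placeholder identifiers for illustration only
    for network_uuid, cx in ensure_ndex_network_set("pid.pathway", "<networkset-uuid>"):
        print(network_uuid, len(cx))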
pyobo/utils/path.py
CHANGED
@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
-
 """Utilities for building paths."""
 
 import logging
 from pathlib import Path
-from typing import Any, Callable, Dict, Literal, Optional, Union
+from typing import Any, Callable, Literal, Optional, Union
 
 import pandas as pd
 import requests_ftp
@@ -25,7 +23,7 @@ __all__ = [
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], str]]
+VersionHint = Union[None, str, Callable[[], Optional[str]]]
 
 requests_ftp.monkeypatch_session()
 
@@ -46,6 +44,8 @@ def prefix_directory_join(
         logger.info("[%s] got version %s", prefix, version)
     elif not isinstance(version, str):
         raise TypeError(f"Invalid type: {version} ({type(version)})")
+    if version is None:
+        raise AssertionError
     version = cleanup_version(version, prefix=prefix)
     if version is not None and "/" in version:
         raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
@@ -77,7 +77,7 @@ def ensure_path(
     if not path.exists() and error_on_missing:
         raise FileNotFoundError
 
-    kwargs: Dict[str, Any]
+    kwargs: dict[str, Any]
     if verify:
         kwargs = {"backend": backend}
     else:
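The widened VersionHint now covers version callables that may return None, and prefix_directory_join fails fast with an AssertionError in that case instead of passing None along. A sketch, using the keyword-only name/version parameters seen in the hunks above:

    from pyobo.utils.path import prefix_directory_join

    # the callable may return Optional[str] under the new VersionHint
    path = prefix_directory_join("chebi", name="names.tsv", version=lambda: "231")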
pyobo/version.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Version information for PyOBO.
 
 Run with ``python -m pyobo.version``
 """
 
 import os
-from subprocess import CalledProcessError, check_output
+from subprocess import CalledProcessError, check_output
 
 __all__ = [
     "VERSION",
@@ -14,14 +12,14 @@ __all__ = [
     "get_git_hash",
 ]
 
-VERSION = "0.10.11"
+VERSION = "0.11.0"
 
 
 def get_git_hash() -> str:
     """Get the PyOBO git hash."""
     with open(os.devnull, "w") as devnull:
         try:
-            ret = check_output(
+            ret = check_output(
                 ["git", "rev-parse", "HEAD"],
                 cwd=os.path.dirname(__file__),
                 stderr=devnull,
pyobo/xrefdb/__init__.py
CHANGED
pyobo/xrefdb/canonicalizer.py
CHANGED
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
-
 """Tools for canonicalizing a CURIE based on a priority list."""
 
+from collections.abc import Iterable, Mapping
 from dataclasses import dataclass, field
 from functools import lru_cache
-from typing import Iterable, List, Mapping, Optional, Set, Tuple
+from typing import Optional
 
 import networkx as nx
 import pandas as pd
@@ -34,7 +33,7 @@ class Canonicalizer:
     graph: nx.Graph
 
     #: A list of prefixes. The ones with the lower index are higher priority
-    priority: Optional[List[str]] = None
+    priority: Optional[list[str]] = None
 
     #: Longest length paths allowed
     cutoff: int = 5
@@ -54,7 +53,7 @@ class Canonicalizer:
     def _get_priority_dict(self, curie: str) -> Mapping[str, int]:
         return dict(self._iterate_priority_targets(curie))
 
-    def _iterate_priority_targets(self, curie: str) -> Iterable[Tuple[str, int]]:
+    def _iterate_priority_targets(self, curie: str) -> Iterable[tuple[str, int]]:
         for target in nx.single_source_shortest_path(self.graph, curie, cutoff=self.cutoff):
             priority = self._key(target)
             if priority is not None:
@@ -79,20 +78,20 @@ class Canonicalizer:
         return cls._get_default_helper(priority=priority)
 
     @classmethod
-    @lru_cache
-    def _get_default_helper(cls, priority: Optional[Tuple[str, ...]] = None) -> "Canonicalizer":
+    @lru_cache
+    def _get_default_helper(cls, priority: Optional[tuple[str, ...]] = None) -> "Canonicalizer":
         """Help get the default canonicalizer."""
         graph = cls._get_default_graph()
         return cls(graph=graph, priority=list(priority) if priority else None)
 
     @staticmethod
-    @lru_cache
+    @lru_cache
     def _get_default_graph() -> nx.Graph:
         df = resource_utils.ensure_inspector_javert_df()
         graph = get_graph_from_xref_df(df)
         return graph
 
-    def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[Tuple[str, str]]:
+    def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[tuple[str, str]]:
         """Iterate over the canonical mapping from all nodes to their canonical CURIEs."""
         nodes = self.graph.nodes()
         if use_tqdm:
@@ -114,13 +113,13 @@ class Canonicalizer:
         self,
         curie: str,
         cutoff: Optional[int] = None,
-    ) -> Optional[Mapping[str, List[Mapping[str, str]]]]:
+    ) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
         """Get all shortest paths between given entity and its equivalent entities."""
         return single_source_shortest_path(graph=self.graph, curie=curie, cutoff=cutoff)
 
     def all_shortest_paths(
         self, source_curie: str, target_curie: str
-    ) -> List[List[Mapping[str, str]]]:
+    ) -> list[list[Mapping[str, str]]]:
         """Get all shortest paths between the two entities."""
         return all_shortest_paths(
             graph=self.graph, source_curie=source_curie, target_curie=target_curie
@@ -134,11 +133,14 @@ class Canonicalizer:
 
 def all_shortest_paths(
     graph: nx.Graph, source_curie: str, target_curie: str
-) -> List[List[Mapping[str, str]]]:
+) -> list[list[Mapping[str, str]]]:
     """Get all shortest paths between the two CURIEs."""
     _paths = nx.all_shortest_paths(graph, source=source_curie, target=target_curie)
     return [
-        [{"source": s, "target": t, "provenance": graph[s][t]["source"]} for s, t in pairwise(_path)]
+        [
+            {"source": s, "target": t, "provenance": graph[s][t]["source"]}
+            for s, t in pairwise(_path)
+        ]
         for _path in _paths
     ]
 
@@ -147,7 +149,7 @@ def single_source_shortest_path(
     graph: nx.Graph,
     curie: str,
     cutoff: Optional[int] = None,
-) -> Optional[Mapping[str, List[Mapping[str, str]]]]:
+) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
     """Get the shortest path from the CURIE to all elements of its equivalence class.
 
     Things that didn't work:
@@ -156,7 +158,9 @@ def single_source_shortest_path(
     ------------
     .. code-block:: python
 
-        for curies in tqdm(nx.connected_components(graph), desc="filling connected components", unit_scale=True):
+        for curies in tqdm(
+            nx.connected_components(graph), desc="filling connected components", unit_scale=True
+        ):
             for c1, c2 in itt.combinations(curies, r=2):
                 if not graph.has_edge(c1, c2):
                     graph.add_edge(c1, c2, inferred=True)
@@ -165,7 +169,9 @@ def single_source_shortest_path(
     ------------
    .. code-block:: python
 
-        for curie in tqdm(graph, total=graph.number_of_nodes(), desc="mapping connected components", unit_scale=True):
+        for curie in tqdm(
+            graph, total=graph.number_of_nodes(), desc="mapping connected components", unit_scale=True
+        ):
             for incident_curie in nx.node_connected_component(graph, curie):
                 if not graph.has_edge(curie, incident_curie):
                     graph.add_edge(curie, incident_curie, inferred=True)
@@ -177,13 +183,16 @@ def single_source_shortest_path(
         return None
     rv = nx.single_source_shortest_path(graph, curie, cutoff=cutoff)
     return {
-        k: [{"source": s, "target": t, "provenance": graph[s][t]["provenance"]} for s, t in pairwise(v)]
+        k: [
+            {"source": s, "target": t, "provenance": graph[s][t]["provenance"]}
+            for s, t in pairwise(v)
+        ]
         for k, v in rv.items()
         if k != curie  # don't map to self
     }
 
 
-def get_equivalent(curie: str, cutoff: Optional[int] = None) -> Set[str]:
+def get_equivalent(curie: str, cutoff: Optional[int] = None) -> set[str]:
     """Get equivalent CURIEs."""
     canonicalizer = Canonicalizer.get_default()
     r = canonicalizer.single_source_shortest_path(curie=curie, cutoff=cutoff)
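get_equivalent wraps the default Canonicalizer, whose graph is built from the cached Inspector Javert xref dataframe, and returns the set of CURIEs reachable within the cutoff. An illustrative sketch (the CURIE is arbitrary, and the first call triggers download of the xref graph):

    from pyobo.xrefdb.canonicalizer import get_equivalent

    for curie in sorted(get_equivalent("mesh:C063233")):
        print(curie)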
pyobo/xrefdb/priority.py
CHANGED
pyobo/xrefdb/sources/__init__.py
CHANGED
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
-
 """Sources of xrefs not from OBO."""
 
 import logging
+from collections.abc import Iterable, Mapping
 from functools import lru_cache
-from typing import Callable, Iterable, Mapping, Optional
+from typing import Callable, Optional
 
 import pandas as pd
-from …
+from class_resolver import FunctionResolver
 from tqdm.auto import tqdm
 
 __all__ = [
@@ -19,10 +18,13 @@ __all__ = [
 
 logger = logging.getLogger(__name__)
 
+XrefGetter = Callable[[], pd.DataFrame]
+
 
-@lru_cache
-def _get_xref_plugins() -> Mapping[str, Callable[[], pd.DataFrame]]:
-    …
+@lru_cache
+def _get_xref_plugins() -> Mapping[str, XrefGetter]:
+    resolver: FunctionResolver[XrefGetter] = FunctionResolver.from_entrypoint("pyobo.xrefs")
+    return resolver.lookup_dict
 
 
 def has_xref_plugin(prefix: str) -> bool:
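_get_xref_plugins now discovers xref getters through class_resolver's FunctionResolver, so any package can contribute a zero-argument function returning a pandas DataFrame by registering it under the pyobo.xrefs entry point group. A sketch of querying the registry (the plugin name is illustrative):

    from pyobo.xrefdb.sources import has_xref_plugin

    # plugin names come from the "pyobo.xrefs" entry point group
    print(has_xref_plugin("chembl"))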
pyobo/xrefdb/sources/cbms2019.py
CHANGED
pyobo/xrefdb/sources/chembl.py
CHANGED
@@ -1,12 +1,10 @@
-# -*- coding: utf-8 -*-
-
 """Get ChEMBL xrefs."""
 
 from typing import Optional
 
-import bioversions
 import pandas as pd
 
+from pyobo.api.utils import get_version
 from pyobo.constants import (
     PROVENANCE,
     SOURCE_ID,
@@ -26,7 +24,7 @@ def get_chembl_compound_equivalences_raw(
 ) -> pd.DataFrame:
     """Get the chemical representations raw dataframe."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}"
     url = f"{base_url}/chembl_{version}_chemreps.txt.gz"
@@ -36,7 +34,7 @@ def get_chembl_compound_equivalences_raw(
 def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame:
     """Get ChEMBL chemical equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     df = get_chembl_compound_equivalences_raw(version=version)
     rows = []
@@ -55,7 +53,7 @@ def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFr
 def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame:
     """Get ChEMBL protein equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
     df = ensure_df(
@@ -75,7 +73,7 @@ def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFra
 def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
     """Get all ChEBML equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     return pd.concat(
         [