pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/sources/icd11.py
DELETED
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
"""Convert ICD11 to OBO.
|
|
2
|
-
|
|
3
|
-
Run with python -m pyobo.sources.icd11 -v
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import json
|
|
7
|
-
import logging
|
|
8
|
-
import os
|
|
9
|
-
from collections.abc import Iterable, Mapping
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
import click
|
|
13
|
-
from more_click import verbose_option
|
|
14
|
-
from tqdm.auto import tqdm
|
|
15
|
-
|
|
16
|
-
from ..sources.icd_utils import (
|
|
17
|
-
ICD11_TOP_LEVEL_URL,
|
|
18
|
-
get_child_identifiers,
|
|
19
|
-
get_icd,
|
|
20
|
-
visiter,
|
|
21
|
-
)
|
|
22
|
-
from ..struct import Obo, Reference, Synonym, Term
|
|
23
|
-
from ..utils.path import prefix_directory_join
|
|
24
|
-
|
|
25
|
-
__all__ = [
|
|
26
|
-
"ICD11Getter",
|
|
27
|
-
]
|
|
28
|
-
|
|
29
|
-
logger = logging.getLogger(__name__)
|
|
30
|
-
|
|
31
|
-
PREFIX = "icd11"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class ICD11Getter(Obo):
    """Expose ICD-11 through the :class:`Obo` interface."""

    # Prefix under which this ontology is identified.
    ontology = PREFIX
    # NOTE(review): consumed by the ``Obo`` base class; presumably marks the
    # version as discovered at run time rather than fixed -- confirm there.
    dynamic_version = True

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Yield every ICD-11 term.

        :param force: Accepted for interface compatibility; this
            implementation does not read it.
        :returns: The iterable produced by :func:`iterate_icd11`.
        """
        terms = iterate_icd11()
        return terms
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def get_obo() -> Obo:
    """Build and return a new :class:`ICD11Getter`."""
    getter = ICD11Getter()
    return getter
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def iterate_icd11() -> Iterable[Term]:
    """Yield the ICD-11 terms by walking the API from its top-level entity.

    The top-level record is fetched first. Its ``releaseId`` selects the
    version directory, and the raw record is stored there as ``top.json``.
    Every child of the top-level record is then passed to :func:`visiter`,
    which yields the terms converted by :func:`_extract_icd11`.

    The API doesn't seem to have a rate limit, but it responds slowly, so
    results arrive at about 5 calls/second. Get ready to be patient.

    NOTE(review): the original notes say the API token expires every hour and
    is refreshed through a :mod:`cachier`-based cache; that logic is not in
    this function -- confirm in the ICD helper module.
    """
    res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
    directory = prefix_directory_join(PREFIX, version=res_json["releaseId"])

    # keep a copy of the raw top-level record next to the per-version cache
    top_path = os.path.join(directory, "top.json")
    with open(top_path, "w") as file:
        json.dump(res_json, file, indent=2)

    tqdm.write(f'There are {len(res_json["child"])} top level entities')

    # shared across all top-level children so an entity is not visited twice
    seen: set[str] = set()
    for child_identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
        yield from visiter(
            child_identifier,
            seen,
            directory,
            endpoint=ICD11_TOP_LEVEL_URL,
            converter=_extract_icd11,
        )
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def _extract_icd11(res_json: Mapping[str, Any]) -> Term:
    """Convert one ICD-11 API record into a :class:`Term`.

    :param res_json: A JSON record. ``@id``, ``title`` and ``parent`` are
        required; ``definition`` and ``synonym`` are optional.
    :returns: A term carrying the name, definition, synonyms and parents.
    :raises KeyError: If a required key is missing from the record.
    """
    entity_prefix = "http://id.who.int/icd/entity/"

    identifier = res_json["@id"][len(ICD11_TOP_LEVEL_URL) :].lstrip("/")

    if "definition" in res_json:
        definition = res_json["definition"]["@value"]
    else:
        definition = None

    name = res_json["title"]["@value"]

    synonyms = []
    for synonym in res_json.get("synonym", []):
        synonyms.append(Synonym(synonym["label"]["@value"]))

    parents = []
    for url in res_json["parent"]:
        parent_identifier = url[len(entity_prefix) :]
        # a parent URL with nothing after the entity prefix is skipped
        if parent_identifier:
            parents.append(Reference(prefix=PREFIX, identifier=parent_identifier))

    reference = Reference(prefix=PREFIX, identifier=identifier, name=name)
    return Term(
        reference=reference,
        definition=definition,
        synonyms=synonyms,
        parents=parents,
    )
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
@click.command()
@verbose_option
def _main():
    # No docstring on purpose: click would surface it as the command's help text.
    ontology = get_obo()
    ontology.write_default(use_tqdm=True)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if __name__ == "__main__":
|
|
105
|
-
_main()
|
pyobo/xrefdb/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Extraction of mappings from OBO documents."""
|
pyobo/xrefdb/canonicalizer.py
DELETED
|
@@ -1,214 +0,0 @@
|
|
|
1
|
-
"""Tools for canonicalizing a CURIE based on a priority list."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Iterable, Mapping
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from functools import lru_cache
|
|
6
|
-
from typing import Optional
|
|
7
|
-
|
|
8
|
-
import networkx as nx
|
|
9
|
-
import pandas as pd
|
|
10
|
-
from more_itertools import pairwise
|
|
11
|
-
from tqdm.auto import tqdm
|
|
12
|
-
|
|
13
|
-
from .priority import DEFAULT_PRIORITY_LIST
|
|
14
|
-
from .xrefs_pipeline import get_graph_from_xref_df
|
|
15
|
-
from .. import resource_utils
|
|
16
|
-
from ..utils.io import get_reader, get_writer
|
|
17
|
-
|
|
18
|
-
__all__ = [
|
|
19
|
-
"Canonicalizer",
|
|
20
|
-
"all_shortest_paths",
|
|
21
|
-
"single_source_shortest_path",
|
|
22
|
-
"get_equivalent",
|
|
23
|
-
"get_priority_curie",
|
|
24
|
-
"remap_file_stream",
|
|
25
|
-
]
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
class Canonicalizer:
    """Wrap a graph and priority list to allow getting the best identifier.

    Nodes of ``graph`` are CURIEs whose node data carries a ``"prefix"`` entry.
    For a given CURIE, every node reachable within ``cutoff`` steps is scored
    by its prefix's position in ``priority`` and the best-scoring node wins.
    """

    #: A graph from :func:`get_graph_from_xref_df`
    graph: nx.Graph

    #: A list of prefixes. The ones with the lower index are higher priority
    priority: Optional[list[str]] = None

    #: Longest length paths allowed
    cutoff: int = 5

    #: Score of each prefix in ``priority`` (larger is better); set in ``__post_init__``
    _priority: Mapping[str, int] = field(init=False)

    def __post_init__(self):
        """Initialize the priority map based on the priority list."""
        if self.priority is None:
            # Copy the default so that mutating ``self.priority`` later cannot
            # silently change the module-level default for everyone else.
            self.priority = list(DEFAULT_PRIORITY_LIST)
        # first entry gets the largest score, last entry gets 1
        self._priority = {entry: len(self.priority) - i for i, entry in enumerate(self.priority)}

    def _key(self, curie: str) -> Optional[int]:
        """Return the score of the CURIE's prefix, or None if it is not prioritized."""
        prefix = self.graph.nodes[curie]["prefix"]
        return self._priority.get(prefix)

    def _get_priority_dict(self, curie: str) -> Mapping[str, int]:
        """Map every CURIE reachable from ``curie`` to its score."""
        return dict(self._iterate_priority_targets(curie))

    def _iterate_priority_targets(self, curie: str) -> Iterable[tuple[str, int]]:
        """Yield ``(target, score)`` for each node within ``cutoff`` steps of ``curie``."""
        for target in nx.single_source_shortest_path(self.graph, curie, cutoff=self.cutoff):
            priority = self._key(target)
            if priority is not None:
                yield target, priority
            elif target == curie:
                # the query itself beats any other unprioritized node
                yield target, 0
            else:
                yield target, -1

    def canonicalize(self, curie: str) -> str:
        """Get the best CURIE from the given CURIE.

        :param curie: The CURIE to canonicalize.
        :returns: The highest-scoring reachable CURIE, or ``curie`` itself if
            it is not a node in the graph.
        """
        if curie not in self.graph:
            return curie
        priority_dict = self._get_priority_dict(curie)
        return max(priority_dict, key=priority_dict.__getitem__)

    @classmethod
    def get_default(cls, priority: Optional[Iterable[str]] = None) -> "Canonicalizer":
        """Get the default canonicalizer.

        :param priority: Optional prefixes in priority order. They are turned
            into a tuple so the cached helper can hash them.
        """
        if priority is not None:
            priority = tuple(priority)
        return cls._get_default_helper(priority=priority)

    @classmethod
    @lru_cache
    def _get_default_helper(cls, priority: Optional[tuple[str, ...]] = None) -> "Canonicalizer":
        """Help get the default canonicalizer (cached per class and priority tuple)."""
        graph = cls._get_default_graph()
        return cls(graph=graph, priority=list(priority) if priority else None)

    @staticmethod
    @lru_cache
    def _get_default_graph() -> nx.Graph:
        """Build, once, the graph from the dataframe returned by ``ensure_inspector_javert_df``."""
        df = resource_utils.ensure_inspector_javert_df()
        graph = get_graph_from_xref_df(df)
        return graph

    def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[tuple[str, str]]:
        """Iterate over the canonical mapping from all nodes to their canonical CURIEs.

        :param use_tqdm: Whether to wrap the node iteration in a progress bar.
        """
        nodes = self.graph.nodes()
        if use_tqdm:
            nodes = tqdm(
                nodes,
                total=self.graph.number_of_nodes(),
                desc="building flat mapping",
                unit_scale=True,
                unit="CURIE",
            )
        for node in nodes:
            yield node, self.canonicalize(node)

    def get_flat_mapping(self, use_tqdm: bool = True) -> Mapping[str, str]:
        """Get a canonical mapping from all nodes to their canonical CURIEs."""
        return dict(self.iterate_flat_mapping(use_tqdm=use_tqdm))

    def single_source_shortest_path(
        self,
        curie: str,
        cutoff: Optional[int] = None,
    ) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
        """Get all shortest paths between given entity and its equivalent entities.

        Delegates to the module-level :func:`single_source_shortest_path`.
        """
        return single_source_shortest_path(graph=self.graph, curie=curie, cutoff=cutoff)

    def all_shortest_paths(
        self, source_curie: str, target_curie: str
    ) -> list[list[Mapping[str, str]]]:
        """Get all shortest paths between the two entities.

        Delegates to the module-level :func:`all_shortest_paths`.
        """
        return all_shortest_paths(
            graph=self.graph, source_curie=source_curie, target_curie=target_curie
        )

    @classmethod
    def from_df(cls, df: pd.DataFrame) -> "Canonicalizer":
        """Instantiate from a dataframe."""
        return cls(graph=get_graph_from_xref_df(df))
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def all_shortest_paths(
    graph: nx.Graph, source_curie: str, target_curie: str
) -> list[list[Mapping[str, str]]]:
    """Get all shortest paths between the two CURIEs.

    :param graph: The cross-reference graph.
    :param source_curie: The CURIE the paths start at.
    :param target_curie: The CURIE the paths end at.
    :returns: One list per path; each path is a list of edge dictionaries with
        the keys ``source``, ``target`` and ``provenance``.
    :raises KeyError: If an edge has neither a ``source`` nor a ``provenance``
        attribute.
    """

    def _edge_provenance(s: str, t: str) -> str:
        # This function historically read the ``source`` edge attribute, while
        # ``single_source_shortest_path`` in this module reads ``provenance``
        # from the same graph. Prefer ``source`` when present so existing
        # graphs behave as before, and fall back to ``provenance`` instead of
        # failing with a KeyError.
        data = graph[s][t]
        if "source" in data:
            return data["source"]
        return data["provenance"]

    _paths = nx.all_shortest_paths(graph, source=source_curie, target=target_curie)
    return [
        [
            {"source": s, "target": t, "provenance": _edge_provenance(s, t)}
            for s, t in pairwise(_path)
        ]
        for _path in _paths
    ]
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def single_source_shortest_path(
    graph: nx.Graph,
    curie: str,
    cutoff: Optional[int] = None,
) -> Optional[Mapping[str, list[Mapping[str, str]]]]:
    """Get the shortest path from the CURIE to every other CURIE it can reach.

    :param graph: The cross-reference graph; edges must carry a
        ``provenance`` attribute.
    :param curie: The CURIE to start from.
    :param cutoff: Maximum path length, passed through to networkx.
    :returns: ``None`` if ``curie`` is not in the graph. Otherwise a mapping
        from each reachable CURIE (excluding ``curie`` itself) to its path,
        given as a list of edge dictionaries with the keys ``source``,
        ``target`` and ``provenance``.

    Things that didn't work:

    Unresponsive
    ------------
    .. code-block:: python

        for curies in tqdm(
            nx.connected_components(graph), desc="filling connected components", unit_scale=True
        ):
            for c1, c2 in itt.combinations(curies, r=2):
                if not graph.has_edge(c1, c2):
                    graph.add_edge(c1, c2, inferred=True)

    Way too slow
    ------------
    .. code-block:: python

        for curie in tqdm(
            graph, total=graph.number_of_nodes(), desc="mapping connected components", unit_scale=True
        ):
            for incident_curie in nx.node_connected_component(graph, curie):
                if not graph.has_edge(curie, incident_curie):
                    graph.add_edge(curie, incident_curie, inferred=True)

    Also consider the condensation of the graph:
    https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.components.condensation.html#networkx.algorithms.components.condensation
    """
    if curie not in graph:
        return None

    node_paths = nx.single_source_shortest_path(graph, curie, cutoff=cutoff)

    result = {}
    for reached, nodes in node_paths.items():
        if reached == curie:
            # don't map to self
            continue
        result[reached] = [
            {"source": a, "target": b, "provenance": graph[a][b]["provenance"]}
            for a, b in pairwise(nodes)
        ]
    return result
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
def get_equivalent(curie: str, cutoff: Optional[int] = None) -> set[str]:
    """Get the CURIEs reachable from ``curie`` in the default canonicalizer's graph.

    :param curie: The CURIE to look up.
    :param cutoff: Maximum path length, passed through to the path search.
    :returns: The reachable CURIEs; empty if ``curie`` is unknown or isolated.
    """
    paths = Canonicalizer.get_default().single_source_shortest_path(curie=curie, cutoff=cutoff)
    if not paths:
        return set()
    return set(paths)
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def get_priority_curie(curie: str) -> str:
    """Return the CURIE that the default canonicalizer ranks best for ``curie``."""
    return Canonicalizer.get_default().canonicalize(curie)
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def remap_file_stream(file_in, file_out, column: int, sep="\t") -> None:
    """Copy a delimited stream, replacing one column with its priority CURIE.

    :param file_in: Input passed to ``get_reader``.
    :param file_out: Output passed to ``get_writer``.
    :param column: Index of the column holding the CURIE to remap.
    :param sep: Field separator used for both reading and writing.
    """
    rows = get_reader(file_in, sep=sep)
    out = get_writer(file_out, sep=sep)
    for record in rows:
        remapped = get_priority_curie(record[column])
        record[column] = remapped
        out.writerow(record)
|
pyobo/xrefdb/priority.py
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
"""Configuration for the default priority list."""
|
|
2
|
-
|
|
3
|
-
import bioregistry
|
|
4
|
-
|
|
5
|
-
__all__ = [
|
|
6
|
-
"DEFAULT_PRIORITY_LIST",
|
|
7
|
-
]
|
|
8
|
-
|
|
9
|
-
_DEFAULT_PRIORITY_LIST = [
|
|
10
|
-
# Genes
|
|
11
|
-
"ncbigene",
|
|
12
|
-
"hgnc",
|
|
13
|
-
"rgd",
|
|
14
|
-
"mgi",
|
|
15
|
-
"ensembl",
|
|
16
|
-
"uniprot",
|
|
17
|
-
# Chemicals
|
|
18
|
-
# 'inchikey',
|
|
19
|
-
# 'inchi',
|
|
20
|
-
# 'smiles',
|
|
21
|
-
"pubchem.compound",
|
|
22
|
-
"chebi",
|
|
23
|
-
"drugbank",
|
|
24
|
-
"chembl.compound",
|
|
25
|
-
"zinc",
|
|
26
|
-
# protein families and complexes (and famplexes :))
|
|
27
|
-
"complexportal",
|
|
28
|
-
"fplx",
|
|
29
|
-
"ec-code",
|
|
30
|
-
"interpro",
|
|
31
|
-
"pfam",
|
|
32
|
-
"signor",
|
|
33
|
-
# Pathologies/phenotypes
|
|
34
|
-
"mondo",
|
|
35
|
-
"efo",
|
|
36
|
-
"doid",
|
|
37
|
-
"hp",
|
|
38
|
-
# Taxa
|
|
39
|
-
"ncbitaxon",
|
|
40
|
-
# If you can get away from MeSH, do it
|
|
41
|
-
"mesh",
|
|
42
|
-
"icd",
|
|
43
|
-
]
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def _get_default_priority_list():
    """Normalize ``_DEFAULT_PRIORITY_LIST`` via bioregistry, preserving order.

    :returns: The normalized prefixes as a list.
    :raises RuntimeError: If an entry cannot be normalized, or if two entries
        normalize to the same prefix.
    """
    rv = []
    for _entry in _DEFAULT_PRIORITY_LIST:
        _prefix = bioregistry.normalize_prefix(_entry)
        if not _prefix:
            raise RuntimeError(f"unresolved prefix: {_entry}")
        already_present = _prefix in rv
        if already_present:
            raise RuntimeError(f"duplicate found in priority list: {_entry}/{_prefix}")
        rv += [_prefix]
    return rv
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
DEFAULT_PRIORITY_LIST = _get_default_priority_list()
|
|
59
|
-
del _get_default_priority_list
|
pyobo/xrefdb/sources/__init__.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
"""Sources of xrefs not from OBO."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
from collections.abc import Iterable, Mapping
|
|
5
|
-
from functools import lru_cache
|
|
6
|
-
from typing import Callable, Optional
|
|
7
|
-
|
|
8
|
-
import pandas as pd
|
|
9
|
-
from class_resolver import FunctionResolver
|
|
10
|
-
from tqdm.auto import tqdm
|
|
11
|
-
|
|
12
|
-
# Public names of this module; each name is listed exactly once.
__all__ = [
    "iter_xref_plugins",
    "has_xref_plugin",
    "run_xref_plugin",
]
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger(__name__)
|
|
20
|
-
|
|
21
|
-
XrefGetter = Callable[[], pd.DataFrame]
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@lru_cache
def _get_xref_plugins() -> Mapping[str, XrefGetter]:
    """Load, once, the functions registered under the ``pyobo.xrefs`` entry point."""
    return FunctionResolver.from_entrypoint("pyobo.xrefs").lookup_dict
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def has_xref_plugin(prefix: str) -> bool:
    """Return whether an xref plugin is registered under ``prefix``."""
    plugins = _get_xref_plugins()
    return prefix in plugins
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def run_xref_plugin(prefix: str) -> Optional[pd.DataFrame]:
    """Run the xref plugin registered under ``prefix``.

    :param prefix: The key of the plugin to run.
    :returns: The plugin's dataframe, or ``None`` (after logging a warning)
        when the plugin returns anything other than a single dataframe.
    :raises KeyError: If no plugin is registered under ``prefix``.
    """
    rv = _get_xref_plugins()[prefix]()

    if isinstance(rv, pd.DataFrame):
        return rv

    logger.warning("can not load %s since it yields many dataframes", prefix)
    # Previously an implicit fall-through; made explicit to match the annotation.
    return None
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def iter_xref_plugins(
    use_tqdm: bool = True, skip_below: Optional[str] = None
) -> Iterable[pd.DataFrame]:
    """Run every registered xref plugin, in prefix order, and yield its dataframes.

    :param use_tqdm: Whether to show a progress bar.
    :param skip_below: If given, plugins whose prefix sorts before this value
        are skipped.
    :raises TypeError: If a plugin returns something that is neither a
        dataframe nor an iterable.
    """
    it = tqdm(sorted(_get_xref_plugins().items()), desc="Mapping Plugins", disable=not use_tqdm)
    for prefix, get_df in it:
        if skip_below and prefix < skip_below:
            continue
        it.set_postfix({"prefix": prefix})
        rv = get_df()
        if isinstance(rv, pd.DataFrame):
            yield rv
        elif isinstance(rv, Iterable):
            # a plugin may produce several dataframes
            yield from rv
        else:
            # name the offending plugin and type so the failure is traceable
            raise TypeError(
                f"xref plugin for {prefix} returned unsupported type: {type(rv)}"
            )
|
pyobo/xrefdb/sources/biomappings.py
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
"""Get the Biomappings manually curated equivalences."""
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
4
|
-
from pystow.utils import get_commit
|
|
5
|
-
|
|
6
|
-
from pyobo.constants import (
|
|
7
|
-
PROVENANCE,
|
|
8
|
-
SOURCE_ID,
|
|
9
|
-
SOURCE_PREFIX,
|
|
10
|
-
TARGET_ID,
|
|
11
|
-
TARGET_PREFIX,
|
|
12
|
-
XREF_COLUMNS,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
__all__ = [
|
|
16
|
-
"get_biomappings_df",
|
|
17
|
-
]
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def get_biomappings_df() -> pd.DataFrame:
    """Download the Biomappings curated mappings as an xref dataframe.

    The file is read from the repository at the commit returned by
    ``get_commit``, and that URL is recorded as the provenance of every row.

    :returns: A dataframe restricted to ``XREF_COLUMNS``.
    """
    sha = get_commit("biopragmatics", "biomappings")
    url = f"https://raw.githubusercontent.com/biopragmatics/biomappings/{sha}/src/biomappings/resources/mappings.tsv"
    column_names = {
        "source prefix": SOURCE_PREFIX,
        "source identifier": SOURCE_ID,
        "target prefix": TARGET_PREFIX,
        "target identifier": TARGET_ID,
    }

    df = pd.read_csv(url, sep="\t")
    df[PROVENANCE] = url
    df = df.rename(columns=column_names)
    return df[XREF_COLUMNS]
|
pyobo/xrefdb/sources/cbms2019.py
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
"""Cross references from cbms2019.
|
|
2
|
-
|
|
3
|
-
.. seealso:: https://github.com/pantapps/cbms2019
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
from pyobo.constants import (
|
|
9
|
-
PROVENANCE,
|
|
10
|
-
SOURCE_ID,
|
|
11
|
-
SOURCE_PREFIX,
|
|
12
|
-
TARGET_ID,
|
|
13
|
-
TARGET_PREFIX,
|
|
14
|
-
XREF_COLUMNS,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"get_cbms2019_xrefs_df",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
#: Columns: DOID, DO name, xref xb, xref ix
|
|
22
|
-
base_url = "https://raw.githubusercontent.com/pantapps/cbms2019/master"
|
|
23
|
-
doid_to_all = f"{base_url}/mesh_icd10cm_via_do_not_mapped_umls.tsv"
|
|
24
|
-
#: Columns: SNOMEDCT_ID, SNOMEDCIT_NAME, ICD10CM_ID, ICD10CM_NAME, MESH_ID
|
|
25
|
-
all_to_all = f"{base_url}/mesh_icd10cm_via_snomedct_not_mapped_umls.tsv"
|
|
26
|
-
#: Columns: DOID, DO name, xref xb, xref ix
|
|
27
|
-
doid_to_all_2 = f"{base_url}/mesh_snomedct_via_do_not_mapped_umls.tsv"
|
|
28
|
-
#: Columns: SNOMEDCT_ID, SNOMEDCIT_NAME, ICD10CM_ID, ICD10CM_NAME, MESH_ID
|
|
29
|
-
all_to_all_2 = f"{base_url}/mesh_snomedct_via_icd10cm_not_mapped_umls.tsv"
|
|
30
|
-
|
|
31
|
-
NSM = {
|
|
32
|
-
"MESH": "mesh",
|
|
33
|
-
"ICD10CM": "icd",
|
|
34
|
-
"SNOMEDCT_US_2016_03_01": "snomedct",
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _get_doid(url: str) -> pd.DataFrame:
    """Load a DOID-keyed mapping file as an xref dataframe.

    :param url: Location of a TSV file with the columns ``DO_ID``,
        ``resource`` and ``resource_ID``; also recorded as provenance.
    :returns: A dataframe restricted to ``XREF_COLUMNS``.
    """

    def _strip_doid(s):
        # drop the leading "DOID:" from the identifier
        return s[len("DOID:") :]

    df = pd.read_csv(url, sep="\t", usecols=["DO_ID", "resource", "resource_ID"])
    df.columns = [SOURCE_ID, TARGET_PREFIX, TARGET_ID]

    df[SOURCE_PREFIX] = "doid"
    df[SOURCE_ID] = df[SOURCE_ID].map(_strip_doid)
    df[PROVENANCE] = url
    # resource names without an entry in NSM map to None
    df[TARGET_PREFIX] = df[TARGET_PREFIX].map(NSM.get)
    return df[XREF_COLUMNS]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _get_mesh_to_icd_via_doid() -> pd.DataFrame:
    """Load the xrefs from the ``doid_to_all`` file."""
    df = _get_doid(doid_to_all)
    return df
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _get_mesh_to_icd_via_snomedct() -> pd.DataFrame:
    """Load MeSH-SNOMEDCT and SNOMEDCT-ICD xrefs from the ``all_to_all`` file.

    Each input row contributes two output rows, in that order.
    """
    df = pd.read_csv(all_to_all, sep="\t", usecols=["SNOMEDCT_ID", "ICD10CM_ID", "MESH_ID"])
    rows = [
        row
        for snomedct_id, icd_id, mesh_id in df.values
        for row in (
            ("mesh", mesh_id, "snomedct", snomedct_id, all_to_all),
            ("snomedct", snomedct_id, "icd", icd_id, all_to_all),
        )
    ]
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def _get_mesh_to_snomedct_via_doid() -> pd.DataFrame:
    """Load the xrefs from the ``doid_to_all_2`` file."""
    df = _get_doid(doid_to_all_2)
    return df
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _get_mesh_to_snomedct_via_icd() -> pd.DataFrame:
    """Load MeSH-ICD and ICD-SNOMEDCT xrefs from the ``all_to_all_2`` file.

    Each input row contributes two output rows, in that order. SNOMEDCT
    identifiers are read as floats and converted to integer strings.

    :returns: A dataframe with the columns ``XREF_COLUMNS``.
    """
    df = pd.read_csv(
        all_to_all_2,
        sep="\t",
        usecols=["SNOMEDCT_ID", "ICD10CM_ID", "MESH_ID"],
        dtype={"SNOMEDCT_ID": float},
    )
    rows = []
    for snomedct_id, icd_id, mesh_id in df.values:
        snomedct_id = str(int(snomedct_id))
        # Provenance is the file actually read (all_to_all_2); the previous
        # code recorded all_to_all here, which is a different file.
        rows.append(("mesh", mesh_id, "icd", icd_id, all_to_all_2))
        rows.append(("icd", icd_id, "snomedct", snomedct_id, all_to_all_2))
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def get_cbms2019_xrefs_df() -> pd.DataFrame:
    """Concatenate the four CBMS2019 xref tables and drop duplicate rows."""
    loaders = (
        _get_mesh_to_icd_via_doid,
        _get_mesh_to_icd_via_snomedct,
        _get_mesh_to_snomedct_via_doid,
        _get_mesh_to_snomedct_via_icd,
    )
    frames = [load() for load in loaders]
    combined = pd.concat(frames)
    return combined.drop_duplicates()
|
pyobo/xrefdb/sources/chembl.py
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
"""Get ChEMBL xrefs."""
|
|
2
|
-
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from pyobo.api.utils import get_version
|
|
8
|
-
from pyobo.constants import (
|
|
9
|
-
PROVENANCE,
|
|
10
|
-
SOURCE_ID,
|
|
11
|
-
SOURCE_PREFIX,
|
|
12
|
-
TARGET_ID,
|
|
13
|
-
TARGET_PREFIX,
|
|
14
|
-
XREF_COLUMNS,
|
|
15
|
-
)
|
|
16
|
-
from pyobo.utils.path import ensure_df
|
|
17
|
-
|
|
18
|
-
CHEMBL_COMPOUND_PREFIX = "chembl.compound"
|
|
19
|
-
CHEMBL_TARGET_PREFIX = "chembl.target"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def get_chembl_compound_equivalences_raw(
    usecols=None, version: Optional[str] = None
) -> pd.DataFrame:
    """Download the ChEMBL chemical representations file as a dataframe.

    :param usecols: Column selection forwarded to ``ensure_df``.
    :param version: ChEMBL release; looked up with ``get_version`` if omitted.
    """
    version = get_version("chembl") if version is None else version

    base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}"
    url = f"{base_url}/chembl_{version}_chemreps.txt.gz"
    df = ensure_df(CHEMBL_COMPOUND_PREFIX, url=url, sep="\t", usecols=usecols)
    return df
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame:
    """Get ChEMBL compound to InChIKey equivalences.

    :param version: ChEMBL release; looked up with ``get_version`` if omitted.
    :returns: A dataframe with the columns ``XREF_COLUMNS``, one row per compound.
    """
    version = get_version("chembl") if version is None else version

    df = get_chembl_compound_equivalences_raw(version=version)
    # No smiles/inchi rows since they can have variable length; only the
    # InChIKey is emitted for each compound.
    rows = [
        ("chembl.compound", chembl, "inchikey", inchi_key, f"chembl{version}")
        for chembl, _smiles, _inchi, inchi_key in df.values
    ]
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame:
    """Get ChEMBL target to UniProt equivalences.

    :param version: ChEMBL release; looked up with ``get_version`` if omitted.
    :returns: A dataframe restricted to ``XREF_COLUMNS``.
    """
    version = get_version("chembl") if version is None else version

    url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
    df = ensure_df(
        CHEMBL_TARGET_PREFIX,
        url=url,
        sep="\t",
        usecols=[0, 1],
        # first file column is named as the target, second as the source
        names=[TARGET_ID, SOURCE_ID],  # switch around
    )
    constant_columns = (
        (SOURCE_PREFIX, "chembl.target"),
        (TARGET_PREFIX, "uniprot"),
        (PROVENANCE, f"chembl{version}"),
    )
    for column, value in constant_columns:
        df.loc[:, column] = value
    return df[XREF_COLUMNS]
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get all ChEMBL equivalences: compounds first, then proteins.

    :param version: ChEMBL release; looked up with ``get_version`` if omitted.
    """
    version = get_version("chembl") if version is None else version

    compounds = get_chembl_compound_equivalences(version=version)
    proteins = get_chembl_protein_equivalences(version=version)
    return pd.concat([compounds, proteins])
|