pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/xrefdb/sources/compath.py
DELETED
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
"""Import ComPath mappings between pathways."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Iterable
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from pystow.utils import get_commit
|
|
7
|
-
|
|
8
|
-
from pyobo.constants import (
|
|
9
|
-
PROVENANCE,
|
|
10
|
-
SOURCE_ID,
|
|
11
|
-
SOURCE_PREFIX,
|
|
12
|
-
TARGET_ID,
|
|
13
|
-
TARGET_PREFIX,
|
|
14
|
-
XREF_COLUMNS,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"iter_compath_dfs",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def _get_df(name: str, *, sha: str, sep: str = ",") -> pd.DataFrame:
    """Download one ComPath mapping file and return its equivalences as xrefs.

    :param name: File name inside the ``mappings/`` folder of the
        ``ComPath/compath-resources`` GitHub repository.
    :param sha: Git reference (commit hash) the file is read from.
    :param sep: Column separator of the file.
    :returns: A dataframe restricted to the ``XREF_COLUMNS`` columns that
        contains only the rows whose mapping type is ``equivalentTo``.
    """
    url = f"https://raw.githubusercontent.com/ComPath/compath-resources/{sha}/mappings/{name}"
    df = pd.read_csv(
        url,
        sep=sep,
        usecols=["Source Resource", "Source ID", "Mapping Type", "Target Resource", "Target ID"],
    )
    df = df.rename(
        columns={
            "Source Resource": SOURCE_PREFIX,
            "Source ID": SOURCE_ID,
            "Target Resource": TARGET_PREFIX,
            "Target ID": TARGET_ID,
        },
    )
    # Copy the filtered rows explicitly: assigning new columns to a boolean-mask
    # slice can raise pandas' SettingWithCopyWarning and leaves it ambiguous
    # which frame is being modified.
    df = df.loc[df["Mapping Type"] == "equivalentTo"].copy()
    del df["Mapping Type"]
    df[PROVENANCE] = url
    df = df[XREF_COLUMNS].copy()

    # ComPath labels KEGG pathways with the bare resource name ``kegg``.
    df[SOURCE_PREFIX] = df[SOURCE_PREFIX].map(_fix_kegg_prefix)
    df[TARGET_PREFIX] = df[TARGET_PREFIX].map(_fix_kegg_prefix)
    # The identifier fix depends on the (already normalized) prefix of the same row.
    df[SOURCE_ID] = [
        _fix_kegg_identifier(prefix, identifier)
        for prefix, identifier in zip(df[SOURCE_PREFIX], df[SOURCE_ID])
    ]
    df[TARGET_ID] = [
        _fix_kegg_identifier(prefix, identifier)
        for prefix, identifier in zip(df[TARGET_PREFIX], df[TARGET_ID])
    ]

    return df
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _fix_kegg_identifier(prefix, identifier) -> str:
|
|
58
|
-
if prefix == "kegg.pathway":
|
|
59
|
-
return identifier[len("path:") :]
|
|
60
|
-
return identifier
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def _fix_kegg_prefix(s):
|
|
64
|
-
return s if s != "kegg" else "kegg.pathway"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def iter_compath_dfs() -> Iterable[pd.DataFrame]:
    """Yield one xref dataframe per ComPath mapping file.

    The commit of ``ComPath/compath-resources`` is looked up once with
    ``get_commit`` and reused for every file, so all frames come from the
    same revision.
    """
    sha = get_commit("ComPath", "compath-resources")

    mapping_files = (
        "kegg_reactome.csv",
        "kegg_wikipathways.csv",
        "pathbank_kegg.csv",
        "pathbank_reactome.csv",
        "pathbank_wikipathways.csv",
        "special_mappings.csv",
        "wikipathways_reactome.csv",
    )
    for mapping_file in mapping_files:
        yield _get_df(mapping_file, sha=sha)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def get_compath_xrefs_df() -> pd.DataFrame:
    """Get all ComPath mappings concatenated into a single dataframe."""
    frames = iter_compath_dfs()
    return pd.concat(frames)
|
pyobo/xrefdb/sources/famplex.py
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
"""Get FamPlex xrefs."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
from collections.abc import Mapping
|
|
5
|
-
from functools import lru_cache
|
|
6
|
-
|
|
7
|
-
import bioregistry
|
|
8
|
-
import pandas as pd
|
|
9
|
-
|
|
10
|
-
from ...constants import (
|
|
11
|
-
PROVENANCE,
|
|
12
|
-
SOURCE_ID,
|
|
13
|
-
SOURCE_PREFIX,
|
|
14
|
-
TARGET_ID,
|
|
15
|
-
TARGET_PREFIX,
|
|
16
|
-
XREF_COLUMNS,
|
|
17
|
-
)
|
|
18
|
-
from ...utils.path import ensure_df
|
|
19
|
-
|
|
20
|
-
__all__ = [
|
|
21
|
-
"get_famplex_xrefs_df",
|
|
22
|
-
]
|
|
23
|
-
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
|
-
|
|
26
|
-
URL = "https://github.com/sorgerlab/famplex/raw/master/equivalences.csv"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _get_famplex_df(force: bool = False) -> pd.DataFrame:
    """Load the FamPlex equivalences CSV via ``ensure_df``.

    :param force: Passed through to ``ensure_df`` unchanged.
    :returns: A dataframe whose three columns are named, in file order,
        ``TARGET_PREFIX``, ``TARGET_ID`` and ``SOURCE_ID``.
    """
    # The file has no header row, so the column names are supplied here.
    read_options = {
        "header": None,
        "names": [TARGET_PREFIX, TARGET_ID, SOURCE_ID],
        "sep": ",",
    }
    return ensure_df(prefix="fplx", url=URL, force=force, **read_options)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get_famplex_xrefs_df(force: bool = False) -> pd.DataFrame:
    """Get xrefs from FamPlex.

    :param force: Passed through to :func:`_get_famplex_df`.
    :returns: A dataframe restricted to the ``XREF_COLUMNS`` columns. Rows
        whose target prefix ``bioregistry.normalize_prefix`` maps to a missing
        value are dropped.
    """
    df = _get_famplex_df(force=force)
    df[TARGET_PREFIX] = df[TARGET_PREFIX].map(bioregistry.normalize_prefix)
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    df = df[df[TARGET_PREFIX].notna()].copy()
    df[SOURCE_PREFIX] = "fplx"
    # Use the module constant rather than repeating the URL literal, so the
    # recorded provenance cannot drift from the URL that was downloaded.
    df[PROVENANCE] = URL
    df = df[XREF_COLUMNS]
    return df
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@lru_cache
def get_remapping(force: bool = False) -> Mapping[tuple[str, str], tuple[str, str, str]]:
    """Build a lookup from external (prefix, identifier) pairs to FamPlex entries.

    Each value is the triple ``("fplx", source_id, source_id)``. Rows whose
    namespace is MEDSCAN are skipped, and rows whose namespace cannot be
    normalized by ``bioregistry.normalize_prefix`` are logged and left out.
    """
    remapping = {}
    for namespace, external_id, famplex_id in _get_famplex_df(force=force).values:
        if namespace.lower() == "medscan":
            # MEDSCAN is proprietary and Ben said to skip using these identifiers
            continue
        normalized = bioregistry.normalize_prefix(namespace)
        if normalized is not None:
            remapping[normalized, external_id] = ("fplx", famplex_id, famplex_id)
        else:
            logger.warning("could not remap %s", namespace)
    return remapping
|
pyobo/xrefdb/sources/gilda.py
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
"""Cross references from Gilda.
|
|
2
|
-
|
|
3
|
-
.. seealso:: https://github.com/indralabs/gilda
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import bioregistry
|
|
7
|
-
import pandas as pd
|
|
8
|
-
|
|
9
|
-
from pyobo.constants import (
|
|
10
|
-
PROVENANCE,
|
|
11
|
-
SOURCE_ID,
|
|
12
|
-
SOURCE_PREFIX,
|
|
13
|
-
TARGET_ID,
|
|
14
|
-
TARGET_PREFIX,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"get_gilda_xrefs_df",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
GILDA_MAPPINGS = (
|
|
22
|
-
"https://raw.githubusercontent.com/indralab/gilda/master/gilda/resources/mesh_mappings.tsv"
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def get_gilda_xrefs_df() -> pd.DataFrame:
    """Get xrefs from Gilda.

    Reads columns 0, 1, 3 and 4 of the header-less, tab-separated file at
    ``GILDA_MAPPINGS``, normalizes both prefix columns with
    ``bioregistry.normalize_prefix`` and cleans both identifier columns with
    :func:`_fix_gogo`.
    """
    column_names = [SOURCE_PREFIX, SOURCE_ID, TARGET_PREFIX, TARGET_ID]
    df = pd.read_csv(
        GILDA_MAPPINGS,
        sep="\t",
        header=None,
        usecols=[0, 1, 3, 4],
        names=column_names,
    )
    df[PROVENANCE] = GILDA_MAPPINGS

    df[SOURCE_PREFIX] = df[SOURCE_PREFIX].map(bioregistry.normalize_prefix)
    df[TARGET_PREFIX] = df[TARGET_PREFIX].map(bioregistry.normalize_prefix)

    df[SOURCE_ID] = df[SOURCE_ID].map(_fix_gogo)
    df[TARGET_ID] = df[TARGET_ID].map(_fix_gogo)

    return df
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def _fix_gogo(s):
|
|
47
|
-
for prefix in ("CHEBI:", "DOID:", "HP:", "GO:"):
|
|
48
|
-
if s.startswith(prefix):
|
|
49
|
-
return s[len(prefix) :]
|
|
50
|
-
return s
|
pyobo/xrefdb/sources/intact.py
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
"""Get the xrefs from IntAct."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Mapping
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from pyobo.api.utils import get_version
|
|
8
|
-
from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX, XREF_COLUMNS
|
|
9
|
-
from pyobo.utils.cache import cached_mapping
|
|
10
|
-
from pyobo.utils.path import prefix_cache_join
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
"COMPLEXPORTAL_MAPPINGS",
|
|
14
|
-
"get_intact_complex_portal_xrefs_df",
|
|
15
|
-
"get_complexportal_mapping",
|
|
16
|
-
"get_intact_reactome_xrefs_df",
|
|
17
|
-
"get_reactome_mapping",
|
|
18
|
-
]
|
|
19
|
-
|
|
20
|
-
COMPLEXPORTAL_MAPPINGS = (
|
|
21
|
-
"ftp://ftp.ebi.ac.uk/pub/databases/intact/current/various/cpx_ebi_ac_translation.txt"
|
|
22
|
-
)
|
|
23
|
-
REACTOME_MAPPINGS = "ftp://ftp.ebi.ac.uk/pub/databases/intact/current/various/reactome.dat"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _get_complexportal_df():
    """Read the header-less, tab-separated file at ``COMPLEXPORTAL_MAPPINGS``.

    The two columns are named ``source_id`` and ``target_id``.
    """
    column_names = ["source_id", "target_id"]
    return pd.read_csv(COMPLEXPORTAL_MAPPINGS, sep="\t", header=None, names=column_names)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def get_intact_complex_portal_xrefs_df() -> pd.DataFrame:
    """Get IntAct-Complex Portal xrefs as a dataframe with the ``XREF_COLUMNS`` columns."""
    df = _get_complexportal_df()
    # Every row shares the same prefixes and provenance.
    constant_columns = (
        (SOURCE_PREFIX, "intact"),
        (TARGET_PREFIX, "complexportal"),
        (PROVENANCE, COMPLEXPORTAL_MAPPINGS),
    )
    for column, value in constant_columns:
        df[column] = value
    return df[XREF_COLUMNS]
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def get_complexportal_mapping() -> Mapping[str, str]:
    """Get the IntAct to Complex Portal mapping.

    The result is produced through ``cached_mapping``, using a
    ``complexportal.tsv`` path under the ``intact`` prefix for the current
    IntAct version.

    Is basically equivalent to:

    .. code-block:: python

        from pyobo import get_filtered_xrefs

        intact_complexportal_mapping = get_filtered_xrefs("intact", "complexportal")
    """
    cache_path = prefix_cache_join(
        "intact", "xrefs", name="complexportal.tsv", version=get_version("intact")
    )

    @cached_mapping(path=cache_path, header=["intact_id", "complexportal_id"])
    def _cache():
        # Two-column frame -> {source_id: target_id}
        return dict(_get_complexportal_df().values)

    return _cache()
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _get_reactome_df():
    """Read the header-less, tab-separated file at ``REACTOME_MAPPINGS``.

    The two columns are named ``source_id`` and ``target_id``.
    """
    column_names = ["source_id", "target_id"]
    return pd.read_csv(REACTOME_MAPPINGS, sep="\t", header=None, names=column_names)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def get_intact_reactome_xrefs_df() -> pd.DataFrame:
    """Get IntAct-Reactome xrefs as a dataframe with the ``XREF_COLUMNS`` columns."""
    df = _get_reactome_df()
    # Every row shares the same prefixes and provenance.
    constant_columns = (
        (SOURCE_PREFIX, "intact"),
        (TARGET_PREFIX, "reactome"),
        (PROVENANCE, REACTOME_MAPPINGS),
    )
    for column, value in constant_columns:
        df[column] = value
    return df[XREF_COLUMNS]
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def get_reactome_mapping() -> Mapping[str, str]:
    """Get IntAct to Reactome mapping.

    The result is produced through ``cached_mapping``, using a
    ``reactome.tsv`` path under the ``intact`` prefix for the current IntAct
    version.

    Is basically equivalent to:

    .. code-block:: python

        from pyobo import get_filtered_xrefs

        intact_reactome_mapping = get_filtered_xrefs("intact", "reactome")
    """

    @cached_mapping(
        path=prefix_cache_join(
            "intact", "xrefs", name="reactome.tsv", version=get_version("intact")
        ),
        header=["intact_id", "reactome_id"],
    )
    def _cache():
        # Load the Reactome file here. Reading the Complex Portal frame instead
        # would store Complex Portal identifiers under the Reactome cache path.
        df = _get_reactome_df()
        return dict(df.values)

    return _cache()
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def get_xrefs_df() -> pd.DataFrame:
    """Get all IntAct xrefs (Complex Portal first, then Reactome) in one dataframe."""
    complexportal_df = get_intact_complex_portal_xrefs_df()
    reactome_df = get_intact_reactome_xrefs_df()
    return pd.concat([complexportal_df, reactome_df])
|
pyobo/xrefdb/sources/ncit.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
"""Import NCIT mappings."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Iterable
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from ...constants import (
|
|
8
|
-
PROVENANCE,
|
|
9
|
-
SOURCE_ID,
|
|
10
|
-
SOURCE_PREFIX,
|
|
11
|
-
TARGET_ID,
|
|
12
|
-
TARGET_PREFIX,
|
|
13
|
-
XREF_COLUMNS,
|
|
14
|
-
)
|
|
15
|
-
from ...utils.path import ensure_df
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"iter_ncit_dfs",
|
|
19
|
-
"get_ncit_go_df",
|
|
20
|
-
"get_ncit_chebi_df",
|
|
21
|
-
"get_ncit_hgnc_df",
|
|
22
|
-
"get_ncit_uniprot_df",
|
|
23
|
-
]
|
|
24
|
-
|
|
25
|
-
PREFIX = "ncit"
|
|
26
|
-
|
|
27
|
-
HGNC_MAPPINGS_URL = (
|
|
28
|
-
"https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
|
|
29
|
-
+ "export_mapping&dictionary=NCIt_to_HGNC_Mapping&version=1.0"
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
GO_MAPPINGS_URL = (
|
|
33
|
-
"https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
|
|
34
|
-
+ "export_mapping&dictionary=GO_to_NCIt_Mapping&version=1.1"
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
CHEBI_MAPPINGS_URL = (
|
|
38
|
-
"https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
|
|
39
|
-
+ "export_mapping&dictionary=NCIt_to_ChEBI_Mapping&version=1.0"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
# url_swissprot = 'https://ncit.nci.nih.gov/ncitbrowser/ajax?action=' \
|
|
43
|
-
# 'export_mapping&uri=https://evs.nci.nih.gov/ftp1/' \
|
|
44
|
-
# 'NCI_Thesaurus/Mappings/NCIt-SwissProt_Mapping.txt'
|
|
45
|
-
|
|
46
|
-
UNIPROT_MAPPINGS_URL = (
|
|
47
|
-
"https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/Mappings/NCIt-SwissProt_Mapping.txt"
|
|
48
|
-
)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def get_ncit_xrefs_df() -> pd.DataFrame:
    """Concatenate every NCIT mapping dataframe into a single dataframe."""
    frames = iter_ncit_dfs()
    return pd.concat(frames)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def iter_ncit_dfs() -> Iterable[pd.DataFrame]:
    """Yield the NCIT mapping dataframes in the order HGNC, ChEBI, UniProt, GO.

    Each getter is only called when its frame is requested.
    """
    getters = (
        get_ncit_hgnc_df,
        get_ncit_chebi_df,
        get_ncit_uniprot_df,
        get_ncit_go_df,
    )
    for getter in getters:
        yield getter()
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def get_ncit_hgnc_df() -> pd.DataFrame:
    """Get NCIT-HGNC mappings.

    In this file, the only association type was mapsTo.

    :returns: A dataframe restricted to the ``XREF_COLUMNS`` columns with
        ``ncit`` as source prefix and ``hgnc`` as target prefix. The leading
        ``HGNC:`` is sliced off the target identifiers.
    """
    df = ensure_df(
        PREFIX,
        url=HGNC_MAPPINGS_URL,
        name="ncit_hgnc.csv",
        sep=",",
        usecols=["Source Code", "Target Code"],
    )
    df = df.rename(columns={"Source Code": SOURCE_ID, "Target Code": TARGET_ID})
    # Drop incomplete rows before touching the identifiers: slicing a missing
    # value (a float NaN) raises TypeError, so dropping afterwards is too late.
    df = df.dropna()
    df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("HGNC:") :])

    df[SOURCE_PREFIX] = "ncit"
    df[TARGET_PREFIX] = "hgnc"
    df[PROVENANCE] = HGNC_MAPPINGS_URL
    df = df[XREF_COLUMNS]
    return df
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def get_ncit_go_df() -> pd.DataFrame:
    """Get NCIT-GO mappings.

    In this file, the only association type was mapsTo.

    :returns: A dataframe restricted to the ``XREF_COLUMNS`` columns with
        ``ncit`` as source prefix and ``go`` as target prefix. The leading
        ``GO:`` is sliced off the target identifiers.
    """
    df = ensure_df(PREFIX, url=GO_MAPPINGS_URL, name="ncit_go.csv", sep=",")
    # The data is flipped here
    df = df.rename(columns={"Source Code": TARGET_ID, "Target Code": SOURCE_ID})
    # Drop incomplete rows before touching the identifiers: slicing a missing
    # value (a float NaN) raises TypeError, so dropping afterwards is too late.
    df = df.dropna()
    # Slice from the end of the lead-in onwards. Indexing with ``[len("GO:")]``
    # would keep only the single character that follows the colon.
    df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("GO:") :])

    df[SOURCE_PREFIX] = "ncit"
    df[TARGET_PREFIX] = "go"
    df[PROVENANCE] = GO_MAPPINGS_URL
    df = df[XREF_COLUMNS]
    return df
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def get_ncit_chebi_df() -> pd.DataFrame:
    """Get NCIT-ChEBI mappings.

    In this file, the only association type was mapsTo.

    :returns: A dataframe restricted to the ``XREF_COLUMNS`` columns with
        ``ncit`` as source prefix and ``chebi`` as target prefix. The leading
        ``CHEBI:`` is sliced off the target identifiers.
    """
    df = ensure_df(PREFIX, url=CHEBI_MAPPINGS_URL, name="ncit_chebi.csv", sep=",")
    df = df.rename(columns={"Source Code": SOURCE_ID, "Target Code": TARGET_ID})
    # Drop incomplete rows before touching the identifiers: slicing a missing
    # value (a float NaN) raises TypeError, so dropping afterwards is too late.
    df = df.dropna()
    # Slice from the end of the lead-in onwards. Indexing with
    # ``[len("CHEBI:")]`` would keep only the single character after the colon.
    df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("CHEBI:") :])

    df[SOURCE_PREFIX] = "ncit"
    df[TARGET_PREFIX] = "chebi"
    df[PROVENANCE] = CHEBI_MAPPINGS_URL
    df = df[XREF_COLUMNS]
    return df
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def get_ncit_uniprot_df() -> pd.DataFrame:
    """Get NCIT-UniProt mappings.

    In this file, the only association type was mapsTo.

    The ``NCIt Code`` column becomes the source identifier and the
    ``SwissProt ID`` column becomes the target identifier.
    """
    column_renames = {"NCIt Code": SOURCE_ID, "SwissProt ID": TARGET_ID}
    df = ensure_df(PREFIX, url=UNIPROT_MAPPINGS_URL, name="ncit_uniprot.csv")
    df.rename(columns=column_renames, inplace=True)
    # Every row shares the same prefixes and provenance.
    for column, value in (
        (SOURCE_PREFIX, "ncit"),
        (TARGET_PREFIX, "uniprot"),
        (PROVENANCE, UNIPROT_MAPPINGS_URL),
    ):
        df[column] = value
    return df[XREF_COLUMNS]
|
pyobo/xrefdb/sources/pubchem.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
"""Get xrefs from PubChem Compound to MeSH."""
|
|
2
|
-
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from ...api.utils import safe_get_version
|
|
8
|
-
from ...constants import XREF_COLUMNS
|
|
9
|
-
from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
"get_pubchem_mesh_df",
|
|
13
|
-
]
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get PubChem Compound-MeSH xrefs.

    :param version: PubChem version to use. When ``None``, it is looked up
        with ``safe_get_version("pubchem")``.
    :returns: A dataframe with the ``XREF_COLUMNS`` columns. The URL of the
        ``CID-MeSH`` extras file is recorded as the last value of every row.
    """
    if version is None:
        version = safe_get_version("pubchem")
    cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
    cid_to_mesh = get_pubchem_id_to_mesh_id(version=version)
    rows = [
        ("pubchem.compound", cid, "mesh", mesh_id, cid_mesh_url)
        for cid, mesh_id in cid_to_mesh.items()
    ]
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
|
pyobo/xrefdb/sources/wikidata.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
"""Get Wikidata xrefs.
|
|
2
|
-
|
|
3
|
-
Run with ``python -m pyobo.xrefdb.sources.wikidata``.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import json
|
|
7
|
-
import logging
|
|
8
|
-
from collections.abc import Iterable
|
|
9
|
-
|
|
10
|
-
import bioregistry
|
|
11
|
-
import click
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import requests
|
|
14
|
-
from more_click import verbose_option
|
|
15
|
-
from tqdm.auto import tqdm
|
|
16
|
-
|
|
17
|
-
from ...constants import RAW_MODULE, XREF_COLUMNS
|
|
18
|
-
from ...version import get_version
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
#: WikiData SPARQL endpoint. See https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service#Interfacing
|
|
23
|
-
URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
|
|
24
|
-
|
|
25
|
-
WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.module("wikidata", "mappings")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def get_wikidata_xrefs_df(*, use_tqdm: bool = True) -> pd.DataFrame:
    """Get all Wikidata xrefs concatenated into a single dataframe.

    :param use_tqdm: Passed through to :func:`iterate_wikidata_dfs`.
    """
    frames = iterate_wikidata_dfs(use_tqdm=use_tqdm)
    return pd.concat(frames)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def iterate_wikidata_dfs(*, use_tqdm: bool = True) -> Iterable[pd.DataFrame]:
    """Yield one Wikidata xref dataframe per Bioregistry prefix with a Wikidata property.

    Prefixes are visited in sorted order. A handful of very large prefixes are
    skipped, and a prefix whose results cannot be decoded as JSON is logged
    and skipped.

    :param use_tqdm: Show a progress bar when true.
    """
    wikidata_properties = {}
    for prefix, entry in bioregistry.read_registry().items():
        if entry.wikidata and "prefix" in entry.wikidata:
            wikidata_properties[prefix] = entry.wikidata["prefix"]

    skipped_prefixes = {"pubmed", "pmc", "orcid", "inchi", "smiles"}
    progress = tqdm(
        sorted(wikidata_properties.items()),
        disable=not use_tqdm,
        desc="Wikidata properties",
    )
    for prefix, wikidata_property in progress:
        if prefix in skipped_prefixes:
            continue  # too many
        progress.set_postfix({"prefix": prefix})
        try:
            yield get_wikidata_df(prefix, wikidata_property)
        except json.decoder.JSONDecodeError as e:
            logger.warning(
                "[%s] Problem decoding results from %s: %s", prefix, wikidata_property, e
            )
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def get_wikidata_df(prefix: str, wikidata_property: str) -> pd.DataFrame:
    """Get the Wikidata xrefs for one property as a dataframe.

    :param prefix: Prefix recorded as the target prefix of every row.
    :param wikidata_property: Wikidata property whose mappings are fetched.
    :returns: A dataframe with the ``XREF_COLUMNS`` columns; every row uses
        ``wikidata`` as both source prefix and provenance.
    """
    rows = []
    for wikidata_id, external_id in iter_wikidata_mappings(wikidata_property):
        rows.append(("wikidata", wikidata_id, prefix, external_id, "wikidata"))
    df = pd.DataFrame(rows, columns=XREF_COLUMNS)
    logger.debug("got wikidata (%s; %s): %d rows", prefix, wikidata_property, len(df.index))
    return df
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def iter_wikidata_mappings(
    wikidata_property: str, *, cache: bool = True
) -> Iterable[tuple[str, str]]:
    """Yield ``(wikidata_id, external_id)`` pairs for a Wikidata property.

    Results are read from ``<property>.json`` in ``WIKIDATA_MAPPING_DIRECTORY``
    when that file exists and ``cache`` is true. Otherwise the SPARQL endpoint
    is queried and the raw result rows are written to that file.

    :param wikidata_property: The Wikidata property to query.
    :param cache: Whether an existing result file may be reused.
    """
    path = WIKIDATA_MAPPING_DIRECTORY.join(name=f"{wikidata_property}.json")
    if not (path.exists() and cache):
        query = f"SELECT ?wikidata_id ?id WHERE {{?wikidata_id wdt:{wikidata_property} ?id}}"
        rows = _run_query(query)
        with path.open("w") as file:
            json.dump(rows, file, indent=2)
    else:
        with path.open() as file:
            rows = json.load(file)

    for row in rows:
        # Entity URIs appear both with and without the ``www.`` host.
        entity_uri = row["wikidata_id"]["value"]
        wikidata_id = _removeprefix(
            _removeprefix(entity_uri, "http://www.wikidata.org/entity/"),
            "http://wikidata.org/entity/",
        )
        yield wikidata_id, row["id"]["value"]
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _removeprefix(s, prefix):
|
|
89
|
-
if s.startswith(prefix):
|
|
90
|
-
return s[len(prefix) :]
|
|
91
|
-
return s
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
HEADERS = {
|
|
95
|
-
"User-Agent": f"pyobo/{get_version()}",
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def _run_query(query, base: str = URL):
    """Send a SPARQL query and return the ``results.bindings`` list of the JSON reply.

    :param query: The SPARQL query text.
    :param base: Endpoint URL that receives the GET request.
    :raises requests.HTTPError: If the endpoint answers with an error status.
    """
    logger.debug("running query: %s", query)
    request_params = {"query": query, "format": "json"}
    response = requests.get(base, params=request_params, headers=HEADERS)
    response.raise_for_status()
    return response.json()["results"]["bindings"]
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
@click.command()
@verbose_option
def _main():
    """Summarize xrefs."""
    # Exhaust the generator purely for its side effects (fetching each
    # property's mappings); the frames themselves are discarded.
    frames = iterate_wikidata_dfs()
    for _frame in frames:
        continue
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if __name__ == "__main__":
|
|
116
|
-
_main()
|