pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -113
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +108 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +183 -161
  20. pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +196 -118
  25. pyobo/gilda_utils.py +79 -200
  26. pyobo/identifier_utils/__init__.py +41 -0
  27. pyobo/identifier_utils/api.py +296 -0
  28. pyobo/identifier_utils/model.py +130 -0
  29. pyobo/identifier_utils/preprocessing.json +812 -0
  30. pyobo/identifier_utils/preprocessing.py +61 -0
  31. pyobo/identifier_utils/relations/__init__.py +8 -0
  32. pyobo/identifier_utils/relations/api.py +162 -0
  33. pyobo/identifier_utils/relations/data.json +5824 -0
  34. pyobo/identifier_utils/relations/data_owl.json +57 -0
  35. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  36. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  37. pyobo/mocks.py +9 -6
  38. pyobo/ner/__init__.py +9 -0
  39. pyobo/ner/api.py +72 -0
  40. pyobo/ner/normalizer.py +33 -0
  41. pyobo/obographs.py +43 -39
  42. pyobo/plugins.py +5 -4
  43. pyobo/py.typed +0 -0
  44. pyobo/reader.py +1358 -395
  45. pyobo/reader_utils.py +155 -0
  46. pyobo/resource_utils.py +42 -22
  47. pyobo/resources/__init__.py +0 -0
  48. pyobo/resources/goc.py +75 -0
  49. pyobo/resources/goc.tsv +188 -0
  50. pyobo/resources/ncbitaxon.py +4 -5
  51. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  52. pyobo/resources/ro.py +3 -2
  53. pyobo/resources/ro.tsv +0 -0
  54. pyobo/resources/so.py +0 -0
  55. pyobo/resources/so.tsv +0 -0
  56. pyobo/sources/README.md +12 -8
  57. pyobo/sources/__init__.py +52 -29
  58. pyobo/sources/agrovoc.py +0 -0
  59. pyobo/sources/antibodyregistry.py +11 -12
  60. pyobo/sources/bigg/__init__.py +13 -0
  61. pyobo/sources/bigg/bigg_compartment.py +81 -0
  62. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  63. pyobo/sources/bigg/bigg_model.py +46 -0
  64. pyobo/sources/bigg/bigg_reaction.py +77 -0
  65. pyobo/sources/biogrid.py +1 -2
  66. pyobo/sources/ccle.py +7 -12
  67. pyobo/sources/cgnc.py +0 -5
  68. pyobo/sources/chebi.py +1 -1
  69. pyobo/sources/chembl/__init__.py +9 -0
  70. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  71. pyobo/sources/chembl/chembl_target.py +160 -0
  72. pyobo/sources/civic_gene.py +55 -15
  73. pyobo/sources/clinicaltrials.py +160 -0
  74. pyobo/sources/complexportal.py +24 -24
  75. pyobo/sources/conso.py +14 -22
  76. pyobo/sources/cpt.py +0 -0
  77. pyobo/sources/credit.py +1 -9
  78. pyobo/sources/cvx.py +27 -5
  79. pyobo/sources/depmap.py +9 -12
  80. pyobo/sources/dictybase_gene.py +2 -7
  81. pyobo/sources/drugbank/__init__.py +9 -0
  82. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  83. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  84. pyobo/sources/drugcentral.py +17 -13
  85. pyobo/sources/expasy.py +31 -34
  86. pyobo/sources/famplex.py +13 -18
  87. pyobo/sources/flybase.py +3 -8
  88. pyobo/sources/gard.py +62 -0
  89. pyobo/sources/geonames/__init__.py +9 -0
  90. pyobo/sources/geonames/features.py +28 -0
  91. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  92. pyobo/sources/geonames/utils.py +115 -0
  93. pyobo/sources/gmt_utils.py +6 -7
  94. pyobo/sources/go.py +20 -13
  95. pyobo/sources/gtdb.py +154 -0
  96. pyobo/sources/gwascentral/__init__.py +9 -0
  97. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  98. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  99. pyobo/sources/hgnc/__init__.py +9 -0
  100. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  101. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  102. pyobo/sources/icd/__init__.py +9 -0
  103. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  104. pyobo/sources/icd/icd11.py +148 -0
  105. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  106. pyobo/sources/interpro.py +4 -9
  107. pyobo/sources/itis.py +0 -5
  108. pyobo/sources/kegg/__init__.py +0 -0
  109. pyobo/sources/kegg/api.py +16 -38
  110. pyobo/sources/kegg/genes.py +9 -20
  111. pyobo/sources/kegg/genome.py +1 -7
  112. pyobo/sources/kegg/pathway.py +9 -21
  113. pyobo/sources/mesh.py +58 -24
  114. pyobo/sources/mgi.py +3 -10
  115. pyobo/sources/mirbase/__init__.py +11 -0
  116. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  117. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  118. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  119. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  120. pyobo/sources/msigdb.py +74 -39
  121. pyobo/sources/ncbi/__init__.py +9 -0
  122. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  123. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  124. pyobo/sources/nih_reporter.py +60 -0
  125. pyobo/sources/nlm/__init__.py +9 -0
  126. pyobo/sources/nlm/nlm_catalog.py +48 -0
  127. pyobo/sources/nlm/nlm_publisher.py +36 -0
  128. pyobo/sources/nlm/utils.py +116 -0
  129. pyobo/sources/npass.py +6 -8
  130. pyobo/sources/omim_ps.py +10 -3
  131. pyobo/sources/pathbank.py +4 -8
  132. pyobo/sources/pfam/__init__.py +9 -0
  133. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  134. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  135. pyobo/sources/pharmgkb/__init__.py +15 -0
  136. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  137. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  138. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  139. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  140. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  141. pyobo/sources/pharmgkb/utils.py +86 -0
  142. pyobo/sources/pid.py +1 -6
  143. pyobo/sources/pombase.py +6 -10
  144. pyobo/sources/pubchem.py +4 -9
  145. pyobo/sources/reactome.py +5 -11
  146. pyobo/sources/rgd.py +11 -16
  147. pyobo/sources/rhea.py +37 -36
  148. pyobo/sources/ror.py +69 -42
  149. pyobo/sources/selventa/__init__.py +0 -0
  150. pyobo/sources/selventa/schem.py +4 -7
  151. pyobo/sources/selventa/scomp.py +1 -6
  152. pyobo/sources/selventa/sdis.py +4 -7
  153. pyobo/sources/selventa/sfam.py +1 -6
  154. pyobo/sources/sgd.py +6 -11
  155. pyobo/sources/signor/__init__.py +7 -0
  156. pyobo/sources/signor/download.py +41 -0
  157. pyobo/sources/signor/signor_complexes.py +105 -0
  158. pyobo/sources/slm.py +12 -15
  159. pyobo/sources/umls/__init__.py +7 -1
  160. pyobo/sources/umls/__main__.py +0 -0
  161. pyobo/sources/umls/get_synonym_types.py +20 -4
  162. pyobo/sources/umls/sty.py +57 -0
  163. pyobo/sources/umls/synonym_types.tsv +1 -1
  164. pyobo/sources/umls/umls.py +18 -22
  165. pyobo/sources/unimod.py +46 -0
  166. pyobo/sources/uniprot/__init__.py +1 -1
  167. pyobo/sources/uniprot/uniprot.py +40 -32
  168. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  169. pyobo/sources/utils.py +3 -2
  170. pyobo/sources/wikipathways.py +7 -10
  171. pyobo/sources/zfin.py +5 -10
  172. pyobo/ssg/__init__.py +12 -16
  173. pyobo/ssg/base.html +0 -0
  174. pyobo/ssg/index.html +26 -13
  175. pyobo/ssg/term.html +12 -2
  176. pyobo/ssg/typedef.html +0 -0
  177. pyobo/struct/__init__.py +54 -8
  178. pyobo/struct/functional/__init__.py +1 -0
  179. pyobo/struct/functional/dsl.py +2572 -0
  180. pyobo/struct/functional/macros.py +423 -0
  181. pyobo/struct/functional/obo_to_functional.py +385 -0
  182. pyobo/struct/functional/ontology.py +270 -0
  183. pyobo/struct/functional/utils.py +112 -0
  184. pyobo/struct/reference.py +331 -136
  185. pyobo/struct/struct.py +1413 -643
  186. pyobo/struct/struct_utils.py +1078 -0
  187. pyobo/struct/typedef.py +162 -210
  188. pyobo/struct/utils.py +12 -5
  189. pyobo/struct/vocabulary.py +138 -0
  190. pyobo/utils/__init__.py +0 -0
  191. pyobo/utils/cache.py +13 -11
  192. pyobo/utils/io.py +17 -31
  193. pyobo/utils/iter.py +5 -5
  194. pyobo/utils/misc.py +41 -53
  195. pyobo/utils/ndex_utils.py +0 -0
  196. pyobo/utils/path.py +76 -70
  197. pyobo/version.py +3 -3
  198. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
  199. pyobo-0.12.0.dist-info/RECORD +202 -0
  200. pyobo-0.12.0.dist-info/WHEEL +4 -0
  201. {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
  202. pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
  203. pyobo/aws.py +0 -162
  204. pyobo/cli/aws.py +0 -47
  205. pyobo/identifier_utils.py +0 -142
  206. pyobo/normalizer.py +0 -232
  207. pyobo/registries/__init__.py +0 -16
  208. pyobo/registries/metaregistry.json +0 -507
  209. pyobo/registries/metaregistry.py +0 -135
  210. pyobo/sources/icd11.py +0 -105
  211. pyobo/xrefdb/__init__.py +0 -1
  212. pyobo/xrefdb/canonicalizer.py +0 -214
  213. pyobo/xrefdb/priority.py +0 -59
  214. pyobo/xrefdb/sources/__init__.py +0 -60
  215. pyobo/xrefdb/sources/biomappings.py +0 -36
  216. pyobo/xrefdb/sources/cbms2019.py +0 -91
  217. pyobo/xrefdb/sources/chembl.py +0 -83
  218. pyobo/xrefdb/sources/compath.py +0 -82
  219. pyobo/xrefdb/sources/famplex.py +0 -64
  220. pyobo/xrefdb/sources/gilda.py +0 -50
  221. pyobo/xrefdb/sources/intact.py +0 -113
  222. pyobo/xrefdb/sources/ncit.py +0 -133
  223. pyobo/xrefdb/sources/pubchem.py +0 -27
  224. pyobo/xrefdb/sources/wikidata.py +0 -116
  225. pyobo-0.11.2.dist-info/RECORD +0 -157
  226. pyobo-0.11.2.dist-info/WHEEL +0 -5
  227. pyobo-0.11.2.dist-info/top_level.txt +0 -1
@@ -1,82 +0,0 @@
1
- """Import ComPath mappings between pathways."""
2
-
3
- from collections.abc import Iterable
4
-
5
- import pandas as pd
6
- from pystow.utils import get_commit
7
-
8
- from pyobo.constants import (
9
- PROVENANCE,
10
- SOURCE_ID,
11
- SOURCE_PREFIX,
12
- TARGET_ID,
13
- TARGET_PREFIX,
14
- XREF_COLUMNS,
15
- )
16
-
17
- __all__ = [
18
- "iter_compath_dfs",
19
- ]
20
-
21
-
22
- def _get_df(name: str, *, sha: str, sep: str = ",") -> pd.DataFrame:
23
- url = f"https://raw.githubusercontent.com/ComPath/compath-resources/{sha}/mappings/{name}"
24
- df = pd.read_csv(
25
- url,
26
- sep=sep,
27
- usecols=["Source Resource", "Source ID", "Mapping Type", "Target Resource", "Target ID"],
28
- )
29
- df.rename(
30
- columns={
31
- "Source Resource": SOURCE_PREFIX,
32
- "Source ID": SOURCE_ID,
33
- "Target Resource": TARGET_PREFIX,
34
- "Target ID": TARGET_ID,
35
- },
36
- inplace=True,
37
- )
38
- df = df[df["Mapping Type"] == "equivalentTo"]
39
- del df["Mapping Type"]
40
- df[PROVENANCE] = url
41
- df = df[XREF_COLUMNS]
42
-
43
- df[SOURCE_PREFIX] = df[SOURCE_PREFIX].map(_fix_kegg_prefix)
44
- df[TARGET_PREFIX] = df[TARGET_PREFIX].map(_fix_kegg_prefix)
45
- df[SOURCE_ID] = [
46
- _fix_kegg_identifier(prefix, identifier)
47
- for prefix, identifier in df[[SOURCE_PREFIX, SOURCE_ID]].values
48
- ]
49
- df[TARGET_ID] = [
50
- _fix_kegg_identifier(prefix, identifier)
51
- for prefix, identifier in df[[TARGET_PREFIX, TARGET_ID]].values
52
- ]
53
-
54
- return df
55
-
56
-
57
- def _fix_kegg_identifier(prefix, identifier) -> str:
58
- if prefix == "kegg.pathway":
59
- return identifier[len("path:") :]
60
- return identifier
61
-
62
-
63
- def _fix_kegg_prefix(s):
64
- return s if s != "kegg" else "kegg.pathway"
65
-
66
-
67
- def iter_compath_dfs() -> Iterable[pd.DataFrame]:
68
- """Iterate over all ComPath mappings."""
69
- sha = get_commit("ComPath", "compath-resources")
70
-
71
- yield _get_df("kegg_reactome.csv", sha=sha)
72
- yield _get_df("kegg_wikipathways.csv", sha=sha)
73
- yield _get_df("pathbank_kegg.csv", sha=sha)
74
- yield _get_df("pathbank_reactome.csv", sha=sha)
75
- yield _get_df("pathbank_wikipathways.csv", sha=sha)
76
- yield _get_df("special_mappings.csv", sha=sha)
77
- yield _get_df("wikipathways_reactome.csv", sha=sha)
78
-
79
-
80
- def get_compath_xrefs_df() -> pd.DataFrame:
81
- """Iterate over all ComPath mappings."""
82
- return pd.concat(iter_compath_dfs())
@@ -1,64 +0,0 @@
1
- """Get FamPlex xrefs."""
2
-
3
- import logging
4
- from collections.abc import Mapping
5
- from functools import lru_cache
6
-
7
- import bioregistry
8
- import pandas as pd
9
-
10
- from ...constants import (
11
- PROVENANCE,
12
- SOURCE_ID,
13
- SOURCE_PREFIX,
14
- TARGET_ID,
15
- TARGET_PREFIX,
16
- XREF_COLUMNS,
17
- )
18
- from ...utils.path import ensure_df
19
-
20
- __all__ = [
21
- "get_famplex_xrefs_df",
22
- ]
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
- URL = "https://github.com/sorgerlab/famplex/raw/master/equivalences.csv"
27
-
28
-
29
- def _get_famplex_df(force: bool = False) -> pd.DataFrame:
30
- return ensure_df(
31
- prefix="fplx",
32
- url=URL,
33
- force=force,
34
- header=None,
35
- names=[TARGET_PREFIX, TARGET_ID, SOURCE_ID],
36
- sep=",",
37
- )
38
-
39
-
40
- def get_famplex_xrefs_df(force: bool = False) -> pd.DataFrame:
41
- """Get xrefs from FamPlex."""
42
- df = _get_famplex_df(force=force)
43
- df[TARGET_PREFIX] = df[TARGET_PREFIX].map(bioregistry.normalize_prefix)
44
- df = df[df[TARGET_PREFIX].notna()]
45
- df[SOURCE_PREFIX] = "fplx"
46
- df[PROVENANCE] = "https://github.com/sorgerlab/famplex/raw/master/equivalences.csv"
47
- df = df[XREF_COLUMNS]
48
- return df
49
-
50
-
51
- @lru_cache
52
- def get_remapping(force: bool = False) -> Mapping[tuple[str, str], tuple[str, str, str]]:
53
- """Get a mapping from database/identifier pairs to famplex identifiers."""
54
- df = _get_famplex_df(force=force)
55
- rv = {}
56
- for target_ns, target_id, source_id in df.values:
57
- if target_ns.lower() == "medscan":
58
- continue # MEDSCAN is proprietary and Ben said to skip using these identifiers
59
- remapped_prefix = bioregistry.normalize_prefix(target_ns)
60
- if remapped_prefix is None:
61
- logger.warning("could not remap %s", target_ns)
62
- else:
63
- rv[remapped_prefix, target_id] = "fplx", source_id, source_id
64
- return rv
@@ -1,50 +0,0 @@
1
- """Cross references from Gilda.
2
-
3
- .. seealso:: https://github.com/indralabs/gilda
4
- """
5
-
6
- import bioregistry
7
- import pandas as pd
8
-
9
- from pyobo.constants import (
10
- PROVENANCE,
11
- SOURCE_ID,
12
- SOURCE_PREFIX,
13
- TARGET_ID,
14
- TARGET_PREFIX,
15
- )
16
-
17
- __all__ = [
18
- "get_gilda_xrefs_df",
19
- ]
20
-
21
- GILDA_MAPPINGS = (
22
- "https://raw.githubusercontent.com/indralab/gilda/master/gilda/resources/mesh_mappings.tsv"
23
- )
24
-
25
-
26
- def get_gilda_xrefs_df() -> pd.DataFrame:
27
- """Get xrefs from Gilda."""
28
- df = pd.read_csv(
29
- GILDA_MAPPINGS,
30
- sep="\t",
31
- header=None,
32
- usecols=[0, 1, 3, 4],
33
- names=[SOURCE_PREFIX, SOURCE_ID, TARGET_PREFIX, TARGET_ID],
34
- )
35
- df[PROVENANCE] = GILDA_MAPPINGS
36
-
37
- for k in SOURCE_PREFIX, TARGET_PREFIX:
38
- df[k] = df[k].map(bioregistry.normalize_prefix)
39
-
40
- for k in SOURCE_ID, TARGET_ID:
41
- df[k] = df[k].map(_fix_gogo)
42
-
43
- return df
44
-
45
-
46
- def _fix_gogo(s):
47
- for prefix in ("CHEBI:", "DOID:", "HP:", "GO:"):
48
- if s.startswith(prefix):
49
- return s[len(prefix) :]
50
- return s
@@ -1,113 +0,0 @@
1
- """Get the xrefs from IntAct."""
2
-
3
- from collections.abc import Mapping
4
-
5
- import pandas as pd
6
-
7
- from pyobo.api.utils import get_version
8
- from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX, XREF_COLUMNS
9
- from pyobo.utils.cache import cached_mapping
10
- from pyobo.utils.path import prefix_cache_join
11
-
12
- __all__ = [
13
- "COMPLEXPORTAL_MAPPINGS",
14
- "get_intact_complex_portal_xrefs_df",
15
- "get_complexportal_mapping",
16
- "get_intact_reactome_xrefs_df",
17
- "get_reactome_mapping",
18
- ]
19
-
20
- COMPLEXPORTAL_MAPPINGS = (
21
- "ftp://ftp.ebi.ac.uk/pub/databases/intact/current/various/cpx_ebi_ac_translation.txt"
22
- )
23
- REACTOME_MAPPINGS = "ftp://ftp.ebi.ac.uk/pub/databases/intact/current/various/reactome.dat"
24
-
25
-
26
- def _get_complexportal_df():
27
- return pd.read_csv(
28
- COMPLEXPORTAL_MAPPINGS, sep="\t", header=None, names=["source_id", "target_id"]
29
- )
30
-
31
-
32
- def get_intact_complex_portal_xrefs_df() -> pd.DataFrame:
33
- """Get IntAct-Complex Portal xrefs."""
34
- df = _get_complexportal_df()
35
- df[SOURCE_PREFIX] = "intact"
36
- df[TARGET_PREFIX] = "complexportal"
37
- df[PROVENANCE] = COMPLEXPORTAL_MAPPINGS
38
- df = df[XREF_COLUMNS]
39
- return df
40
-
41
-
42
- def get_complexportal_mapping() -> Mapping[str, str]:
43
- """Get IntAct to Complex Portal mapping.
44
-
45
- Is basically equivalent to:
46
-
47
- .. code-block:: python
48
-
49
- from pyobo import get_filtered_xrefs
50
-
51
- intact_complexportal_mapping = get_filtered_xrefs("intact", "complexportal")
52
- """
53
-
54
- @cached_mapping(
55
- path=prefix_cache_join(
56
- "intact", "xrefs", name="complexportal.tsv", version=get_version("intact")
57
- ),
58
- header=["intact_id", "complexportal_id"],
59
- )
60
- def _cache():
61
- df = _get_complexportal_df()
62
- return dict(df.values)
63
-
64
- return _cache()
65
-
66
-
67
- def _get_reactome_df():
68
- return pd.read_csv(REACTOME_MAPPINGS, sep="\t", header=None, names=["source_id", "target_id"])
69
-
70
-
71
- def get_intact_reactome_xrefs_df() -> pd.DataFrame:
72
- """Get IntAct-Reactome xrefs."""
73
- df = _get_reactome_df()
74
- df[SOURCE_PREFIX] = "intact"
75
- df[TARGET_PREFIX] = "reactome"
76
- df[PROVENANCE] = REACTOME_MAPPINGS
77
- df = df[XREF_COLUMNS]
78
- return df
79
-
80
-
81
- def get_reactome_mapping() -> Mapping[str, str]:
82
- """Get IntAct to Reactome mapping.
83
-
84
- Is basically equivalent to:
85
-
86
- .. code-block:: python
87
-
88
- from pyobo import get_filtered_xrefs
89
-
90
- intact_complexportal_mapping = get_filtered_xrefs("intact", "reactome")
91
- """
92
-
93
- @cached_mapping(
94
- path=prefix_cache_join(
95
- "intact", "xrefs", name="reactome.tsv", version=get_version("intact")
96
- ),
97
- header=["intact_id", "reactome_id"],
98
- )
99
- def _cache():
100
- df = _get_complexportal_df()
101
- return dict(df.values)
102
-
103
- return _cache()
104
-
105
-
106
- def get_xrefs_df() -> pd.DataFrame:
107
- """Get IntAct xrefs."""
108
- return pd.concat(
109
- [
110
- get_intact_complex_portal_xrefs_df(),
111
- get_intact_reactome_xrefs_df(),
112
- ]
113
- )
@@ -1,133 +0,0 @@
1
- """Import NCIT mappings."""
2
-
3
- from collections.abc import Iterable
4
-
5
- import pandas as pd
6
-
7
- from ...constants import (
8
- PROVENANCE,
9
- SOURCE_ID,
10
- SOURCE_PREFIX,
11
- TARGET_ID,
12
- TARGET_PREFIX,
13
- XREF_COLUMNS,
14
- )
15
- from ...utils.path import ensure_df
16
-
17
- __all__ = [
18
- "iter_ncit_dfs",
19
- "get_ncit_go_df",
20
- "get_ncit_chebi_df",
21
- "get_ncit_hgnc_df",
22
- "get_ncit_uniprot_df",
23
- ]
24
-
25
- PREFIX = "ncit"
26
-
27
- HGNC_MAPPINGS_URL = (
28
- "https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
29
- + "export_mapping&dictionary=NCIt_to_HGNC_Mapping&version=1.0"
30
- )
31
-
32
- GO_MAPPINGS_URL = (
33
- "https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
34
- + "export_mapping&dictionary=GO_to_NCIt_Mapping&version=1.1"
35
- )
36
-
37
- CHEBI_MAPPINGS_URL = (
38
- "https://ncit.nci.nih.gov/ncitbrowser/ajax?action="
39
- + "export_mapping&dictionary=NCIt_to_ChEBI_Mapping&version=1.0"
40
- )
41
-
42
- # url_swissprot = 'https://ncit.nci.nih.gov/ncitbrowser/ajax?action=' \
43
- # 'export_mapping&uri=https://evs.nci.nih.gov/ftp1/' \
44
- # 'NCI_Thesaurus/Mappings/NCIt-SwissProt_Mapping.txt'
45
-
46
- UNIPROT_MAPPINGS_URL = (
47
- "https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/Mappings/NCIt-SwissProt_Mapping.txt"
48
- )
49
-
50
-
51
- def get_ncit_xrefs_df() -> pd.DataFrame:
52
- """Get all NCIT mappings in a single dataframe."""
53
- return pd.concat(iter_ncit_dfs())
54
-
55
-
56
- def iter_ncit_dfs() -> Iterable[pd.DataFrame]:
57
- """Iterate all NCIT mappings dataframes."""
58
- yield get_ncit_hgnc_df()
59
- yield get_ncit_chebi_df()
60
- yield get_ncit_uniprot_df()
61
- yield get_ncit_go_df()
62
-
63
-
64
- def get_ncit_hgnc_df() -> pd.DataFrame:
65
- """Get NCIT-HGNC mappings.
66
-
67
- In this file, the only association type was mapsTo.
68
- """
69
- df = ensure_df(
70
- PREFIX,
71
- url=HGNC_MAPPINGS_URL,
72
- name="ncit_hgnc.csv",
73
- sep=",",
74
- usecols=["Source Code", "Target Code"],
75
- )
76
- df.rename(columns={"Source Code": SOURCE_ID, "Target Code": TARGET_ID}, inplace=True)
77
- df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("HGNC:") :])
78
- df.dropna(inplace=True)
79
-
80
- df[SOURCE_PREFIX] = "ncit"
81
- df[TARGET_PREFIX] = "hgnc"
82
- df[PROVENANCE] = HGNC_MAPPINGS_URL
83
- df = df[XREF_COLUMNS]
84
- return df
85
-
86
-
87
- def get_ncit_go_df() -> pd.DataFrame:
88
- """Get NCIT-GO mappings.
89
-
90
- In this file, the only association type was mapsTo.
91
- """
92
- df = ensure_df(PREFIX, url=GO_MAPPINGS_URL, name="ncit_go.csv", sep=",")
93
- # The data is flipped here
94
- df.rename(columns={"Source Code": TARGET_ID, "Target Code": SOURCE_ID}, inplace=True)
95
- df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("GO:")])
96
- df.dropna(inplace=True)
97
-
98
- df[SOURCE_PREFIX] = "ncit"
99
- df[TARGET_PREFIX] = "go"
100
- df[PROVENANCE] = GO_MAPPINGS_URL
101
- df = df[XREF_COLUMNS]
102
- return df
103
-
104
-
105
- def get_ncit_chebi_df() -> pd.DataFrame:
106
- """Get NCIT-ChEBI mappings.
107
-
108
- In this file, the only association type was mapsTo.
109
- """
110
- df = ensure_df(PREFIX, url=CHEBI_MAPPINGS_URL, name="ncit_chebi.csv", sep=",")
111
- df.rename(columns={"Source Code": SOURCE_ID, "Target Code": TARGET_ID}, inplace=True)
112
- df[TARGET_ID] = df[TARGET_ID].map(lambda s: s[len("CHEBI:")])
113
- df.dropna(inplace=True)
114
-
115
- df[SOURCE_PREFIX] = "ncit"
116
- df[TARGET_PREFIX] = "chebi"
117
- df[PROVENANCE] = CHEBI_MAPPINGS_URL
118
- df = df[XREF_COLUMNS]
119
- return df
120
-
121
-
122
- def get_ncit_uniprot_df() -> pd.DataFrame:
123
- """Get NCIT-UniProt mappings.
124
-
125
- In this file, the only association type was mapsTo.
126
- """
127
- df = ensure_df(PREFIX, url=UNIPROT_MAPPINGS_URL, name="ncit_uniprot.csv")
128
- df.rename(columns={"NCIt Code": SOURCE_ID, "SwissProt ID": TARGET_ID}, inplace=True)
129
- df[SOURCE_PREFIX] = "ncit"
130
- df[TARGET_PREFIX] = "uniprot"
131
- df[PROVENANCE] = UNIPROT_MAPPINGS_URL
132
- df = df[XREF_COLUMNS]
133
- return df
@@ -1,27 +0,0 @@
1
- """Get xrefs from PubChem Compound to MeSH."""
2
-
3
- from typing import Optional
4
-
5
- import pandas as pd
6
-
7
- from ...api.utils import safe_get_version
8
- from ...constants import XREF_COLUMNS
9
- from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id
10
-
11
- __all__ = [
12
- "get_pubchem_mesh_df",
13
- ]
14
-
15
-
16
- def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
17
- """Get PubChem Compound-MeSH xrefs."""
18
- if version is None:
19
- version = safe_get_version("pubchem")
20
- cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
21
- return pd.DataFrame(
22
- [
23
- ("pubchem.compound", k, "mesh", v, cid_mesh_url)
24
- for k, v in get_pubchem_id_to_mesh_id(version=version).items()
25
- ],
26
- columns=XREF_COLUMNS,
27
- )
@@ -1,116 +0,0 @@
1
- """Get Wikidata xrefs.
2
-
3
- Run with ``python -m pyobo.xrefdb.sources.wikidata``.
4
- """
5
-
6
- import json
7
- import logging
8
- from collections.abc import Iterable
9
-
10
- import bioregistry
11
- import click
12
- import pandas as pd
13
- import requests
14
- from more_click import verbose_option
15
- from tqdm.auto import tqdm
16
-
17
- from ...constants import RAW_MODULE, XREF_COLUMNS
18
- from ...version import get_version
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
- #: WikiData SPARQL endpoint. See https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service#Interfacing
23
- URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
24
-
25
- WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.module("wikidata", "mappings")
26
-
27
-
28
- def get_wikidata_xrefs_df(*, use_tqdm: bool = True) -> pd.DataFrame:
29
- """Get all Wikidata xrefs."""
30
- return pd.concat(iterate_wikidata_dfs(use_tqdm=use_tqdm))
31
-
32
-
33
- def iterate_wikidata_dfs(*, use_tqdm: bool = True) -> Iterable[pd.DataFrame]:
34
- """Iterate over WikiData xref dataframes."""
35
- wikidata_properties = {
36
- prefix: entry.wikidata["prefix"]
37
- for prefix, entry in bioregistry.read_registry().items()
38
- if entry.wikidata and "prefix" in entry.wikidata
39
- }
40
-
41
- it = tqdm(sorted(wikidata_properties.items()), disable=not use_tqdm, desc="Wikidata properties")
42
- for prefix, wikidata_property in it:
43
- if prefix in {"pubmed", "pmc", "orcid", "inchi", "smiles"}:
44
- continue # too many
45
- it.set_postfix({"prefix": prefix})
46
- try:
47
- yield get_wikidata_df(prefix, wikidata_property)
48
- except json.decoder.JSONDecodeError as e:
49
- logger.warning(
50
- "[%s] Problem decoding results from %s: %s", prefix, wikidata_property, e
51
- )
52
-
53
-
54
- def get_wikidata_df(prefix: str, wikidata_property: str) -> pd.DataFrame:
55
- """Get Wikidata xrefs."""
56
- df = pd.DataFrame(
57
- [
58
- ("wikidata", wikidata_id, prefix, external_id, "wikidata")
59
- for wikidata_id, external_id in iter_wikidata_mappings(wikidata_property)
60
- ],
61
- columns=XREF_COLUMNS,
62
- )
63
- logger.debug("got wikidata (%s; %s): %d rows", prefix, wikidata_property, len(df.index))
64
- return df
65
-
66
-
67
- def iter_wikidata_mappings(
68
- wikidata_property: str, *, cache: bool = True
69
- ) -> Iterable[tuple[str, str]]:
70
- """Iterate over Wikidata xrefs."""
71
- path = WIKIDATA_MAPPING_DIRECTORY.join(name=f"{wikidata_property}.json")
72
- if path.exists() and cache:
73
- with path.open() as file:
74
- rows = json.load(file)
75
- else:
76
- query = f"SELECT ?wikidata_id ?id WHERE {{?wikidata_id wdt:{wikidata_property} ?id}}"
77
- rows = _run_query(query)
78
- with path.open("w") as file:
79
- json.dump(rows, file, indent=2)
80
-
81
- for row in rows:
82
- wikidata_id = _removeprefix(row["wikidata_id"]["value"], "http://www.wikidata.org/entity/")
83
- wikidata_id = _removeprefix(wikidata_id, "http://wikidata.org/entity/")
84
- entity_id = row["id"]["value"]
85
- yield wikidata_id, entity_id
86
-
87
-
88
- def _removeprefix(s, prefix):
89
- if s.startswith(prefix):
90
- return s[len(prefix) :]
91
- return s
92
-
93
-
94
- HEADERS = {
95
- "User-Agent": f"pyobo/{get_version()}",
96
- }
97
-
98
-
99
- def _run_query(query, base: str = URL):
100
- logger.debug("running query: %s", query)
101
- res = requests.get(base, params={"query": query, "format": "json"}, headers=HEADERS)
102
- res.raise_for_status()
103
- res_json = res.json()
104
- return res_json["results"]["bindings"]
105
-
106
-
107
- @click.command()
108
- @verbose_option
109
- def _main():
110
- """Summarize xrefs."""
111
- for _ in iterate_wikidata_dfs():
112
- pass
113
-
114
-
115
- if __name__ == "__main__":
116
- _main()