pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/sources/kegg/pathway.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert KEGG Pathways to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.kegg.pathway``
|
|
@@ -8,8 +6,9 @@ Run with ``python -m pyobo.sources.kegg.pathway``
|
|
|
8
6
|
import logging
|
|
9
7
|
import urllib.error
|
|
10
8
|
from collections import defaultdict
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
11
10
|
from functools import partial
|
|
12
|
-
from typing import
|
|
11
|
+
from typing import Union
|
|
13
12
|
|
|
14
13
|
from tqdm.auto import tqdm
|
|
15
14
|
from tqdm.contrib.concurrent import thread_map
|
|
@@ -76,7 +75,7 @@ def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
|
|
|
76
75
|
)
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
def _get_link_pathway_map(path: str) -> Mapping[str,
|
|
78
|
+
def _get_link_pathway_map(path: str) -> Mapping[str, list[str]]:
|
|
80
79
|
rv = defaultdict(list)
|
|
81
80
|
with open(path) as file:
|
|
82
81
|
for line in file:
|
|
@@ -110,7 +109,7 @@ def _iter_genome_terms(
|
|
|
110
109
|
list_pathway_lines = [line.strip() for line in file]
|
|
111
110
|
for line in list_pathway_lines:
|
|
112
111
|
line = line.strip()
|
|
113
|
-
pathway_id, name =
|
|
112
|
+
pathway_id, name = (part.strip() for part in line.split("\t"))
|
|
114
113
|
pathway_id = pathway_id[len("path:") :]
|
|
115
114
|
|
|
116
115
|
terms[pathway_id] = term = Term.from_triple(
|
|
@@ -149,7 +148,7 @@ def _iter_genome_terms(
|
|
|
149
148
|
|
|
150
149
|
def iter_kegg_pathway_paths(
|
|
151
150
|
version: str, skip_missing: bool = True
|
|
152
|
-
) -> Iterable[Union[
|
|
151
|
+
) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]:
|
|
153
152
|
"""Get paths for the KEGG Pathway files."""
|
|
154
153
|
genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways"))
|
|
155
154
|
func = partial(_process_genome, version=version, skip_missing=skip_missing)
|
pyobo/sources/mesh.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Parser for the MeSH descriptors."""
|
|
4
2
|
|
|
5
3
|
import datetime
|
|
6
4
|
import itertools as itt
|
|
7
5
|
import logging
|
|
8
6
|
import re
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Collection, Iterable, Mapping
|
|
8
|
+
from typing import Any, Optional
|
|
10
9
|
from xml.etree.ElementTree import Element
|
|
11
10
|
|
|
12
11
|
from tqdm.auto import tqdm
|
|
13
12
|
|
|
13
|
+
from pyobo.api.utils import safe_get_version
|
|
14
14
|
from pyobo.identifier_utils import standardize_ec
|
|
15
15
|
from pyobo.struct import Obo, Reference, Synonym, Term
|
|
16
16
|
from pyobo.utils.cache import cached_json, cached_mapping
|
|
@@ -69,7 +69,7 @@ def get_tree_to_mesh_id(version: str) -> Mapping[str, str]:
|
|
|
69
69
|
|
|
70
70
|
def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
71
71
|
"""Get MeSH OBO terms."""
|
|
72
|
-
mesh_id_to_term:
|
|
72
|
+
mesh_id_to_term: dict[str, Term] = {}
|
|
73
73
|
|
|
74
74
|
descriptors = ensure_mesh_descriptors(version=version, force=force)
|
|
75
75
|
supplemental_records = ensure_mesh_supplemental_records(version=version, force=force)
|
|
@@ -79,8 +79,8 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
79
79
|
name = entry["name"]
|
|
80
80
|
definition = entry.get("scope_note")
|
|
81
81
|
|
|
82
|
-
xrefs:
|
|
83
|
-
synonyms:
|
|
82
|
+
xrefs: list[Reference] = []
|
|
83
|
+
synonyms: set[str] = set()
|
|
84
84
|
for concept in entry["concepts"]:
|
|
85
85
|
synonyms.add(concept["name"])
|
|
86
86
|
for term in concept["terms"]:
|
|
@@ -106,7 +106,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
106
106
|
|
|
107
107
|
def ensure_mesh_descriptors(
|
|
108
108
|
version: str, force: bool = False, force_process: bool = False
|
|
109
|
-
) ->
|
|
109
|
+
) -> list[Mapping[str, Any]]:
|
|
110
110
|
"""Get the parsed MeSH dictionary, and cache it if it wasn't already."""
|
|
111
111
|
|
|
112
112
|
@cached_json(path=prefix_directory_join(PREFIX, name="desc.json", version=version), force=force)
|
|
@@ -132,7 +132,7 @@ def get_supplemental_url(version: str) -> str:
|
|
|
132
132
|
return f"https://nlmpubs.nlm.nih.gov/projects/mesh/{version}/xmlmesh/supp{version}.gz"
|
|
133
133
|
|
|
134
134
|
|
|
135
|
-
def ensure_mesh_supplemental_records(version: str, force: bool = False) ->
|
|
135
|
+
def ensure_mesh_supplemental_records(version: str, force: bool = False) -> list[Mapping[str, Any]]:
|
|
136
136
|
"""Get the parsed MeSH dictionary, and cache it if it wasn't already."""
|
|
137
137
|
|
|
138
138
|
@cached_json(path=prefix_directory_join(PREFIX, name="supp.json", version=version), force=force)
|
|
@@ -146,11 +146,11 @@ def ensure_mesh_supplemental_records(version: str, force: bool = False) -> List[
|
|
|
146
146
|
return _inner()
|
|
147
147
|
|
|
148
148
|
|
|
149
|
-
def get_descriptor_records(element: Element, id_key: str, name_key) ->
|
|
149
|
+
def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict[str, Any]]:
|
|
150
150
|
"""Get MeSH descriptor records."""
|
|
151
151
|
logger.info("extract MeSH descriptors, concepts, and terms")
|
|
152
152
|
|
|
153
|
-
rv:
|
|
153
|
+
rv: list[dict[str, Any]] = [
|
|
154
154
|
get_descriptor_record(descriptor, id_key=id_key, name_key=name_key)
|
|
155
155
|
for descriptor in tqdm(element, desc="Getting MeSH Descriptors", unit_scale=True)
|
|
156
156
|
]
|
|
@@ -203,7 +203,7 @@ def get_descriptor_record(
|
|
|
203
203
|
element: Element,
|
|
204
204
|
id_key: str,
|
|
205
205
|
name_key: str,
|
|
206
|
-
) ->
|
|
206
|
+
) -> dict[str, Any]:
|
|
207
207
|
"""Get descriptor records from the main element.
|
|
208
208
|
|
|
209
209
|
:param element: An XML element
|
|
@@ -227,13 +227,13 @@ def get_descriptor_record(
|
|
|
227
227
|
return rv
|
|
228
228
|
|
|
229
229
|
|
|
230
|
-
def get_concept_records(element: Element) ->
|
|
230
|
+
def get_concept_records(element: Element) -> list[Mapping[str, Any]]:
|
|
231
231
|
"""Get concepts from a record."""
|
|
232
232
|
return [get_concept_record(e) for e in element.findall("ConceptList/Concept")]
|
|
233
233
|
|
|
234
234
|
|
|
235
|
-
def _get_xrefs(element: Element) ->
|
|
236
|
-
raw_registry_numbers:
|
|
235
|
+
def _get_xrefs(element: Element) -> list[tuple[str, str]]:
|
|
236
|
+
raw_registry_numbers: list[str] = sorted(
|
|
237
237
|
{e.text for e in element.findall("RelatedRegistryNumberList/RegistryNumber") if e.text}
|
|
238
238
|
)
|
|
239
239
|
registry_number = element.findtext("RegistryNumber")
|
|
@@ -266,7 +266,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
|
|
|
266
266
|
if scope_note is not None:
|
|
267
267
|
scope_note = scope_note.replace("\\n", "\n").strip()
|
|
268
268
|
|
|
269
|
-
rv:
|
|
269
|
+
rv: dict[str, Any] = {
|
|
270
270
|
"concept_ui": element.findtext("ConceptUI"),
|
|
271
271
|
"name": element.findtext("ConceptName/String"),
|
|
272
272
|
"terms": get_term_records(element),
|
|
@@ -285,7 +285,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
|
|
|
285
285
|
return rv
|
|
286
286
|
|
|
287
287
|
|
|
288
|
-
def get_term_records(element: Element) ->
|
|
288
|
+
def get_term_records(element: Element) -> list[Mapping[str, Any]]:
|
|
289
289
|
"""Get all of the terms for a concept."""
|
|
290
290
|
return [get_term_record(term) for term in element.findall("TermList/Term")]
|
|
291
291
|
|
|
@@ -306,7 +306,7 @@ def _text_or_bust(element: Element, name: str) -> str:
|
|
|
306
306
|
return n
|
|
307
307
|
|
|
308
308
|
|
|
309
|
-
def _get_descriptor_qualifiers(descriptor: Element) ->
|
|
309
|
+
def _get_descriptor_qualifiers(descriptor: Element) -> list[Mapping[str, str]]:
|
|
310
310
|
return [
|
|
311
311
|
{
|
|
312
312
|
"qualifier_ui": _text_or_bust(qualifier, "QualifierUI"),
|
|
@@ -320,7 +320,7 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
|
|
|
320
320
|
|
|
321
321
|
def get_mesh_category_curies(
|
|
322
322
|
letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
|
|
323
|
-
) ->
|
|
323
|
+
) -> list[str]:
|
|
324
324
|
"""Get the MeSH LUIDs for a category, by letter (e.g., "A").
|
|
325
325
|
|
|
326
326
|
:param letter: The MeSH tree, A for anatomy, C for disease, etc.
|
|
@@ -331,9 +331,7 @@ def get_mesh_category_curies(
|
|
|
331
331
|
.. seealso:: https://meshb.nlm.nih.gov/treeView
|
|
332
332
|
"""
|
|
333
333
|
if version is None:
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
version = bioversions.get_version("mesh")
|
|
334
|
+
version = safe_get_version("mesh")
|
|
337
335
|
tree_to_mesh = get_tree_to_mesh_id(version=version)
|
|
338
336
|
rv = []
|
|
339
337
|
for i in range(1, 100):
|
pyobo/sources/mgi.py
CHANGED
pyobo/sources/mirbase.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for miRBase."""
|
|
4
2
|
|
|
5
3
|
import gzip
|
|
6
4
|
import logging
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
8
6
|
|
|
9
7
|
from tqdm.auto import tqdm
|
|
10
8
|
|
|
@@ -48,7 +46,7 @@ def get_obo(force: bool = False) -> Obo:
|
|
|
48
46
|
return MiRBaseGetter(force=force)
|
|
49
47
|
|
|
50
48
|
|
|
51
|
-
def get_terms(version: str, force: bool = False) ->
|
|
49
|
+
def get_terms(version: str, force: bool = False) -> list[Term]:
|
|
52
50
|
"""Parse miRNA data from filepath and convert it to dictionary."""
|
|
53
51
|
_assert_frozen_version(version)
|
|
54
52
|
url = f"{BASE_URL}/miRNA.dat.gz"
|
|
@@ -77,7 +75,7 @@ def _prepare_organisms(version: str, force: bool = False):
|
|
|
77
75
|
return {division: (taxonomy_id, name) for _, division, name, _tree, taxonomy_id in df.values}
|
|
78
76
|
|
|
79
77
|
|
|
80
|
-
def _prepare_aliases(version: str, force: bool = False) -> Mapping[str,
|
|
78
|
+
def _prepare_aliases(version: str, force: bool = False) -> Mapping[str, list[str]]:
|
|
81
79
|
_assert_frozen_version(version)
|
|
82
80
|
url = f"{BASE_URL}/aliases.txt.gz"
|
|
83
81
|
df = ensure_df(PREFIX, url=url, sep="\t", version=version, force=force)
|
|
@@ -94,7 +92,7 @@ def _process_definitions_lines(
|
|
|
94
92
|
organisms = _prepare_organisms(version, force=force)
|
|
95
93
|
aliases = _prepare_aliases(version, force=force)
|
|
96
94
|
|
|
97
|
-
groups:
|
|
95
|
+
groups: list[list[str]] = []
|
|
98
96
|
|
|
99
97
|
for line in lines: # TODO replace with itertools.groupby
|
|
100
98
|
if line.startswith("ID"):
|
pyobo/sources/mirbase_family.py
CHANGED
pyobo/sources/mirbase_mature.py
CHANGED
pyobo/sources/msigdb.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Parsers for MSig."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
7
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from typing import Optional
|
|
8
6
|
|
|
7
|
+
from lxml.etree import ElementTree
|
|
9
8
|
from tqdm.auto import tqdm
|
|
10
9
|
|
|
11
10
|
from ..struct import Obo, Reference, Term, has_participant
|
|
@@ -137,7 +136,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
137
136
|
def _get_definition(attrib) -> Optional[str]:
|
|
138
137
|
rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None
|
|
139
138
|
if rv is not None:
|
|
140
|
-
return rv.replace("\d", "").replace("\s", "")
|
|
139
|
+
return rv.replace(r"\d", "").replace(r"\s", "")
|
|
141
140
|
return None
|
|
142
141
|
|
|
143
142
|
|
pyobo/sources/ncbigene.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for Entrez."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
import bioregistry
|
|
9
7
|
import pandas as pd
|
|
@@ -47,7 +45,7 @@ GENE_INFO_COLUMNS = [
|
|
|
47
45
|
]
|
|
48
46
|
|
|
49
47
|
|
|
50
|
-
def get_ncbigene_ids() ->
|
|
48
|
+
def get_ncbigene_ids() -> set[str]:
|
|
51
49
|
"""Get the Entrez name mapping."""
|
|
52
50
|
df = _get_ncbigene_subset(["GeneID"])
|
|
53
51
|
return set(df["GeneID"])
|
|
@@ -68,7 +66,7 @@ def _get_ncbigene_info_subset(usecols) -> Mapping[str, str]:
|
|
|
68
66
|
return dict(df.values)
|
|
69
67
|
|
|
70
68
|
|
|
71
|
-
def _get_ncbigene_subset(usecols:
|
|
69
|
+
def _get_ncbigene_subset(usecols: list[str]) -> pd.DataFrame:
|
|
72
70
|
df = ensure_df(
|
|
73
71
|
PREFIX,
|
|
74
72
|
url=GENE_INFO_URL,
|
pyobo/sources/npass.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for NPASS."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from tqdm.auto import tqdm
|
|
@@ -64,7 +62,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
64
62
|
)
|
|
65
63
|
|
|
66
64
|
for xref_prefix, xref_id in [
|
|
67
|
-
("chembl", chembl_id),
|
|
65
|
+
("chembl.compound", chembl_id),
|
|
68
66
|
# ("zinc", zinc_id),
|
|
69
67
|
]:
|
|
70
68
|
if pd.notna(xref_id):
|
pyobo/sources/omim_ps.py
CHANGED
pyobo/sources/pathbank.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for PathBank."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
8
6
|
|
|
9
7
|
import pandas as pd
|
|
10
8
|
from tqdm.auto import tqdm
|
|
@@ -98,7 +96,7 @@ def get_proteins_df(version: str, force: bool = False) -> pd.DataFrame:
|
|
|
98
96
|
return proteins_df
|
|
99
97
|
|
|
100
98
|
|
|
101
|
-
def get_protein_mapping(version: str, force: bool = False) -> Mapping[str,
|
|
99
|
+
def get_protein_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]:
|
|
102
100
|
"""Make the protein mapping."""
|
|
103
101
|
proteins_df = get_proteins_df(version=version, force=force)
|
|
104
102
|
smpdb_id_to_proteins = defaultdict(set)
|
|
@@ -122,7 +120,7 @@ def get_metabolite_df(version: str, force: bool = False) -> pd.DataFrame:
|
|
|
122
120
|
)
|
|
123
121
|
|
|
124
122
|
|
|
125
|
-
def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str,
|
|
123
|
+
def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]:
|
|
126
124
|
"""Make the metabolite mapping."""
|
|
127
125
|
metabolites_df = get_metabolite_df(version=version, force=force)
|
|
128
126
|
smpdb_id_to_metabolites = defaultdict(set)
|
pyobo/sources/pfam.py
CHANGED
pyobo/sources/pfam_clan.py
CHANGED
pyobo/sources/pid.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for NCI PID."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
8
6
|
|
|
9
7
|
import pandas as pd
|
|
10
8
|
|
|
@@ -45,7 +43,7 @@ def get_obo() -> Obo:
|
|
|
45
43
|
return PIDGetter()
|
|
46
44
|
|
|
47
45
|
|
|
48
|
-
def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[
|
|
46
|
+
def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[tuple[str, CX]]:
|
|
49
47
|
"""Iterate over NCI PID networks."""
|
|
50
48
|
yield from ensure_ndex_network_set(
|
|
51
49
|
PREFIX, NDEX_NETWORK_SET_UUID, use_tqdm=use_tqdm, force=force
|
|
@@ -117,7 +115,7 @@ def get_curation_df() -> pd.DataFrame:
|
|
|
117
115
|
return df[["Text from NDEx", "Type", "Namespace", "Identifier"]]
|
|
118
116
|
|
|
119
117
|
|
|
120
|
-
def get_remapping() -> Mapping[str,
|
|
118
|
+
def get_remapping() -> Mapping[str, list[tuple[str, str]]]:
|
|
121
119
|
"""Get a mapping from text to list of HGNC id/symbols."""
|
|
122
120
|
curation_df = get_curation_df()
|
|
123
121
|
rv = defaultdict(list)
|
pyobo/sources/pombase.py
CHANGED
pyobo/sources/pubchem.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for PubChem Compound."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
5
|
+
from typing import Optional
|
|
7
6
|
|
|
8
|
-
import bioversions
|
|
9
7
|
import pandas as pd
|
|
10
8
|
from bioregistry.utils import removeprefix
|
|
11
9
|
from tqdm.auto import tqdm
|
|
12
10
|
|
|
13
11
|
from ..api import get_name_id_mapping
|
|
12
|
+
from ..api.utils import get_version
|
|
14
13
|
from ..struct import Obo, Reference, Synonym, Term
|
|
15
14
|
from ..utils.iter import iterate_gzips_together
|
|
16
15
|
from ..utils.path import ensure_df, ensure_path
|
|
@@ -26,7 +25,7 @@ PREFIX = "pubchem.compound"
|
|
|
26
25
|
|
|
27
26
|
def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
|
|
28
27
|
if version is None:
|
|
29
|
-
version =
|
|
28
|
+
version = get_version("pubchem")
|
|
30
29
|
return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"
|
|
31
30
|
|
|
32
31
|
|
|
@@ -100,7 +99,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:
|
|
|
100
99
|
|
|
101
100
|
def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
|
|
102
101
|
if version is None:
|
|
103
|
-
version =
|
|
102
|
+
version = get_version("pubchem")
|
|
104
103
|
# 2 tab-separated columns: compound_id, name
|
|
105
104
|
cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
|
|
106
105
|
cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)
|
pyobo/sources/reactome.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for Reactome."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
7
6
|
from functools import lru_cache
|
|
8
|
-
from typing import Iterable, Mapping, Set
|
|
9
7
|
|
|
10
8
|
import pandas as pd
|
|
11
9
|
from tqdm.auto import tqdm
|
|
@@ -122,7 +120,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
122
120
|
|
|
123
121
|
|
|
124
122
|
@lru_cache(maxsize=1)
|
|
125
|
-
def get_protein_to_pathways() -> Mapping[str,
|
|
123
|
+
def get_protein_to_pathways() -> Mapping[str, set[str]]:
|
|
126
124
|
"""Get a mapping from proteins to the pathways they're in."""
|
|
127
125
|
protein_to_pathways = defaultdict(set)
|
|
128
126
|
x = get_id_multirelations_mapping("reactome", has_participant)
|
pyobo/sources/rgd.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for RGD."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from typing import Optional
|
|
7
6
|
|
|
8
7
|
import pandas as pd
|
|
9
8
|
from tqdm.auto import tqdm
|
|
@@ -28,7 +27,7 @@ old_name_type = SynonymTypeDef.from_text("old_name")
|
|
|
28
27
|
|
|
29
28
|
# NOTE unigene id was discontinue in January 18th, 2021 dump
|
|
30
29
|
|
|
31
|
-
GENES_URL = "https://download.rgd.mcw.edu/data_release/
|
|
30
|
+
GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"
|
|
32
31
|
GENES_HEADER = [
|
|
33
32
|
"GENE_RGD_ID",
|
|
34
33
|
"SYMBOL",
|
pyobo/sources/rhea.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for Rhea."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from typing import TYPE_CHECKING, Optional
|
|
7
6
|
|
|
8
|
-
import bioversions
|
|
9
7
|
import pystow
|
|
10
8
|
|
|
9
|
+
from pyobo.api.utils import get_version
|
|
11
10
|
from pyobo.struct import Obo, Reference, Term
|
|
12
11
|
from pyobo.struct.typedef import (
|
|
13
12
|
TypeDef,
|
|
@@ -63,7 +62,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
|
|
|
63
62
|
"""Get the Rhea RDF graph."""
|
|
64
63
|
# see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
|
|
65
64
|
if version is None:
|
|
66
|
-
version =
|
|
65
|
+
version = get_version(PREFIX)
|
|
67
66
|
return pystow.ensure_rdf(
|
|
68
67
|
"pyobo",
|
|
69
68
|
"raw",
|
|
@@ -71,7 +70,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
|
|
|
71
70
|
version,
|
|
72
71
|
url=RHEA_RDF_GZ_URL,
|
|
73
72
|
force=force,
|
|
74
|
-
parse_kwargs=
|
|
73
|
+
parse_kwargs={"format": "xml"},
|
|
75
74
|
)
|
|
76
75
|
|
|
77
76
|
|
|
@@ -103,10 +102,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
103
102
|
)
|
|
104
103
|
names = {str(identifier): str(name) for _, identifier, name in result}
|
|
105
104
|
|
|
106
|
-
terms:
|
|
107
|
-
master_to_left:
|
|
108
|
-
master_to_right:
|
|
109
|
-
master_to_bi:
|
|
105
|
+
terms: dict[str, Term] = {}
|
|
106
|
+
master_to_left: dict[str, str] = {}
|
|
107
|
+
master_to_right: dict[str, str] = {}
|
|
108
|
+
master_to_bi: dict[str, str] = {}
|
|
110
109
|
|
|
111
110
|
directions = ensure_df(
|
|
112
111
|
PREFIX,
|