pyobo 0.10.12__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +31 -32
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +61 -5
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +0 -2
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -33
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +9 -10
- pyobo/identifier_utils.py +10 -10
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +0 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +2 -3
- pyobo/sources/biogrid.py +4 -4
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +1 -1
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +1 -1
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +6 -7
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +1 -3
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +2 -3
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +2 -3
- pyobo/sources/rhea.py +7 -8
- pyobo/sources/ror.py +3 -2
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +4 -4
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +102 -96
- pyobo/struct/typedef.py +9 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +4 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +3 -4
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +0 -2
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -5
- pyobo/xrefdb/sources/wikidata.py +2 -4
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo-0.10.12.dist-info/METADATA +0 -499
- pyobo-0.10.12.dist-info/RECORD +0 -169
- pyobo-0.10.12.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/sources/expasy.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ExPASy to OBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
6
|
+
from typing import Any, Optional
|
|
8
7
|
|
|
9
8
|
from .utils import get_go_mapping
|
|
10
9
|
from ..struct import Obo, Reference, Synonym, Term
|
|
@@ -76,7 +75,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
76
75
|
with open(tree_path) as file:
|
|
77
76
|
tree = get_tree(file)
|
|
78
77
|
|
|
79
|
-
terms:
|
|
78
|
+
terms: dict[str, Term] = {}
|
|
80
79
|
child_to_parents = defaultdict(list)
|
|
81
80
|
for ec_code, data in tree.items():
|
|
82
81
|
terms[ec_code] = Term(
|
|
@@ -176,7 +175,7 @@ def normalize_expasy_id(expasy_id: str) -> str:
|
|
|
176
175
|
return expasy_id.replace(" ", "")
|
|
177
176
|
|
|
178
177
|
|
|
179
|
-
def give_edge(unnormalized_ec_code: str) ->
|
|
178
|
+
def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]:
|
|
180
179
|
"""Return a (parent, child) tuple for given id."""
|
|
181
180
|
levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x]
|
|
182
181
|
level = len(levels)
|
|
@@ -227,7 +226,7 @@ def get_database(lines: Iterable[str]) -> Mapping:
|
|
|
227
226
|
for groups in _group_by_id(lines):
|
|
228
227
|
_, expasy_id = groups[0]
|
|
229
228
|
|
|
230
|
-
ec_data_entry:
|
|
229
|
+
ec_data_entry: dict[str, Any] = {
|
|
231
230
|
"concept": {
|
|
232
231
|
"namespace": PREFIX,
|
|
233
232
|
"identifier": expasy_id,
|
|
@@ -269,11 +268,11 @@ def get_database(lines: Iterable[str]) -> Mapping:
|
|
|
269
268
|
continue
|
|
270
269
|
uniprot_id, uniprot_accession = uniprot_entry.split(",")
|
|
271
270
|
ec_data_entry["proteins"].append( # type:ignore
|
|
272
|
-
|
|
273
|
-
namespace
|
|
274
|
-
name
|
|
275
|
-
identifier
|
|
276
|
-
|
|
271
|
+
{
|
|
272
|
+
"namespace": "uniprot",
|
|
273
|
+
"name": uniprot_accession,
|
|
274
|
+
"identifier": uniprot_id,
|
|
275
|
+
}
|
|
277
276
|
)
|
|
278
277
|
|
|
279
278
|
rv[expasy_id] = ec_data_entry
|
|
@@ -300,7 +299,7 @@ def _group_by_id(lines):
|
|
|
300
299
|
return groups
|
|
301
300
|
|
|
302
301
|
|
|
303
|
-
def get_ec2go(version: str) -> Mapping[str,
|
|
302
|
+
def get_ec2go(version: str) -> Mapping[str, set[tuple[str, str]]]:
|
|
304
303
|
"""Get the EC mapping to GO activities."""
|
|
305
304
|
url = "http://current.geneontology.org/ontology/external2go/ec2go"
|
|
306
305
|
path = ensure_path(PREFIX, url=url, name="ec2go.tsv", version=version)
|
pyobo/sources/famplex.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for FamPlex."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
8
6
|
|
|
9
7
|
import bioregistry
|
|
10
8
|
from pystow.utils import get_commit
|
|
@@ -62,7 +60,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
62
60
|
dtype=str,
|
|
63
61
|
force=force,
|
|
64
62
|
)
|
|
65
|
-
id_to_definition: Mapping[str,
|
|
63
|
+
id_to_definition: Mapping[str, tuple[str, str]] = {
|
|
66
64
|
identifier: (definition, provenance)
|
|
67
65
|
for identifier, provenance, definition in definitions_df.values
|
|
68
66
|
}
|
|
@@ -140,7 +138,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
140
138
|
yield term
|
|
141
139
|
|
|
142
140
|
|
|
143
|
-
def _get_xref_df(version: str) -> Mapping[str,
|
|
141
|
+
def _get_xref_df(version: str) -> Mapping[str, list[Reference]]:
|
|
144
142
|
base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
|
|
145
143
|
xrefs_url = f"{base_url}/equivalences.csv"
|
|
146
144
|
xrefs_df = ensure_df(PREFIX, url=xrefs_url, version=version, header=None, sep=",", dtype=str)
|
pyobo/sources/flybase.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for FlyBase Genes."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from tqdm.auto import tqdm
|
|
@@ -68,7 +66,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
|
|
|
68
66
|
return dict(df.values)
|
|
69
67
|
|
|
70
68
|
|
|
71
|
-
def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str,
|
|
69
|
+
def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
|
|
72
70
|
url = (
|
|
73
71
|
f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
|
|
74
72
|
f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
|
pyobo/sources/geonames.py
CHANGED
pyobo/sources/gmt_utils.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""GMT utilities."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
5
|
+
from typing import Union
|
|
7
6
|
|
|
8
|
-
GMTSummary =
|
|
9
|
-
WikiPathwaysGMTSummary =
|
|
7
|
+
GMTSummary = tuple[str, str, set[str]]
|
|
8
|
+
WikiPathwaysGMTSummary = tuple[str, str, str, str, str, set[str]]
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
|
|
@@ -20,7 +19,7 @@ def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
|
|
|
20
19
|
yield _process_line(line)
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
def _process_line(line: str) ->
|
|
22
|
+
def _process_line(line: str) -> tuple[str, str, set[str]]:
|
|
24
23
|
"""Return the pathway name, url, and gene sets associated.
|
|
25
24
|
|
|
26
25
|
:param line: gmt file line
|
pyobo/sources/go.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Gene Ontology."""
|
|
4
2
|
|
|
5
3
|
from pyobo import get_descendants
|
|
@@ -14,13 +12,13 @@ __all__ = [
|
|
|
14
12
|
def is_biological_process(identifier: str) -> bool:
|
|
15
13
|
"""Return if the given GO identifier is a biological process.
|
|
16
14
|
|
|
17
|
-
>>> is_biological_process(
|
|
15
|
+
>>> is_biological_process("0006915")
|
|
18
16
|
True
|
|
19
|
-
>>> is_biological_process(
|
|
17
|
+
>>> is_biological_process("GO:0006915")
|
|
20
18
|
True
|
|
21
|
-
>>> is_molecular_function(
|
|
19
|
+
>>> is_molecular_function("0006915")
|
|
22
20
|
False
|
|
23
|
-
>>> is_cellular_component(
|
|
21
|
+
>>> is_cellular_component("0006915")
|
|
24
22
|
False
|
|
25
23
|
"""
|
|
26
24
|
return _is_descendant(identifier, "0008150")
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for GWAS Central."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
import tarfile
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Optional
|
|
8
7
|
from xml.etree import ElementTree
|
|
9
8
|
|
|
10
9
|
from pyobo.struct import Obo, Reference, Term, has_part
|
pyobo/sources/hgnc.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for HGNC."""
|
|
4
2
|
|
|
5
3
|
import itertools as itt
|
|
@@ -7,8 +5,9 @@ import json
|
|
|
7
5
|
import logging
|
|
8
6
|
import typing
|
|
9
7
|
from collections import Counter, defaultdict
|
|
8
|
+
from collections.abc import Iterable
|
|
10
9
|
from operator import attrgetter
|
|
11
|
-
from typing import
|
|
10
|
+
from typing import Optional
|
|
12
11
|
|
|
13
12
|
from tabulate import tabulate
|
|
14
13
|
from tqdm.auto import tqdm
|
|
@@ -238,12 +237,12 @@ def get_obo(*, force: bool = False) -> Obo:
|
|
|
238
237
|
return HGNCGetter(force=force)
|
|
239
238
|
|
|
240
239
|
|
|
241
|
-
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
240
|
+
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
242
241
|
"""Get HGNC terms."""
|
|
243
242
|
if version is None:
|
|
244
243
|
version = get_version("hgnc")
|
|
245
244
|
unhandled_entry_keys: typing.Counter[str] = Counter()
|
|
246
|
-
unhandle_locus_types:
|
|
245
|
+
unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict)
|
|
247
246
|
path = ensure_path(
|
|
248
247
|
PREFIX,
|
|
249
248
|
url=DEFINITIONS_URL_FMT.format(version=version),
|
|
@@ -459,8 +458,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
459
458
|
headers=["hgnc_id", "name", "obsolete", "link", "provenance"],
|
|
460
459
|
tablefmt="github",
|
|
461
460
|
)
|
|
462
|
-
print(f"## {k} ({len(v)})", file=file)
|
|
463
|
-
print(t, "\n", file=file)
|
|
461
|
+
print(f"## {k} ({len(v)})", file=file)
|
|
462
|
+
print(t, "\n", file=file)
|
|
464
463
|
|
|
465
464
|
unhandle_locus_type_counter = Counter(
|
|
466
465
|
{locus_type: len(d) for locus_type, d in unhandle_locus_types.items()}
|
pyobo/sources/hgncgenefamily.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for HGNC Gene Families."""
|
|
4
2
|
|
|
5
3
|
from collections import defaultdict
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
|
|
@@ -50,7 +48,7 @@ def get_obo(force: bool = False) -> Obo:
|
|
|
50
48
|
return HGNCGroupGetter(force=force)
|
|
51
49
|
|
|
52
50
|
|
|
53
|
-
def get_hierarchy(force: bool = False) -> Mapping[str,
|
|
51
|
+
def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
|
|
54
52
|
"""Get the HGNC Gene Families hierarchy as a dictionary."""
|
|
55
53
|
path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
|
|
56
54
|
df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
|
pyobo/sources/icd10.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ICD-10 to OBO.
|
|
4
2
|
|
|
5
3
|
Run with python -m pyobo.sources.icd10 -v
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Iterable, Mapping
|
|
8
|
+
from typing import Any
|
|
10
9
|
|
|
11
10
|
import click
|
|
12
11
|
from more_click import verbose_option
|
|
@@ -57,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
|
|
|
57
56
|
chapter_urls = res_json["child"]
|
|
58
57
|
tqdm.write(f"there are {len(chapter_urls)} chapters")
|
|
59
58
|
|
|
60
|
-
visited_identifiers:
|
|
59
|
+
visited_identifiers: set[str] = set()
|
|
61
60
|
for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
|
|
62
61
|
yield from visiter(
|
|
63
62
|
identifier,
|
pyobo/sources/icd11.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ICD11 to OBO.
|
|
4
2
|
|
|
5
3
|
Run with python -m pyobo.sources.icd11 -v
|
|
@@ -8,7 +6,8 @@ Run with python -m pyobo.sources.icd11 -v
|
|
|
8
6
|
import json
|
|
9
7
|
import logging
|
|
10
8
|
import os
|
|
11
|
-
from
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
10
|
+
from typing import Any
|
|
12
11
|
|
|
13
12
|
import click
|
|
14
13
|
from more_click import verbose_option
|
|
@@ -67,7 +66,7 @@ def iterate_icd11() -> Iterable[Term]:
|
|
|
67
66
|
|
|
68
67
|
tqdm.write(f'There are {len(res_json["child"])} top level entities')
|
|
69
68
|
|
|
70
|
-
visited_identifiers:
|
|
69
|
+
visited_identifiers: set[str] = set()
|
|
71
70
|
for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
|
|
72
71
|
yield from visiter(
|
|
73
72
|
identifier,
|
pyobo/sources/icd_utils.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities or interacting with the ICD API.
|
|
4
2
|
|
|
5
3
|
Want to get your own API cliend ID and client secret?
|
|
@@ -11,8 +9,9 @@ Want to get your own API cliend ID and client secret?
|
|
|
11
9
|
import datetime
|
|
12
10
|
import json
|
|
13
11
|
import os
|
|
12
|
+
from collections.abc import Iterable, Mapping
|
|
14
13
|
from pathlib import Path
|
|
15
|
-
from typing import Any, Callable,
|
|
14
|
+
from typing import Any, Callable, Union
|
|
16
15
|
|
|
17
16
|
import pystow
|
|
18
17
|
import requests
|
|
@@ -20,7 +19,7 @@ from cachier import cachier
|
|
|
20
19
|
from pystow.config_api import ConfigError
|
|
21
20
|
from tqdm.auto import tqdm
|
|
22
21
|
|
|
23
|
-
from ..getters import
|
|
22
|
+
from ..getters import NoBuildError
|
|
24
23
|
from ..struct import Term
|
|
25
24
|
|
|
26
25
|
TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105
|
|
@@ -43,7 +42,7 @@ def _get_entity(endpoint: str, identifier: str):
|
|
|
43
42
|
return res.json()
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) ->
|
|
45
|
+
def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
|
|
47
46
|
"""Ge the child identifiers."""
|
|
48
47
|
return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
|
|
49
48
|
|
|
@@ -55,7 +54,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
55
54
|
icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
|
|
56
55
|
icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
|
|
57
56
|
except ConfigError as e:
|
|
58
|
-
raise
|
|
57
|
+
raise NoBuildError from e
|
|
59
58
|
|
|
60
59
|
grant_type = "client_credentials"
|
|
61
60
|
body_params = {"grant_type": grant_type}
|
|
@@ -73,7 +72,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
73
72
|
|
|
74
73
|
def visiter(
|
|
75
74
|
identifier: str,
|
|
76
|
-
visited_identifiers:
|
|
75
|
+
visited_identifiers: set[str],
|
|
77
76
|
directory: Union[str, Path],
|
|
78
77
|
*,
|
|
79
78
|
endpoint: str,
|
pyobo/sources/interpro.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for InterPro."""
|
|
4
2
|
|
|
5
3
|
from collections import defaultdict
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
from .utils import get_go_mapping
|
|
9
7
|
from ..struct import Obo, Reference, Term
|
|
@@ -82,7 +80,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
|
|
|
82
80
|
yield term
|
|
83
81
|
|
|
84
82
|
|
|
85
|
-
def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str,
|
|
83
|
+
def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, set[tuple[str, str]]]:
|
|
86
84
|
"""Get InterPro to Gene Ontology molecular function mapping."""
|
|
87
85
|
url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/interpro2go"
|
|
88
86
|
path = ensure_path(PREFIX, url=url, name="interpro2go.tsv", version=version, force=force)
|
|
@@ -98,7 +96,7 @@ def get_interpro_tree(version: str, force: bool = False):
|
|
|
98
96
|
|
|
99
97
|
|
|
100
98
|
def _parse_tree_helper(lines: Iterable[str]):
|
|
101
|
-
rv1:
|
|
99
|
+
rv1: defaultdict[str, list[str]] = defaultdict(list)
|
|
102
100
|
previous_depth, previous_id = 0, ""
|
|
103
101
|
stack = [previous_id]
|
|
104
102
|
|
pyobo/sources/itis.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for the Integrated Taxonomic Information System (ITIS)."""
|
|
4
2
|
|
|
5
3
|
import os
|
|
6
4
|
import shutil
|
|
7
5
|
import sqlite3
|
|
8
6
|
import zipfile
|
|
7
|
+
from collections.abc import Iterable
|
|
9
8
|
from contextlib import closing
|
|
10
|
-
from typing import Iterable
|
|
11
9
|
|
|
12
10
|
from pyobo.struct import Obo, Reference, Term
|
|
13
11
|
from pyobo.utils.io import multidict
|
pyobo/sources/kegg/__init__.py
CHANGED
pyobo/sources/kegg/api.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""API utilities for KEGG."""
|
|
4
2
|
|
|
5
3
|
import urllib.error
|
|
4
|
+
from collections.abc import Mapping
|
|
6
5
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional
|
|
8
7
|
|
|
9
8
|
from pyobo import Reference, Term, ensure_path
|
|
10
9
|
from pyobo.struct import from_species
|
|
@@ -132,7 +131,7 @@ def _ensure_conv_genome_helper(
|
|
|
132
131
|
version=version,
|
|
133
132
|
)
|
|
134
133
|
with path_rv.open("w") as file:
|
|
135
|
-
print(file=file)
|
|
134
|
+
print(file=file)
|
|
136
135
|
return path_rv.as_posix()
|
|
137
136
|
except FileNotFoundError:
|
|
138
137
|
return None
|
pyobo/sources/kegg/genes.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert KEGG Genes to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.kegg.genes``
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from typing import Optional
|
|
10
9
|
|
|
11
10
|
import click
|
|
12
11
|
from more_click import verbose_option
|
|
@@ -90,7 +89,7 @@ def _make_terms(
|
|
|
90
89
|
)
|
|
91
90
|
continue
|
|
92
91
|
if ";" in line:
|
|
93
|
-
*_extras, name =
|
|
92
|
+
*_extras, name = (part.strip() for part in extras.split(";"))
|
|
94
93
|
else:
|
|
95
94
|
name = extras
|
|
96
95
|
|
pyobo/sources/kegg/genome.py
CHANGED
pyobo/sources/kegg/pathway.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert KEGG Pathways to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.kegg.pathway``
|
|
@@ -8,8 +6,9 @@ Run with ``python -m pyobo.sources.kegg.pathway``
|
|
|
8
6
|
import logging
|
|
9
7
|
import urllib.error
|
|
10
8
|
from collections import defaultdict
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
11
10
|
from functools import partial
|
|
12
|
-
from typing import
|
|
11
|
+
from typing import Union
|
|
13
12
|
|
|
14
13
|
from tqdm.auto import tqdm
|
|
15
14
|
from tqdm.contrib.concurrent import thread_map
|
|
@@ -76,7 +75,7 @@ def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
|
|
|
76
75
|
)
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
def _get_link_pathway_map(path: str) -> Mapping[str,
|
|
78
|
+
def _get_link_pathway_map(path: str) -> Mapping[str, list[str]]:
|
|
80
79
|
rv = defaultdict(list)
|
|
81
80
|
with open(path) as file:
|
|
82
81
|
for line in file:
|
|
@@ -110,7 +109,7 @@ def _iter_genome_terms(
|
|
|
110
109
|
list_pathway_lines = [line.strip() for line in file]
|
|
111
110
|
for line in list_pathway_lines:
|
|
112
111
|
line = line.strip()
|
|
113
|
-
pathway_id, name =
|
|
112
|
+
pathway_id, name = (part.strip() for part in line.split("\t"))
|
|
114
113
|
pathway_id = pathway_id[len("path:") :]
|
|
115
114
|
|
|
116
115
|
terms[pathway_id] = term = Term.from_triple(
|
|
@@ -149,7 +148,7 @@ def _iter_genome_terms(
|
|
|
149
148
|
|
|
150
149
|
def iter_kegg_pathway_paths(
|
|
151
150
|
version: str, skip_missing: bool = True
|
|
152
|
-
) -> Iterable[Union[
|
|
151
|
+
) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]:
|
|
153
152
|
"""Get paths for the KEGG Pathway files."""
|
|
154
153
|
genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways"))
|
|
155
154
|
func = partial(_process_genome, version=version, skip_missing=skip_missing)
|
pyobo/sources/mesh.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Parser for the MeSH descriptors."""
|
|
4
2
|
|
|
5
3
|
import datetime
|
|
6
4
|
import itertools as itt
|
|
7
5
|
import logging
|
|
8
6
|
import re
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Collection, Iterable, Mapping
|
|
8
|
+
from typing import Any, Optional
|
|
10
9
|
from xml.etree.ElementTree import Element
|
|
11
10
|
|
|
12
11
|
from tqdm.auto import tqdm
|
|
13
12
|
|
|
14
|
-
from pyobo.api.utils import
|
|
13
|
+
from pyobo.api.utils import safe_get_version
|
|
15
14
|
from pyobo.identifier_utils import standardize_ec
|
|
16
15
|
from pyobo.struct import Obo, Reference, Synonym, Term
|
|
17
16
|
from pyobo.utils.cache import cached_json, cached_mapping
|
|
@@ -70,7 +69,7 @@ def get_tree_to_mesh_id(version: str) -> Mapping[str, str]:
|
|
|
70
69
|
|
|
71
70
|
def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
72
71
|
"""Get MeSH OBO terms."""
|
|
73
|
-
mesh_id_to_term:
|
|
72
|
+
mesh_id_to_term: dict[str, Term] = {}
|
|
74
73
|
|
|
75
74
|
descriptors = ensure_mesh_descriptors(version=version, force=force)
|
|
76
75
|
supplemental_records = ensure_mesh_supplemental_records(version=version, force=force)
|
|
@@ -80,8 +79,8 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
80
79
|
name = entry["name"]
|
|
81
80
|
definition = entry.get("scope_note")
|
|
82
81
|
|
|
83
|
-
xrefs:
|
|
84
|
-
synonyms:
|
|
82
|
+
xrefs: list[Reference] = []
|
|
83
|
+
synonyms: set[str] = set()
|
|
85
84
|
for concept in entry["concepts"]:
|
|
86
85
|
synonyms.add(concept["name"])
|
|
87
86
|
for term in concept["terms"]:
|
|
@@ -107,7 +106,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
107
106
|
|
|
108
107
|
def ensure_mesh_descriptors(
|
|
109
108
|
version: str, force: bool = False, force_process: bool = False
|
|
110
|
-
) ->
|
|
109
|
+
) -> list[Mapping[str, Any]]:
|
|
111
110
|
"""Get the parsed MeSH dictionary, and cache it if it wasn't already."""
|
|
112
111
|
|
|
113
112
|
@cached_json(path=prefix_directory_join(PREFIX, name="desc.json", version=version), force=force)
|
|
@@ -133,7 +132,7 @@ def get_supplemental_url(version: str) -> str:
|
|
|
133
132
|
return f"https://nlmpubs.nlm.nih.gov/projects/mesh/{version}/xmlmesh/supp{version}.gz"
|
|
134
133
|
|
|
135
134
|
|
|
136
|
-
def ensure_mesh_supplemental_records(version: str, force: bool = False) ->
|
|
135
|
+
def ensure_mesh_supplemental_records(version: str, force: bool = False) -> list[Mapping[str, Any]]:
|
|
137
136
|
"""Get the parsed MeSH dictionary, and cache it if it wasn't already."""
|
|
138
137
|
|
|
139
138
|
@cached_json(path=prefix_directory_join(PREFIX, name="supp.json", version=version), force=force)
|
|
@@ -147,11 +146,11 @@ def ensure_mesh_supplemental_records(version: str, force: bool = False) -> List[
|
|
|
147
146
|
return _inner()
|
|
148
147
|
|
|
149
148
|
|
|
150
|
-
def get_descriptor_records(element: Element, id_key: str, name_key) ->
|
|
149
|
+
def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict[str, Any]]:
|
|
151
150
|
"""Get MeSH descriptor records."""
|
|
152
151
|
logger.info("extract MeSH descriptors, concepts, and terms")
|
|
153
152
|
|
|
154
|
-
rv:
|
|
153
|
+
rv: list[dict[str, Any]] = [
|
|
155
154
|
get_descriptor_record(descriptor, id_key=id_key, name_key=name_key)
|
|
156
155
|
for descriptor in tqdm(element, desc="Getting MeSH Descriptors", unit_scale=True)
|
|
157
156
|
]
|
|
@@ -204,7 +203,7 @@ def get_descriptor_record(
|
|
|
204
203
|
element: Element,
|
|
205
204
|
id_key: str,
|
|
206
205
|
name_key: str,
|
|
207
|
-
) ->
|
|
206
|
+
) -> dict[str, Any]:
|
|
208
207
|
"""Get descriptor records from the main element.
|
|
209
208
|
|
|
210
209
|
:param element: An XML element
|
|
@@ -228,13 +227,13 @@ def get_descriptor_record(
|
|
|
228
227
|
return rv
|
|
229
228
|
|
|
230
229
|
|
|
231
|
-
def get_concept_records(element: Element) ->
|
|
230
|
+
def get_concept_records(element: Element) -> list[Mapping[str, Any]]:
|
|
232
231
|
"""Get concepts from a record."""
|
|
233
232
|
return [get_concept_record(e) for e in element.findall("ConceptList/Concept")]
|
|
234
233
|
|
|
235
234
|
|
|
236
|
-
def _get_xrefs(element: Element) ->
|
|
237
|
-
raw_registry_numbers:
|
|
235
|
+
def _get_xrefs(element: Element) -> list[tuple[str, str]]:
|
|
236
|
+
raw_registry_numbers: list[str] = sorted(
|
|
238
237
|
{e.text for e in element.findall("RelatedRegistryNumberList/RegistryNumber") if e.text}
|
|
239
238
|
)
|
|
240
239
|
registry_number = element.findtext("RegistryNumber")
|
|
@@ -267,7 +266,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
|
|
|
267
266
|
if scope_note is not None:
|
|
268
267
|
scope_note = scope_note.replace("\\n", "\n").strip()
|
|
269
268
|
|
|
270
|
-
rv:
|
|
269
|
+
rv: dict[str, Any] = {
|
|
271
270
|
"concept_ui": element.findtext("ConceptUI"),
|
|
272
271
|
"name": element.findtext("ConceptName/String"),
|
|
273
272
|
"terms": get_term_records(element),
|
|
@@ -286,7 +285,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
|
|
|
286
285
|
return rv
|
|
287
286
|
|
|
288
287
|
|
|
289
|
-
def get_term_records(element: Element) ->
|
|
288
|
+
def get_term_records(element: Element) -> list[Mapping[str, Any]]:
|
|
290
289
|
"""Get all of the terms for a concept."""
|
|
291
290
|
return [get_term_record(term) for term in element.findall("TermList/Term")]
|
|
292
291
|
|
|
@@ -307,7 +306,7 @@ def _text_or_bust(element: Element, name: str) -> str:
|
|
|
307
306
|
return n
|
|
308
307
|
|
|
309
308
|
|
|
310
|
-
def _get_descriptor_qualifiers(descriptor: Element) ->
|
|
309
|
+
def _get_descriptor_qualifiers(descriptor: Element) -> list[Mapping[str, str]]:
|
|
311
310
|
return [
|
|
312
311
|
{
|
|
313
312
|
"qualifier_ui": _text_or_bust(qualifier, "QualifierUI"),
|
|
@@ -321,7 +320,7 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
|
|
|
321
320
|
|
|
322
321
|
def get_mesh_category_curies(
|
|
323
322
|
letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
|
|
324
|
-
) ->
|
|
323
|
+
) -> list[str]:
|
|
325
324
|
"""Get the MeSH LUIDs for a category, by letter (e.g., "A").
|
|
326
325
|
|
|
327
326
|
:param letter: The MeSH tree, A for anatomy, C for disease, etc.
|
|
@@ -332,8 +331,7 @@ def get_mesh_category_curies(
|
|
|
332
331
|
.. seealso:: https://meshb.nlm.nih.gov/treeView
|
|
333
332
|
"""
|
|
334
333
|
if version is None:
|
|
335
|
-
version =
|
|
336
|
-
assert version is not None
|
|
334
|
+
version = safe_get_version("mesh")
|
|
337
335
|
tree_to_mesh = get_tree_to_mesh_id(version=version)
|
|
338
336
|
rv = []
|
|
339
337
|
for i in range(1, 100):
|