pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/sources/dictybase_gene.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for dictyBase gene.
|
|
4
2
|
|
|
5
3
|
Note that normal dictybase idenififers are for sequences
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Iterable
|
|
10
8
|
|
|
11
9
|
import pandas as pd
|
|
12
10
|
from tqdm.auto import tqdm
|
pyobo/sources/drugbank.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert DrugBank to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.drugbank``
|
|
@@ -8,14 +6,15 @@ Run with ``python -m pyobo.sources.drugbank``
|
|
|
8
6
|
import datetime
|
|
9
7
|
import itertools as itt
|
|
10
8
|
import logging
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
11
10
|
from functools import lru_cache
|
|
12
|
-
from typing import Any,
|
|
11
|
+
from typing import Any, Optional
|
|
13
12
|
from xml.etree import ElementTree
|
|
14
13
|
|
|
15
14
|
import pystow
|
|
16
15
|
from tqdm.auto import tqdm
|
|
17
16
|
|
|
18
|
-
from ..getters import
|
|
17
|
+
from ..getters import NoBuildError
|
|
19
18
|
from ..struct import Obo, Reference, Term
|
|
20
19
|
from ..struct.typedef import has_inchi, has_salt, has_smiles
|
|
21
20
|
from ..utils.cache import cached_pickle
|
|
@@ -139,7 +138,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
|
|
|
139
138
|
return term
|
|
140
139
|
|
|
141
140
|
|
|
142
|
-
@lru_cache
|
|
141
|
+
@lru_cache
|
|
143
142
|
def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
|
|
144
143
|
"""Get the DrugBank XML parser root.
|
|
145
144
|
|
|
@@ -152,7 +151,7 @@ def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
|
|
|
152
151
|
username = pystow.get_config("pyobo", "drugbank_username", raise_on_missing=True)
|
|
153
152
|
password = pystow.get_config("pyobo", "drugbank_password", raise_on_missing=True)
|
|
154
153
|
except ConfigError as e:
|
|
155
|
-
raise
|
|
154
|
+
raise NoBuildError from e
|
|
156
155
|
|
|
157
156
|
element = parse_drugbank(version=version, username=username, password=password)
|
|
158
157
|
return element.getroot()
|
|
@@ -167,7 +166,7 @@ smiles_template = f"{ns}calculated-properties/{ns}property[{ns}kind='SMILES']/{n
|
|
|
167
166
|
def _extract_drug_info(drug_xml: ElementTree.Element) -> Mapping[str, Any]:
|
|
168
167
|
"""Extract information from an XML element representing a drug."""
|
|
169
168
|
# assert drug_xml.tag == f'{ns}drug'
|
|
170
|
-
row:
|
|
169
|
+
row: dict[str, Any] = {
|
|
171
170
|
"type": drug_xml.get("type"),
|
|
172
171
|
"drugbank_id": drug_xml.findtext(f"{ns}drugbank-id[@primary='true']"),
|
|
173
172
|
"cas": drug_xml.findtext(f"{ns}cas-number"),
|
pyobo/sources/drugbank_salt.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert DrugBank Salts to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.drugbank_salt``
|
|
@@ -10,11 +8,12 @@ Get relations between drugbank salts and drugbank parents with
|
|
|
10
8
|
.. code-block:: python
|
|
11
9
|
|
|
12
10
|
import pyobo
|
|
13
|
-
|
|
11
|
+
|
|
12
|
+
df = pyobo.get_filtered_relations_df("drugbank", "obo:has_salt")
|
|
14
13
|
"""
|
|
15
14
|
|
|
16
15
|
import logging
|
|
17
|
-
from
|
|
16
|
+
from collections.abc import Iterable
|
|
18
17
|
|
|
19
18
|
from .drugbank import iterate_drug_info
|
|
20
19
|
from ..struct import Obo, Reference, Term
|
pyobo/sources/drugcentral.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Get DrugCentral as OBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
5
|
+
from collections.abc import Iterable
|
|
7
6
|
from contextlib import closing
|
|
8
|
-
from typing import DefaultDict, Iterable, List
|
|
9
7
|
|
|
10
8
|
import bioregistry
|
|
11
9
|
import psycopg2
|
|
@@ -25,9 +23,9 @@ PREFIX = "drugcentral"
|
|
|
25
23
|
HOST = "unmtid-dbs.net"
|
|
26
24
|
PORT = 5433
|
|
27
25
|
USER = "drugman"
|
|
28
|
-
PASSWORD = "dosage"
|
|
26
|
+
PASSWORD = "dosage" # noqa:S105
|
|
29
27
|
DBNAME = "drugcentral"
|
|
30
|
-
PARAMS =
|
|
28
|
+
PARAMS = {"dbname": DBNAME, "user": USER, "password": PASSWORD, "host": HOST, "port": PORT}
|
|
31
29
|
|
|
32
30
|
|
|
33
31
|
class DrugCentralGetter(Obo):
|
|
@@ -58,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
|
|
|
58
56
|
with closing(conn.cursor()) as cur:
|
|
59
57
|
cur.execute("SELECT struct_id, id_type, identifier FROM public.identifier")
|
|
60
58
|
rows = cur.fetchall()
|
|
61
|
-
xrefs:
|
|
59
|
+
xrefs: defaultdict[str, list[Reference]] = defaultdict(list)
|
|
62
60
|
for drugcentral_id, prefix, identifier in tqdm(
|
|
63
61
|
rows, unit_scale=True, desc="loading xrefs"
|
|
64
62
|
):
|
|
@@ -76,7 +74,7 @@ def iter_terms() -> Iterable[Term]:
|
|
|
76
74
|
)
|
|
77
75
|
with closing(conn.cursor()) as cur:
|
|
78
76
|
cur.execute("SELECT id, name FROM public.synonyms")
|
|
79
|
-
synonyms:
|
|
77
|
+
synonyms: defaultdict[str, list[Synonym]] = defaultdict(list)
|
|
80
78
|
for drugcentral_id, synonym in cur.fetchall():
|
|
81
79
|
synonyms[str(drugcentral_id)].append(Synonym(name=synonym))
|
|
82
80
|
|
pyobo/sources/expasy.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ExPASy to OBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
6
|
+
from typing import Any, Optional
|
|
8
7
|
|
|
9
8
|
from .utils import get_go_mapping
|
|
10
9
|
from ..struct import Obo, Reference, Synonym, Term
|
|
@@ -76,7 +75,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
76
75
|
with open(tree_path) as file:
|
|
77
76
|
tree = get_tree(file)
|
|
78
77
|
|
|
79
|
-
terms:
|
|
78
|
+
terms: dict[str, Term] = {}
|
|
80
79
|
child_to_parents = defaultdict(list)
|
|
81
80
|
for ec_code, data in tree.items():
|
|
82
81
|
terms[ec_code] = Term(
|
|
@@ -176,7 +175,7 @@ def normalize_expasy_id(expasy_id: str) -> str:
|
|
|
176
175
|
return expasy_id.replace(" ", "")
|
|
177
176
|
|
|
178
177
|
|
|
179
|
-
def give_edge(unnormalized_ec_code: str) ->
|
|
178
|
+
def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]:
|
|
180
179
|
"""Return a (parent, child) tuple for given id."""
|
|
181
180
|
levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x]
|
|
182
181
|
level = len(levels)
|
|
@@ -227,7 +226,7 @@ def get_database(lines: Iterable[str]) -> Mapping:
|
|
|
227
226
|
for groups in _group_by_id(lines):
|
|
228
227
|
_, expasy_id = groups[0]
|
|
229
228
|
|
|
230
|
-
ec_data_entry:
|
|
229
|
+
ec_data_entry: dict[str, Any] = {
|
|
231
230
|
"concept": {
|
|
232
231
|
"namespace": PREFIX,
|
|
233
232
|
"identifier": expasy_id,
|
|
@@ -269,11 +268,11 @@ def get_database(lines: Iterable[str]) -> Mapping:
|
|
|
269
268
|
continue
|
|
270
269
|
uniprot_id, uniprot_accession = uniprot_entry.split(",")
|
|
271
270
|
ec_data_entry["proteins"].append( # type:ignore
|
|
272
|
-
|
|
273
|
-
namespace
|
|
274
|
-
name
|
|
275
|
-
identifier
|
|
276
|
-
|
|
271
|
+
{
|
|
272
|
+
"namespace": "uniprot",
|
|
273
|
+
"name": uniprot_accession,
|
|
274
|
+
"identifier": uniprot_id,
|
|
275
|
+
}
|
|
277
276
|
)
|
|
278
277
|
|
|
279
278
|
rv[expasy_id] = ec_data_entry
|
|
@@ -300,7 +299,7 @@ def _group_by_id(lines):
|
|
|
300
299
|
return groups
|
|
301
300
|
|
|
302
301
|
|
|
303
|
-
def get_ec2go(version: str) -> Mapping[str,
|
|
302
|
+
def get_ec2go(version: str) -> Mapping[str, set[tuple[str, str]]]:
|
|
304
303
|
"""Get the EC mapping to GO activities."""
|
|
305
304
|
url = "http://current.geneontology.org/ontology/external2go/ec2go"
|
|
306
305
|
path = ensure_path(PREFIX, url=url, name="ec2go.tsv", version=version)
|
pyobo/sources/famplex.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for FamPlex."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
8
6
|
|
|
9
7
|
import bioregistry
|
|
10
8
|
from pystow.utils import get_commit
|
|
@@ -62,7 +60,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
62
60
|
dtype=str,
|
|
63
61
|
force=force,
|
|
64
62
|
)
|
|
65
|
-
id_to_definition: Mapping[str,
|
|
63
|
+
id_to_definition: Mapping[str, tuple[str, str]] = {
|
|
66
64
|
identifier: (definition, provenance)
|
|
67
65
|
for identifier, provenance, definition in definitions_df.values
|
|
68
66
|
}
|
|
@@ -140,7 +138,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
140
138
|
yield term
|
|
141
139
|
|
|
142
140
|
|
|
143
|
-
def _get_xref_df(version: str) -> Mapping[str,
|
|
141
|
+
def _get_xref_df(version: str) -> Mapping[str, list[Reference]]:
|
|
144
142
|
base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
|
|
145
143
|
xrefs_url = f"{base_url}/equivalences.csv"
|
|
146
144
|
xrefs_df = ensure_df(PREFIX, url=xrefs_url, version=version, header=None, sep=",", dtype=str)
|
pyobo/sources/flybase.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for FlyBase Genes."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from tqdm.auto import tqdm
|
|
@@ -68,7 +66,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
|
|
|
68
66
|
return dict(df.values)
|
|
69
67
|
|
|
70
68
|
|
|
71
|
-
def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str,
|
|
69
|
+
def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
|
|
72
70
|
url = (
|
|
73
71
|
f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
|
|
74
72
|
f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
|
pyobo/sources/geonames.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Get terms from geonames."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
4
|
-
from
|
|
6
|
+
from collections.abc import Collection, Iterable, Mapping
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
7
9
|
from pystow.utils import read_zipfile_csv
|
|
@@ -146,15 +148,7 @@ def get_code_to_admin2(
|
|
|
146
148
|
return code_to_admin2
|
|
147
149
|
|
|
148
150
|
|
|
149
|
-
def
|
|
150
|
-
code_to_country,
|
|
151
|
-
code_to_admin1,
|
|
152
|
-
code_to_admin2,
|
|
153
|
-
*,
|
|
154
|
-
minimum_population: int = 100_000,
|
|
155
|
-
force: bool = False,
|
|
156
|
-
) -> Mapping[str, Term]:
|
|
157
|
-
"""Get a mapping from city code to term."""
|
|
151
|
+
def _get_cities_df(force: bool = False) -> pd.DataFrame:
|
|
158
152
|
columns = [
|
|
159
153
|
"geonames_id",
|
|
160
154
|
"name",
|
|
@@ -184,7 +178,19 @@ def get_cities(
|
|
|
184
178
|
names=columns,
|
|
185
179
|
dtype=str,
|
|
186
180
|
)
|
|
181
|
+
return cities_df
|
|
182
|
+
|
|
187
183
|
|
|
184
|
+
def get_cities(
|
|
185
|
+
code_to_country,
|
|
186
|
+
code_to_admin1,
|
|
187
|
+
code_to_admin2,
|
|
188
|
+
*,
|
|
189
|
+
minimum_population: int = 100_000,
|
|
190
|
+
force: bool = False,
|
|
191
|
+
) -> Mapping[str, Term]:
|
|
192
|
+
"""Get a mapping from city code to term."""
|
|
193
|
+
cities_df = _get_cities_df(force=force)
|
|
188
194
|
cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
|
|
189
195
|
cities_df.synonyms = cities_df.synonyms.str.split(",")
|
|
190
196
|
|
|
@@ -235,5 +241,17 @@ def get_cities(
|
|
|
235
241
|
return terms
|
|
236
242
|
|
|
237
243
|
|
|
244
|
+
def get_city_to_country() -> dict[str, str]:
|
|
245
|
+
"""Get a mapping from city GeoNames to country GeoNames id."""
|
|
246
|
+
rv = {}
|
|
247
|
+
code_to_country = get_code_to_country()
|
|
248
|
+
cities_df = _get_cities_df()
|
|
249
|
+
for city_geonames_id, country_code in cities_df[["geonames_id", "country_code"]].values:
|
|
250
|
+
if pd.isna(city_geonames_id) or pd.isna(country_code):
|
|
251
|
+
continue
|
|
252
|
+
rv[city_geonames_id] = code_to_country[country_code].identifier
|
|
253
|
+
return rv
|
|
254
|
+
|
|
255
|
+
|
|
238
256
|
if __name__ == "__main__":
|
|
239
257
|
GeonamesGetter().write_default(write_obo=True, force=True)
|
pyobo/sources/gmt_utils.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""GMT utilities."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
5
|
+
from typing import Union
|
|
7
6
|
|
|
8
|
-
GMTSummary =
|
|
9
|
-
WikiPathwaysGMTSummary =
|
|
7
|
+
GMTSummary = tuple[str, str, set[str]]
|
|
8
|
+
WikiPathwaysGMTSummary = tuple[str, str, str, str, str, set[str]]
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
|
|
@@ -20,7 +19,7 @@ def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
|
|
|
20
19
|
yield _process_line(line)
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
def _process_line(line: str) ->
|
|
22
|
+
def _process_line(line: str) -> tuple[str, str, set[str]]:
|
|
24
23
|
"""Return the pathway name, url, and gene sets associated.
|
|
25
24
|
|
|
26
25
|
:param line: gmt file line
|
pyobo/sources/go.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Gene Ontology."""
|
|
4
2
|
|
|
5
3
|
from pyobo import get_descendants
|
|
@@ -14,13 +12,13 @@ __all__ = [
|
|
|
14
12
|
def is_biological_process(identifier: str) -> bool:
|
|
15
13
|
"""Return if the given GO identifier is a biological process.
|
|
16
14
|
|
|
17
|
-
>>> is_biological_process(
|
|
15
|
+
>>> is_biological_process("0006915")
|
|
18
16
|
True
|
|
19
|
-
>>> is_biological_process(
|
|
17
|
+
>>> is_biological_process("GO:0006915")
|
|
20
18
|
True
|
|
21
|
-
>>> is_molecular_function(
|
|
19
|
+
>>> is_molecular_function("0006915")
|
|
22
20
|
False
|
|
23
|
-
>>> is_cellular_component(
|
|
21
|
+
>>> is_cellular_component("0006915")
|
|
24
22
|
False
|
|
25
23
|
"""
|
|
26
24
|
return _is_descendant(identifier, "0008150")
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for GWAS Central."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
import tarfile
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Optional
|
|
8
7
|
from xml.etree import ElementTree
|
|
9
8
|
|
|
10
9
|
from pyobo.struct import Obo, Reference, Term, has_part
|
pyobo/sources/hgnc.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for HGNC."""
|
|
4
2
|
|
|
5
3
|
import itertools as itt
|
|
@@ -7,13 +5,14 @@ import json
|
|
|
7
5
|
import logging
|
|
8
6
|
import typing
|
|
9
7
|
from collections import Counter, defaultdict
|
|
8
|
+
from collections.abc import Iterable
|
|
10
9
|
from operator import attrgetter
|
|
11
|
-
from typing import
|
|
10
|
+
from typing import Optional
|
|
12
11
|
|
|
13
|
-
import bioversions
|
|
14
12
|
from tabulate import tabulate
|
|
15
13
|
from tqdm.auto import tqdm
|
|
16
14
|
|
|
15
|
+
from pyobo.api.utils import get_version
|
|
17
16
|
from pyobo.struct import (
|
|
18
17
|
Obo,
|
|
19
18
|
Reference,
|
|
@@ -238,12 +237,12 @@ def get_obo(*, force: bool = False) -> Obo:
|
|
|
238
237
|
return HGNCGetter(force=force)
|
|
239
238
|
|
|
240
239
|
|
|
241
|
-
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
240
|
+
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
|
|
242
241
|
"""Get HGNC terms."""
|
|
243
242
|
if version is None:
|
|
244
|
-
version =
|
|
243
|
+
version = get_version("hgnc")
|
|
245
244
|
unhandled_entry_keys: typing.Counter[str] = Counter()
|
|
246
|
-
unhandle_locus_types:
|
|
245
|
+
unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict)
|
|
247
246
|
path = ensure_path(
|
|
248
247
|
PREFIX,
|
|
249
248
|
url=DEFINITIONS_URL_FMT.format(version=version),
|
|
@@ -459,8 +458,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
|
|
|
459
458
|
headers=["hgnc_id", "name", "obsolete", "link", "provenance"],
|
|
460
459
|
tablefmt="github",
|
|
461
460
|
)
|
|
462
|
-
print(f"## {k} ({len(v)})", file=file)
|
|
463
|
-
print(t, "\n", file=file)
|
|
461
|
+
print(f"## {k} ({len(v)})", file=file)
|
|
462
|
+
print(t, "\n", file=file)
|
|
464
463
|
|
|
465
464
|
unhandle_locus_type_counter = Counter(
|
|
466
465
|
{locus_type: len(d) for locus_type, d in unhandle_locus_types.items()}
|
pyobo/sources/hgncgenefamily.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for HGNC Gene Families."""
|
|
4
2
|
|
|
5
3
|
from collections import defaultdict
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
|
|
@@ -50,7 +48,7 @@ def get_obo(force: bool = False) -> Obo:
|
|
|
50
48
|
return HGNCGroupGetter(force=force)
|
|
51
49
|
|
|
52
50
|
|
|
53
|
-
def get_hierarchy(force: bool = False) -> Mapping[str,
|
|
51
|
+
def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
|
|
54
52
|
"""Get the HGNC Gene Families hierarchy as a dictionary."""
|
|
55
53
|
path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
|
|
56
54
|
df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
|
pyobo/sources/icd10.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ICD-10 to OBO.
|
|
4
2
|
|
|
5
3
|
Run with python -m pyobo.sources.icd10 -v
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Iterable, Mapping
|
|
8
|
+
from typing import Any
|
|
10
9
|
|
|
11
10
|
import click
|
|
12
11
|
from more_click import verbose_option
|
|
@@ -57,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
|
|
|
57
56
|
chapter_urls = res_json["child"]
|
|
58
57
|
tqdm.write(f"there are {len(chapter_urls)} chapters")
|
|
59
58
|
|
|
60
|
-
visited_identifiers:
|
|
59
|
+
visited_identifiers: set[str] = set()
|
|
61
60
|
for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
|
|
62
61
|
yield from visiter(
|
|
63
62
|
identifier,
|
pyobo/sources/icd11.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert ICD11 to OBO.
|
|
4
2
|
|
|
5
3
|
Run with python -m pyobo.sources.icd11 -v
|
|
@@ -8,7 +6,8 @@ Run with python -m pyobo.sources.icd11 -v
|
|
|
8
6
|
import json
|
|
9
7
|
import logging
|
|
10
8
|
import os
|
|
11
|
-
from
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
10
|
+
from typing import Any
|
|
12
11
|
|
|
13
12
|
import click
|
|
14
13
|
from more_click import verbose_option
|
|
@@ -67,7 +66,7 @@ def iterate_icd11() -> Iterable[Term]:
|
|
|
67
66
|
|
|
68
67
|
tqdm.write(f'There are {len(res_json["child"])} top level entities')
|
|
69
68
|
|
|
70
|
-
visited_identifiers:
|
|
69
|
+
visited_identifiers: set[str] = set()
|
|
71
70
|
for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
|
|
72
71
|
yield from visiter(
|
|
73
72
|
identifier,
|
pyobo/sources/icd_utils.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities or interacting with the ICD API.
|
|
4
2
|
|
|
5
3
|
Want to get your own API cliend ID and client secret?
|
|
@@ -11,8 +9,9 @@ Want to get your own API cliend ID and client secret?
|
|
|
11
9
|
import datetime
|
|
12
10
|
import json
|
|
13
11
|
import os
|
|
12
|
+
from collections.abc import Iterable, Mapping
|
|
14
13
|
from pathlib import Path
|
|
15
|
-
from typing import Any, Callable,
|
|
14
|
+
from typing import Any, Callable, Union
|
|
16
15
|
|
|
17
16
|
import pystow
|
|
18
17
|
import requests
|
|
@@ -20,7 +19,7 @@ from cachier import cachier
|
|
|
20
19
|
from pystow.config_api import ConfigError
|
|
21
20
|
from tqdm.auto import tqdm
|
|
22
21
|
|
|
23
|
-
from ..getters import
|
|
22
|
+
from ..getters import NoBuildError
|
|
24
23
|
from ..struct import Term
|
|
25
24
|
|
|
26
25
|
TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105
|
|
@@ -43,7 +42,7 @@ def _get_entity(endpoint: str, identifier: str):
|
|
|
43
42
|
return res.json()
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) ->
|
|
45
|
+
def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
|
|
47
46
|
"""Ge the child identifiers."""
|
|
48
47
|
return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
|
|
49
48
|
|
|
@@ -55,7 +54,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
55
54
|
icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
|
|
56
55
|
icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
|
|
57
56
|
except ConfigError as e:
|
|
58
|
-
raise
|
|
57
|
+
raise NoBuildError from e
|
|
59
58
|
|
|
60
59
|
grant_type = "client_credentials"
|
|
61
60
|
body_params = {"grant_type": grant_type}
|
|
@@ -73,7 +72,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
|
|
|
73
72
|
|
|
74
73
|
def visiter(
|
|
75
74
|
identifier: str,
|
|
76
|
-
visited_identifiers:
|
|
75
|
+
visited_identifiers: set[str],
|
|
77
76
|
directory: Union[str, Path],
|
|
78
77
|
*,
|
|
79
78
|
endpoint: str,
|
pyobo/sources/interpro.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for InterPro."""
|
|
4
2
|
|
|
5
3
|
from collections import defaultdict
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
7
5
|
|
|
8
6
|
from .utils import get_go_mapping
|
|
9
7
|
from ..struct import Obo, Reference, Term
|
|
@@ -82,7 +80,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
|
|
|
82
80
|
yield term
|
|
83
81
|
|
|
84
82
|
|
|
85
|
-
def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str,
|
|
83
|
+
def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, set[tuple[str, str]]]:
|
|
86
84
|
"""Get InterPro to Gene Ontology molecular function mapping."""
|
|
87
85
|
url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/interpro2go"
|
|
88
86
|
path = ensure_path(PREFIX, url=url, name="interpro2go.tsv", version=version, force=force)
|
|
@@ -98,7 +96,7 @@ def get_interpro_tree(version: str, force: bool = False):
|
|
|
98
96
|
|
|
99
97
|
|
|
100
98
|
def _parse_tree_helper(lines: Iterable[str]):
|
|
101
|
-
rv1:
|
|
99
|
+
rv1: defaultdict[str, list[str]] = defaultdict(list)
|
|
102
100
|
previous_depth, previous_id = 0, ""
|
|
103
101
|
stack = [previous_id]
|
|
104
102
|
|
pyobo/sources/itis.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for the Integrated Taxonomic Information System (ITIS)."""
|
|
4
2
|
|
|
5
3
|
import os
|
|
6
4
|
import shutil
|
|
7
5
|
import sqlite3
|
|
8
6
|
import zipfile
|
|
7
|
+
from collections.abc import Iterable
|
|
9
8
|
from contextlib import closing
|
|
10
|
-
from typing import Iterable
|
|
11
9
|
|
|
12
10
|
from pyobo.struct import Obo, Reference, Term
|
|
13
11
|
from pyobo.utils.io import multidict
|
pyobo/sources/kegg/__init__.py
CHANGED
pyobo/sources/kegg/api.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""API utilities for KEGG."""
|
|
4
2
|
|
|
5
3
|
import urllib.error
|
|
4
|
+
from collections.abc import Mapping
|
|
6
5
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional
|
|
8
7
|
|
|
9
8
|
from pyobo import Reference, Term, ensure_path
|
|
10
9
|
from pyobo.struct import from_species
|
|
@@ -132,7 +131,7 @@ def _ensure_conv_genome_helper(
|
|
|
132
131
|
version=version,
|
|
133
132
|
)
|
|
134
133
|
with path_rv.open("w") as file:
|
|
135
|
-
print(file=file)
|
|
134
|
+
print(file=file)
|
|
136
135
|
return path_rv.as_posix()
|
|
137
136
|
except FileNotFoundError:
|
|
138
137
|
return None
|
pyobo/sources/kegg/genes.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Convert KEGG Genes to OBO.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.kegg.genes``
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
9
|
-
from
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from typing import Optional
|
|
10
9
|
|
|
11
10
|
import click
|
|
12
11
|
from more_click import verbose_option
|
|
@@ -90,7 +89,7 @@ def _make_terms(
|
|
|
90
89
|
)
|
|
91
90
|
continue
|
|
92
91
|
if ";" in line:
|
|
93
|
-
*_extras, name =
|
|
92
|
+
*_extras, name = (part.strip() for part in extras.split(";"))
|
|
94
93
|
else:
|
|
95
94
|
name = extras
|
|
96
95
|
|
pyobo/sources/kegg/genome.py
CHANGED