pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/api/xrefs.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""High-level API for synonyms."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
4
|
+
from collections.abc import Mapping
|
|
6
5
|
from functools import lru_cache
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional, Union
|
|
8
7
|
|
|
9
8
|
import pandas as pd
|
|
10
9
|
from tqdm.auto import tqdm
|
|
@@ -43,7 +42,7 @@ def get_xref(
|
|
|
43
42
|
return filtered_xrefs.get(identifier)
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
@lru_cache
|
|
45
|
+
@lru_cache
|
|
47
46
|
@wrap_norm_prefix
|
|
48
47
|
def get_filtered_xrefs(
|
|
49
48
|
prefix: str,
|
|
@@ -147,7 +146,7 @@ def get_sssom_df(
|
|
|
147
146
|
prefix = prefix.ontology
|
|
148
147
|
else:
|
|
149
148
|
df = get_xrefs_df(prefix=prefix, **kwargs)
|
|
150
|
-
rows:
|
|
149
|
+
rows: list[tuple[str, ...]] = []
|
|
151
150
|
with logging_redirect_tqdm():
|
|
152
151
|
for source_id, target_prefix, target_id in tqdm(
|
|
153
152
|
df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
|
pyobo/aws.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Interface for caching data on AWS S3."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
import os
|
|
7
|
-
from typing import Optional
|
|
5
|
+
from typing import Optional
|
|
8
6
|
|
|
9
7
|
import boto3
|
|
10
8
|
import humanize
|
|
@@ -57,8 +55,8 @@ def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
|
|
|
57
55
|
|
|
58
56
|
def upload_artifacts(
|
|
59
57
|
bucket: str,
|
|
60
|
-
whitelist: Optional[
|
|
61
|
-
blacklist: Optional[
|
|
58
|
+
whitelist: Optional[set[str]] = None,
|
|
59
|
+
blacklist: Optional[set[str]] = None,
|
|
62
60
|
s3_client=None,
|
|
63
61
|
) -> None:
|
|
64
62
|
"""Upload all artifacts to AWS."""
|
pyobo/cli/__init__.py
CHANGED
pyobo/cli/aws.py
CHANGED
pyobo/cli/cli.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""CLI for PyOBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
@@ -15,7 +13,6 @@ from tabulate import tabulate
|
|
|
15
13
|
from .aws import main as aws_main
|
|
16
14
|
from .database import main as database_main
|
|
17
15
|
from .lookup import lookup
|
|
18
|
-
from ..apps.cli import main as apps_main
|
|
19
16
|
from ..constants import RAW_DIRECTORY
|
|
20
17
|
from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
|
|
21
18
|
from ..registries import iter_cached_obo
|
|
@@ -116,7 +113,6 @@ def remapping(file):
|
|
|
116
113
|
|
|
117
114
|
|
|
118
115
|
main.add_command(lookup)
|
|
119
|
-
main.add_command(apps_main)
|
|
120
116
|
main.add_command(aws_main)
|
|
121
117
|
main.add_command(database_main)
|
|
122
118
|
|
pyobo/cli/database.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""CLI for PyOBO Database Generation."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
@@ -317,7 +315,7 @@ def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
|
317
315
|
@zenodo_option
|
|
318
316
|
@force_option
|
|
319
317
|
@no_strict_option
|
|
320
|
-
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
318
|
+
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
321
319
|
"""Make the prefix-identifier-xref dump."""
|
|
322
320
|
with logging_redirect_tqdm():
|
|
323
321
|
paths = db_output_helper(
|
pyobo/cli/lookup.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""CLI for PyOBO lookups."""
|
|
4
2
|
|
|
5
3
|
import json
|
|
@@ -282,7 +280,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
|
|
|
282
280
|
"""Look up ancestors."""
|
|
283
281
|
curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
|
|
284
282
|
for curie in sorted(curies or []):
|
|
285
|
-
click.echo(f"{curie}\t{get_name_by_curie(curie)}")
|
|
283
|
+
click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
|
|
286
284
|
|
|
287
285
|
|
|
288
286
|
@lookup.command()
|
|
@@ -295,7 +293,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
|
|
|
295
293
|
"""Look up descendants."""
|
|
296
294
|
curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
|
|
297
295
|
for curie in sorted(curies or []):
|
|
298
|
-
click.echo(f"{curie}\t{get_name_by_curie(curie)}")
|
|
296
|
+
click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
|
|
299
297
|
|
|
300
298
|
|
|
301
299
|
@lookup.command()
|
pyobo/cli/utils.py
CHANGED
pyobo/constants.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Constants for PyOBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
@@ -80,7 +78,6 @@ TYPEDEFS_FILE = "typedefs.tsv.gz"
|
|
|
80
78
|
SPECIES_RECORD = "5334738"
|
|
81
79
|
SPECIES_FILE = "species.tsv.gz"
|
|
82
80
|
|
|
83
|
-
|
|
84
81
|
NCBITAXON_PREFIX = "NCBITaxon"
|
|
85
82
|
DATE_FORMAT = "%d:%m:%Y %H:%M"
|
|
86
83
|
PROVENANCE_PREFIXES = {
|
pyobo/getters.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities for OBO files."""
|
|
4
2
|
|
|
5
3
|
import datetime
|
|
@@ -11,16 +9,11 @@ import subprocess
|
|
|
11
9
|
import typing
|
|
12
10
|
import urllib.error
|
|
13
11
|
from collections import Counter
|
|
12
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
14
13
|
from pathlib import Path
|
|
15
14
|
from typing import (
|
|
16
15
|
Callable,
|
|
17
|
-
Iterable,
|
|
18
|
-
List,
|
|
19
|
-
Mapping,
|
|
20
16
|
Optional,
|
|
21
|
-
Sequence,
|
|
22
|
-
Set,
|
|
23
|
-
Tuple,
|
|
24
17
|
TypeVar,
|
|
25
18
|
Union,
|
|
26
19
|
)
|
|
@@ -30,7 +23,7 @@ from bioontologies import robot
|
|
|
30
23
|
from tqdm.auto import tqdm
|
|
31
24
|
|
|
32
25
|
from .constants import DATABASE_DIRECTORY
|
|
33
|
-
from .identifier_utils import
|
|
26
|
+
from .identifier_utils import MissingPrefixError, wrap_norm_prefix
|
|
34
27
|
from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
|
|
35
28
|
from .struct import Obo
|
|
36
29
|
from .utils.io import get_writer
|
|
@@ -39,17 +32,17 @@ from .version import get_git_hash, get_version
|
|
|
39
32
|
|
|
40
33
|
__all__ = [
|
|
41
34
|
"get_ontology",
|
|
42
|
-
"
|
|
35
|
+
"NoBuildError",
|
|
43
36
|
]
|
|
44
37
|
|
|
45
38
|
logger = logging.getLogger(__name__)
|
|
46
39
|
|
|
47
40
|
|
|
48
|
-
class
|
|
41
|
+
class NoBuildError(RuntimeError):
|
|
49
42
|
"""Base exception for being unable to build."""
|
|
50
43
|
|
|
51
44
|
|
|
52
|
-
class
|
|
45
|
+
class UnhandledFormatError(NoBuildError):
|
|
53
46
|
"""Only OWL is available."""
|
|
54
47
|
|
|
55
48
|
|
|
@@ -117,7 +110,7 @@ def get_ontology(
|
|
|
117
110
|
|
|
118
111
|
ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
|
|
119
112
|
if path is None:
|
|
120
|
-
raise
|
|
113
|
+
raise NoBuildError(prefix)
|
|
121
114
|
elif ontology_format == "obo":
|
|
122
115
|
pass # all gucci
|
|
123
116
|
elif ontology_format == "owl":
|
|
@@ -127,7 +120,7 @@ def get_ontology(
|
|
|
127
120
|
robot.convert(path, _converted_obo_path, check=robot_check)
|
|
128
121
|
path = _converted_obo_path
|
|
129
122
|
else:
|
|
130
|
-
raise
|
|
123
|
+
raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
|
|
131
124
|
|
|
132
125
|
from .reader import from_obo_path
|
|
133
126
|
|
|
@@ -147,8 +140,8 @@ def get_ontology(
|
|
|
147
140
|
|
|
148
141
|
def _ensure_ontology_path(
|
|
149
142
|
prefix: str, force, version
|
|
150
|
-
) -> Union[
|
|
151
|
-
for ontology_format, url in [
|
|
143
|
+
) -> Union[tuple[str, Path], tuple[None, None]]:
|
|
144
|
+
for ontology_format, url in [
|
|
152
145
|
("obo", bioregistry.get_obo_download(prefix)),
|
|
153
146
|
("owl", bioregistry.get_owl_download(prefix)),
|
|
154
147
|
("json", bioregistry.get_json_download(prefix)),
|
|
@@ -246,7 +239,7 @@ def iter_helper(
|
|
|
246
239
|
leave: bool = False,
|
|
247
240
|
strict: bool = True,
|
|
248
241
|
**kwargs,
|
|
249
|
-
) -> Iterable[
|
|
242
|
+
) -> Iterable[tuple[str, str, X]]:
|
|
250
243
|
"""Yield all mappings extracted from each database given."""
|
|
251
244
|
for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs):
|
|
252
245
|
it = tqdm(
|
|
@@ -266,7 +259,7 @@ def _prefixes(
|
|
|
266
259
|
skip_below: Optional[str] = None,
|
|
267
260
|
skip_below_inclusive: bool = True,
|
|
268
261
|
skip_pyobo: bool = False,
|
|
269
|
-
skip_set: Optional[
|
|
262
|
+
skip_set: Optional[set[str]] = None,
|
|
270
263
|
) -> Iterable[str]:
|
|
271
264
|
for prefix, resource in sorted(bioregistry.read_registry().items()):
|
|
272
265
|
if resource.no_own_terms:
|
|
@@ -299,10 +292,10 @@ def iter_helper_helper(
|
|
|
299
292
|
skip_below: Optional[str] = None,
|
|
300
293
|
skip_below_inclusive: bool = True,
|
|
301
294
|
skip_pyobo: bool = False,
|
|
302
|
-
skip_set: Optional[
|
|
295
|
+
skip_set: Optional[set[str]] = None,
|
|
303
296
|
strict: bool = True,
|
|
304
297
|
**kwargs,
|
|
305
|
-
) -> Iterable[
|
|
298
|
+
) -> Iterable[tuple[str, X]]:
|
|
306
299
|
"""Yield all mappings extracted from each database given.
|
|
307
300
|
|
|
308
301
|
:param f: A function that takes a prefix and gives back something that will be used by an outer function.
|
|
@@ -342,13 +335,13 @@ def iter_helper_helper(
|
|
|
342
335
|
logger.warning("[%s] unable to download", prefix)
|
|
343
336
|
if strict and not bioregistry.is_deprecated(prefix):
|
|
344
337
|
raise
|
|
345
|
-
except
|
|
338
|
+
except MissingPrefixError as e:
|
|
346
339
|
logger.warning("[%s] missing prefix: %s", prefix, e)
|
|
347
340
|
if strict and not bioregistry.is_deprecated(prefix):
|
|
348
341
|
raise e
|
|
349
342
|
except subprocess.CalledProcessError:
|
|
350
343
|
logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
|
|
351
|
-
except
|
|
344
|
+
except UnhandledFormatError as e:
|
|
352
345
|
logger.warning("[%s] %s", prefix, e)
|
|
353
346
|
except ValueError as e:
|
|
354
347
|
if _is_xml(e):
|
|
@@ -390,7 +383,7 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
|
|
|
390
383
|
|
|
391
384
|
|
|
392
385
|
def db_output_helper(
|
|
393
|
-
f: Callable[..., Iterable[
|
|
386
|
+
f: Callable[..., Iterable[tuple[str, ...]]],
|
|
394
387
|
db_name: str,
|
|
395
388
|
columns: Sequence[str],
|
|
396
389
|
*,
|
|
@@ -399,7 +392,7 @@ def db_output_helper(
|
|
|
399
392
|
use_gzip: bool = True,
|
|
400
393
|
summary_detailed: Optional[Sequence[int]] = None,
|
|
401
394
|
**kwargs,
|
|
402
|
-
) ->
|
|
395
|
+
) -> list[pathlib.Path]:
|
|
403
396
|
"""Help output database builds.
|
|
404
397
|
|
|
405
398
|
:param f: A function that takes a prefix and gives back something that will be used by an outer function.
|
|
@@ -413,7 +406,7 @@ def db_output_helper(
|
|
|
413
406
|
directory = _prep_dir(directory)
|
|
414
407
|
|
|
415
408
|
c: typing.Counter[str] = Counter()
|
|
416
|
-
c_detailed: typing.Counter[
|
|
409
|
+
c_detailed: typing.Counter[tuple[str, ...]] = Counter()
|
|
417
410
|
|
|
418
411
|
if use_gzip:
|
|
419
412
|
db_path = directory.joinpath(f"{db_name}.tsv.gz")
|
|
@@ -475,7 +468,7 @@ def db_output_helper(
|
|
|
475
468
|
indent=2,
|
|
476
469
|
)
|
|
477
470
|
|
|
478
|
-
rv:
|
|
471
|
+
rv: list[pathlib.Path] = [
|
|
479
472
|
db_metadata_path,
|
|
480
473
|
db_path,
|
|
481
474
|
db_sample_path,
|
pyobo/gilda_utils.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""PyOBO's Gilda utilities."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
4
|
+
from collections.abc import Iterable
|
|
6
5
|
from subprocess import CalledProcessError
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional, Union
|
|
8
7
|
|
|
9
8
|
import bioregistry
|
|
10
9
|
import gilda.api
|
|
@@ -15,13 +14,14 @@ from gilda.term import filter_out_duplicates
|
|
|
15
14
|
from tqdm.auto import tqdm
|
|
16
15
|
|
|
17
16
|
from pyobo import (
|
|
17
|
+
get_descendants,
|
|
18
18
|
get_id_name_mapping,
|
|
19
19
|
get_id_species_mapping,
|
|
20
20
|
get_id_synonyms_mapping,
|
|
21
21
|
get_ids,
|
|
22
22
|
get_obsolete,
|
|
23
23
|
)
|
|
24
|
-
from pyobo.getters import
|
|
24
|
+
from pyobo.getters import NoBuildError
|
|
25
25
|
from pyobo.utils.io import multidict
|
|
26
26
|
|
|
27
27
|
__all__ = [
|
|
@@ -40,7 +40,7 @@ def iter_gilda_prediction_tuples(
|
|
|
40
40
|
grounder: Optional[Grounder] = None,
|
|
41
41
|
identifiers_are_names: bool = False,
|
|
42
42
|
strict: bool = False,
|
|
43
|
-
) -> Iterable[
|
|
43
|
+
) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
|
|
44
44
|
"""Iterate over prediction tuples for a given prefix."""
|
|
45
45
|
if grounder is None:
|
|
46
46
|
grounder = gilda.api.grounder
|
|
@@ -93,7 +93,7 @@ def get_grounder(
|
|
|
93
93
|
prefixes: Union[str, Iterable[str]],
|
|
94
94
|
*,
|
|
95
95
|
unnamed: Optional[Iterable[str]] = None,
|
|
96
|
-
grounder_cls: Optional[
|
|
96
|
+
grounder_cls: Optional[type[Grounder]] = None,
|
|
97
97
|
versions: Union[None, str, Iterable[Union[str, None]]] = None,
|
|
98
98
|
strict: bool = True,
|
|
99
99
|
skip_obsolete: bool = False,
|
|
@@ -114,7 +114,7 @@ def get_grounder(
|
|
|
114
114
|
if len(prefixes) != len(versions):
|
|
115
115
|
raise ValueError
|
|
116
116
|
|
|
117
|
-
terms:
|
|
117
|
+
terms: list[gilda.term.Term] = []
|
|
118
118
|
for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
|
|
119
119
|
try:
|
|
120
120
|
p_terms = list(
|
|
@@ -127,7 +127,7 @@ def get_grounder(
|
|
|
127
127
|
progress=progress,
|
|
128
128
|
)
|
|
129
129
|
)
|
|
130
|
-
except (
|
|
130
|
+
except (NoBuildError, CalledProcessError):
|
|
131
131
|
continue
|
|
132
132
|
else:
|
|
133
133
|
terms.extend(p_terms)
|
|
@@ -247,3 +247,23 @@ def get_gilda_terms(
|
|
|
247
247
|
)
|
|
248
248
|
if term is not None:
|
|
249
249
|
yield term
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def get_gilda_term_subset(
|
|
253
|
+
source: str, ancestors: Union[str, list[str]], **kwargs
|
|
254
|
+
) -> Iterable[gilda.term.Term]:
|
|
255
|
+
"""Get a subset of terms."""
|
|
256
|
+
subset = {
|
|
257
|
+
descendant
|
|
258
|
+
for parent_curie in _ensure_list(ancestors)
|
|
259
|
+
for descendant in get_descendants(*parent_curie.split(":")) or []
|
|
260
|
+
}
|
|
261
|
+
for term in get_gilda_terms(source, **kwargs):
|
|
262
|
+
if bioregistry.curie_to_str(term.db, term.id) in subset:
|
|
263
|
+
yield term
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _ensure_list(s: Union[str, list[str]]) -> list[str]:
|
|
267
|
+
if isinstance(s, str):
|
|
268
|
+
return [s]
|
|
269
|
+
return s
|
pyobo/identifier_utils.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities for handling prefixes."""
|
|
4
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
5
|
import logging
|
|
6
6
|
from functools import wraps
|
|
7
|
-
from typing import Optional, Tuple, Union
|
|
8
7
|
|
|
9
8
|
import bioregistry
|
|
9
|
+
from curies import Reference, ReferenceTuple
|
|
10
10
|
|
|
11
11
|
from .registries import (
|
|
12
12
|
curie_has_blacklisted_prefix,
|
|
@@ -25,10 +25,15 @@ __all__ = [
|
|
|
25
25
|
logger = logging.getLogger(__name__)
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
class
|
|
28
|
+
class MissingPrefixError(ValueError):
|
|
29
29
|
"""Raised on a missing prefix."""
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
reference: Reference | None
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
|
|
35
|
+
):
|
|
36
|
+
"""Initialize the error."""
|
|
32
37
|
self.prefix = prefix
|
|
33
38
|
self.curie = curie
|
|
34
39
|
self.xref = xref
|
|
@@ -47,13 +52,13 @@ class MissingPrefix(ValueError):
|
|
|
47
52
|
return s
|
|
48
53
|
|
|
49
54
|
|
|
50
|
-
def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) ->
|
|
55
|
+
def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
|
|
51
56
|
"""Normalize a namespace and return, if possible."""
|
|
52
57
|
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
53
58
|
if norm_prefix is not None:
|
|
54
59
|
return norm_prefix
|
|
55
60
|
elif strict:
|
|
56
|
-
raise
|
|
61
|
+
raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
|
|
57
62
|
else:
|
|
58
63
|
return None
|
|
59
64
|
|
|
@@ -61,9 +66,7 @@ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True
|
|
|
61
66
|
BAD_CURIES = set()
|
|
62
67
|
|
|
63
68
|
|
|
64
|
-
def normalize_curie(
|
|
65
|
-
curie: str, *, strict: bool = True
|
|
66
|
-
) -> Union[Tuple[str, str], Tuple[None, None]]:
|
|
69
|
+
def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
|
|
67
70
|
"""Parse a string that looks like a CURIE.
|
|
68
71
|
|
|
69
72
|
:param curie: A compact uniform resource identifier (CURIE)
|
|
@@ -108,11 +111,25 @@ def wrap_norm_prefix(f):
|
|
|
108
111
|
"""Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
|
|
109
112
|
|
|
110
113
|
@wraps(f)
|
|
111
|
-
def _wrapped(prefix, *args, **kwargs):
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
|
|
115
|
+
if isinstance(prefix, str):
|
|
116
|
+
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
117
|
+
if norm_prefix is None:
|
|
118
|
+
raise ValueError(f"Invalid prefix: {prefix}")
|
|
119
|
+
prefix = norm_prefix
|
|
120
|
+
elif isinstance(prefix, Reference):
|
|
121
|
+
norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
|
|
122
|
+
if norm_prefix is None:
|
|
123
|
+
raise ValueError(f"Invalid prefix: {prefix.prefix}")
|
|
124
|
+
prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
|
|
125
|
+
elif isinstance(prefix, ReferenceTuple):
|
|
126
|
+
norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
|
|
127
|
+
if norm_prefix is None:
|
|
128
|
+
raise ValueError(f"Invalid prefix: {prefix.prefix}")
|
|
129
|
+
prefix = ReferenceTuple(norm_prefix, prefix.identifier)
|
|
130
|
+
else:
|
|
131
|
+
raise TypeError
|
|
132
|
+
return f(prefix, *args, **kwargs)
|
|
116
133
|
|
|
117
134
|
return _wrapped
|
|
118
135
|
|
pyobo/mocks.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Mocks for PyOBO."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Optional, TypeVar, Union
|
|
6
5
|
from unittest import mock
|
|
7
6
|
|
|
8
7
|
import pandas as pd
|
|
@@ -25,7 +24,7 @@ def get_mock_id_name_mapping(data: Mapping[str, Mapping[str, str]]) -> mock._pat
|
|
|
25
24
|
return _replace_mapping_getter("pyobo.api.names.get_id_name_mapping", data)
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str,
|
|
27
|
+
def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
|
|
29
28
|
"""Mock the :func:`pyobo.extract.get_id_synonyms_mapping` function.
|
|
30
29
|
|
|
31
30
|
:param data: A mapping from prefix to mappings of identifier to lists of synonyms.
|
|
@@ -33,7 +32,7 @@ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) ->
|
|
|
33
32
|
return _replace_mapping_getter("pyobo.api.names.get_id_synonyms_mapping", data)
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str,
|
|
35
|
+
def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
|
|
37
36
|
"""Mock the :func:`pyobo.extract.get_id_to_alts` function.
|
|
38
37
|
|
|
39
38
|
:param data: A mapping from prefix to mappings of identifier to lists of alternative identifiers.
|
|
@@ -52,7 +51,7 @@ def _replace_mapping_getter(name: str, data: Mapping[str, Mapping[str, X]]) -> m
|
|
|
52
51
|
|
|
53
52
|
|
|
54
53
|
def get_mock_get_xrefs_df(
|
|
55
|
-
df: Union[
|
|
54
|
+
df: Union[list[tuple[str, str, str, str, str]], pd.DataFrame],
|
|
56
55
|
) -> mock._patch:
|
|
57
56
|
"""Mock the :func:`pyobo.xrefsdb.xrefs_pipeline.get_xref_df` function.
|
|
58
57
|
|
pyobo/normalizer.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Use synonyms from OBO to normalize names."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
7
6
|
from dataclasses import dataclass
|
|
8
7
|
from functools import lru_cache
|
|
9
|
-
from typing import
|
|
8
|
+
from typing import Optional, Union
|
|
10
9
|
|
|
11
10
|
import bioregistry
|
|
12
11
|
|
|
@@ -23,29 +22,29 @@ __all__ = [
|
|
|
23
22
|
|
|
24
23
|
logger = logging.getLogger(__name__)
|
|
25
24
|
|
|
26
|
-
NormalizationSuccess =
|
|
27
|
-
NormalizationFailure =
|
|
25
|
+
NormalizationSuccess = tuple[str, str, str]
|
|
26
|
+
NormalizationFailure = tuple[None, None, str]
|
|
28
27
|
NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class Normalizer(ABC):
|
|
32
31
|
"""A normalizer."""
|
|
33
32
|
|
|
34
|
-
id_to_name:
|
|
35
|
-
id_to_synonyms:
|
|
33
|
+
id_to_name: dict[str, str]
|
|
34
|
+
id_to_synonyms: dict[str, list[str]]
|
|
36
35
|
|
|
37
36
|
#: A mapping from all synonyms to the set of identifiers that they point to.
|
|
38
37
|
#: In a perfect world, each would only be a single element.
|
|
39
|
-
synonym_to_identifiers_mapping:
|
|
38
|
+
synonym_to_identifiers_mapping: dict[str, set[str]]
|
|
40
39
|
#: A mapping from normalized names to the actual ones that they came from
|
|
41
|
-
norm_name_to_name:
|
|
40
|
+
norm_name_to_name: dict[str, set[str]]
|
|
42
41
|
|
|
43
42
|
def __init__(
|
|
44
43
|
self,
|
|
45
|
-
id_to_name:
|
|
46
|
-
id_to_synonyms:
|
|
44
|
+
id_to_name: dict[str, str],
|
|
45
|
+
id_to_synonyms: dict[str, list[str]],
|
|
47
46
|
remove_prefix: Optional[str] = None,
|
|
48
|
-
) -> None:
|
|
47
|
+
) -> None:
|
|
49
48
|
"""Initialize the normalizer.
|
|
50
49
|
|
|
51
50
|
:param id_to_name: An identifier to name dictionary.
|
|
@@ -64,7 +63,7 @@ class Normalizer(ABC):
|
|
|
64
63
|
self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
|
|
65
64
|
|
|
66
65
|
@classmethod
|
|
67
|
-
def _get_norm_name_to_names(cls, synonyms: Iterable[str]) ->
|
|
66
|
+
def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
|
|
68
67
|
return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
|
|
69
68
|
|
|
70
69
|
@staticmethod
|
|
@@ -81,7 +80,7 @@ class Normalizer(ABC):
|
|
|
81
80
|
id_to_name: Mapping[str, str],
|
|
82
81
|
id_to_synonyms: Mapping[str, Iterable[str]],
|
|
83
82
|
remove_prefix: Optional[str] = None,
|
|
84
|
-
) -> Iterable[
|
|
83
|
+
) -> Iterable[tuple[str, str]]:
|
|
85
84
|
if remove_prefix is not None:
|
|
86
85
|
remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
|
|
87
86
|
|
|
@@ -101,7 +100,7 @@ class Normalizer(ABC):
|
|
|
101
100
|
# it might overwrite but this is probably always due to alternate ids
|
|
102
101
|
yield synonym, identifier
|
|
103
102
|
|
|
104
|
-
def get_names(self, query: str) ->
|
|
103
|
+
def get_names(self, query: str) -> list[str]:
|
|
105
104
|
"""Get all names to which the query text maps."""
|
|
106
105
|
norm_text = self._normalize_text(query)
|
|
107
106
|
return list(self.norm_name_to_name.get(norm_text, []))
|
|
@@ -112,7 +111,7 @@ class Normalizer(ABC):
|
|
|
112
111
|
raise NotImplementedError
|
|
113
112
|
|
|
114
113
|
|
|
115
|
-
@lru_cache
|
|
114
|
+
@lru_cache
|
|
116
115
|
def get_normalizer(prefix: str) -> Normalizer:
|
|
117
116
|
"""Get an OBO normalizer."""
|
|
118
117
|
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
@@ -149,7 +148,8 @@ def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult
|
|
|
149
148
|
class OboNormalizer(Normalizer):
|
|
150
149
|
"""A utility for normalizing by names."""
|
|
151
150
|
|
|
152
|
-
def __init__(self, prefix: str) -> None:
|
|
151
|
+
def __init__(self, prefix: str) -> None:
|
|
152
|
+
"""Initialize the normalizer by an ontology's Bioregistry prefix."""
|
|
153
153
|
self.prefix = prefix
|
|
154
154
|
self._len_prefix = len(prefix)
|
|
155
155
|
id_to_name = names.get_id_name_mapping(prefix)
|
|
@@ -160,7 +160,7 @@ class OboNormalizer(Normalizer):
|
|
|
160
160
|
remove_prefix=prefix,
|
|
161
161
|
)
|
|
162
162
|
|
|
163
|
-
def __repr__(self) -> str:
|
|
163
|
+
def __repr__(self) -> str:
|
|
164
164
|
return f'OboNormalizer(prefix="{self.prefix}")'
|
|
165
165
|
|
|
166
166
|
def normalize(self, query: str) -> NormalizationResult:
|
|
@@ -188,20 +188,20 @@ class MultiNormalizer:
|
|
|
188
188
|
If you're looking for taxa of exotic plants, you might use:
|
|
189
189
|
|
|
190
190
|
>>> from pyobo.normalizer import MultiNormalizer
|
|
191
|
-
>>> normalizer = MultiNormalizer(prefixes=[
|
|
192
|
-
>>> normalizer.normalize(
|
|
191
|
+
>>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
|
|
192
|
+
>>> normalizer.normalize("Homo sapiens")
|
|
193
193
|
('ncbitaxon', '9606', 'Homo sapiens')
|
|
194
|
-
>>> normalizer.normalize(
|
|
194
|
+
>>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
|
|
195
195
|
('itis', '507501', 'Abies bifolia')
|
|
196
|
-
>>> normalizer.normalize(
|
|
196
|
+
>>> normalizer.normalize("vulcan") # nice try, nerds
|
|
197
197
|
(None, None, None)
|
|
198
198
|
"""
|
|
199
199
|
|
|
200
200
|
#: The normalizers for each prefix
|
|
201
|
-
normalizers:
|
|
201
|
+
normalizers: list[Normalizer]
|
|
202
202
|
|
|
203
203
|
@staticmethod
|
|
204
|
-
def from_prefixes(prefixes:
|
|
204
|
+
def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
|
|
205
205
|
"""Instantiate normalizers based on the given prefixes, in preferred order.."""
|
|
206
206
|
return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
|
|
207
207
|
|
pyobo/obographs.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Convert PyOBO into OBO Graph."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
4
4
|
|
|
5
5
|
import bioregistry
|
|
6
6
|
import curies
|
|
@@ -35,8 +35,8 @@ def parse_results_from_obo(obo: Obo) -> ParseResults:
|
|
|
35
35
|
|
|
36
36
|
def graph_from_obo(obo: Obo) -> Graph:
|
|
37
37
|
"""Get an OBO Graph object from a PyOBO object."""
|
|
38
|
-
nodes:
|
|
39
|
-
edges:
|
|
38
|
+
nodes: list[Node] = []
|
|
39
|
+
edges: list[Edge] = []
|
|
40
40
|
for term in obo:
|
|
41
41
|
nodes.append(_get_class_node(term))
|
|
42
42
|
edges.extend(_iter_edges(term))
|