pyobo 0.10.12__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +31 -32
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +61 -5
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +0 -2
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -33
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +9 -10
- pyobo/identifier_utils.py +10 -10
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +0 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +2 -3
- pyobo/sources/biogrid.py +4 -4
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +1 -1
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +1 -1
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +6 -7
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +1 -3
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +2 -3
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +2 -3
- pyobo/sources/rhea.py +7 -8
- pyobo/sources/ror.py +3 -2
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +4 -4
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +102 -96
- pyobo/struct/typedef.py +9 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +4 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +3 -4
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +0 -2
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -5
- pyobo/xrefdb/sources/wikidata.py +2 -4
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo-0.10.12.dist-info/METADATA +0 -499
- pyobo-0.10.12.dist-info/RECORD +0 -169
- pyobo-0.10.12.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.12.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/aws.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Interface for caching data on AWS S3."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
import os
|
|
7
|
-
from typing import Optional
|
|
5
|
+
from typing import Optional
|
|
8
6
|
|
|
9
7
|
import boto3
|
|
10
8
|
import humanize
|
|
@@ -57,8 +55,8 @@ def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
|
|
|
57
55
|
|
|
58
56
|
def upload_artifacts(
|
|
59
57
|
bucket: str,
|
|
60
|
-
whitelist: Optional[
|
|
61
|
-
blacklist: Optional[
|
|
58
|
+
whitelist: Optional[set[str]] = None,
|
|
59
|
+
blacklist: Optional[set[str]] = None,
|
|
62
60
|
s3_client=None,
|
|
63
61
|
) -> None:
|
|
64
62
|
"""Upload all artifacts to AWS."""
|
pyobo/cli/__init__.py
CHANGED
pyobo/cli/aws.py
CHANGED
pyobo/cli/cli.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""CLI for PyOBO."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
@@ -15,7 +13,6 @@ from tabulate import tabulate
|
|
|
15
13
|
from .aws import main as aws_main
|
|
16
14
|
from .database import main as database_main
|
|
17
15
|
from .lookup import lookup
|
|
18
|
-
from ..apps.cli import main as apps_main
|
|
19
16
|
from ..constants import RAW_DIRECTORY
|
|
20
17
|
from ..plugins import has_nomenclature_plugin, iter_nomenclature_plugins
|
|
21
18
|
from ..registries import iter_cached_obo
|
|
@@ -116,7 +113,6 @@ def remapping(file):
|
|
|
116
113
|
|
|
117
114
|
|
|
118
115
|
main.add_command(lookup)
|
|
119
|
-
main.add_command(apps_main)
|
|
120
116
|
main.add_command(aws_main)
|
|
121
117
|
main.add_command(database_main)
|
|
122
118
|
|
pyobo/cli/database.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""CLI for PyOBO Database Generation."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
@@ -317,7 +315,7 @@ def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
|
317
315
|
@zenodo_option
|
|
318
316
|
@force_option
|
|
319
317
|
@no_strict_option
|
|
320
|
-
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
318
|
+
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
|
|
321
319
|
"""Make the prefix-identifier-xref dump."""
|
|
322
320
|
with logging_redirect_tqdm():
|
|
323
321
|
paths = db_output_helper(
|
pyobo/cli/lookup.py
CHANGED
pyobo/cli/utils.py
CHANGED
pyobo/constants.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Constants for PyOBO."""
|
|
4
2
|
|
|
5
|
-
import json
|
|
6
3
|
import logging
|
|
7
|
-
import os
|
|
8
4
|
import re
|
|
9
5
|
|
|
10
6
|
import pystow
|
|
@@ -13,7 +9,6 @@ __all__ = [
|
|
|
13
9
|
"RAW_DIRECTORY",
|
|
14
10
|
"DATABASE_DIRECTORY",
|
|
15
11
|
"SPECIES_REMAPPING",
|
|
16
|
-
"VERSION_PINS",
|
|
17
12
|
]
|
|
18
13
|
|
|
19
14
|
logger = logging.getLogger(__name__)
|
|
@@ -101,31 +96,3 @@ PROVENANCE_PREFIXES = {
|
|
|
101
96
|
"isbn",
|
|
102
97
|
"issn",
|
|
103
98
|
}
|
|
104
|
-
|
|
105
|
-
# Load version pin dictionary from the environmental variable VERSION_PINS
|
|
106
|
-
try:
|
|
107
|
-
VERSION_PINS_STR = os.getenv("VERSION_PINS")
|
|
108
|
-
if not VERSION_PINS_STR:
|
|
109
|
-
VERSION_PINS = {}
|
|
110
|
-
else:
|
|
111
|
-
VERSION_PINS = json.loads(VERSION_PINS_STR)
|
|
112
|
-
for k, v in VERSION_PINS.items():
|
|
113
|
-
if not isinstance(k, str) or not isinstance(v, str):
|
|
114
|
-
logger.error("The prefix and version name must both be " "strings")
|
|
115
|
-
VERSION_PINS = {}
|
|
116
|
-
break
|
|
117
|
-
except ValueError as e:
|
|
118
|
-
logger.error(
|
|
119
|
-
"The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
|
|
120
|
-
)
|
|
121
|
-
VERSION_PINS = {}
|
|
122
|
-
|
|
123
|
-
if VERSION_PINS:
|
|
124
|
-
logger.debug(
|
|
125
|
-
f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
|
|
126
|
-
f"\nPyobo will download the latest version of a resource if it's "
|
|
127
|
-
f"not pinned.\nIf you want to use a specific version of a "
|
|
128
|
-
f"resource, edit your VERSION_PINS environmental "
|
|
129
|
-
f"variable which is a JSON string to include a prefix and version "
|
|
130
|
-
f"name."
|
|
131
|
-
)
|
pyobo/getters.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities for OBO files."""
|
|
4
2
|
|
|
5
3
|
import datetime
|
|
@@ -11,16 +9,11 @@ import subprocess
|
|
|
11
9
|
import typing
|
|
12
10
|
import urllib.error
|
|
13
11
|
from collections import Counter
|
|
12
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
14
13
|
from pathlib import Path
|
|
15
14
|
from typing import (
|
|
16
15
|
Callable,
|
|
17
|
-
Iterable,
|
|
18
|
-
List,
|
|
19
|
-
Mapping,
|
|
20
16
|
Optional,
|
|
21
|
-
Sequence,
|
|
22
|
-
Set,
|
|
23
|
-
Tuple,
|
|
24
17
|
TypeVar,
|
|
25
18
|
Union,
|
|
26
19
|
)
|
|
@@ -30,7 +23,7 @@ from bioontologies import robot
|
|
|
30
23
|
from tqdm.auto import tqdm
|
|
31
24
|
|
|
32
25
|
from .constants import DATABASE_DIRECTORY
|
|
33
|
-
from .identifier_utils import
|
|
26
|
+
from .identifier_utils import MissingPrefixError, wrap_norm_prefix
|
|
34
27
|
from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
|
|
35
28
|
from .struct import Obo
|
|
36
29
|
from .utils.io import get_writer
|
|
@@ -39,17 +32,17 @@ from .version import get_git_hash, get_version
|
|
|
39
32
|
|
|
40
33
|
__all__ = [
|
|
41
34
|
"get_ontology",
|
|
42
|
-
"
|
|
35
|
+
"NoBuildError",
|
|
43
36
|
]
|
|
44
37
|
|
|
45
38
|
logger = logging.getLogger(__name__)
|
|
46
39
|
|
|
47
40
|
|
|
48
|
-
class
|
|
41
|
+
class NoBuildError(RuntimeError):
|
|
49
42
|
"""Base exception for being unable to build."""
|
|
50
43
|
|
|
51
44
|
|
|
52
|
-
class
|
|
45
|
+
class UnhandledFormatError(NoBuildError):
|
|
53
46
|
"""Only OWL is available."""
|
|
54
47
|
|
|
55
48
|
|
|
@@ -117,7 +110,7 @@ def get_ontology(
|
|
|
117
110
|
|
|
118
111
|
ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
|
|
119
112
|
if path is None:
|
|
120
|
-
raise
|
|
113
|
+
raise NoBuildError(prefix)
|
|
121
114
|
elif ontology_format == "obo":
|
|
122
115
|
pass # all gucci
|
|
123
116
|
elif ontology_format == "owl":
|
|
@@ -127,7 +120,7 @@ def get_ontology(
|
|
|
127
120
|
robot.convert(path, _converted_obo_path, check=robot_check)
|
|
128
121
|
path = _converted_obo_path
|
|
129
122
|
else:
|
|
130
|
-
raise
|
|
123
|
+
raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
|
|
131
124
|
|
|
132
125
|
from .reader import from_obo_path
|
|
133
126
|
|
|
@@ -147,8 +140,8 @@ def get_ontology(
|
|
|
147
140
|
|
|
148
141
|
def _ensure_ontology_path(
|
|
149
142
|
prefix: str, force, version
|
|
150
|
-
) -> Union[
|
|
151
|
-
for ontology_format, url in [
|
|
143
|
+
) -> Union[tuple[str, Path], tuple[None, None]]:
|
|
144
|
+
for ontology_format, url in [
|
|
152
145
|
("obo", bioregistry.get_obo_download(prefix)),
|
|
153
146
|
("owl", bioregistry.get_owl_download(prefix)),
|
|
154
147
|
("json", bioregistry.get_json_download(prefix)),
|
|
@@ -246,7 +239,7 @@ def iter_helper(
|
|
|
246
239
|
leave: bool = False,
|
|
247
240
|
strict: bool = True,
|
|
248
241
|
**kwargs,
|
|
249
|
-
) -> Iterable[
|
|
242
|
+
) -> Iterable[tuple[str, str, X]]:
|
|
250
243
|
"""Yield all mappings extracted from each database given."""
|
|
251
244
|
for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs):
|
|
252
245
|
it = tqdm(
|
|
@@ -266,7 +259,7 @@ def _prefixes(
|
|
|
266
259
|
skip_below: Optional[str] = None,
|
|
267
260
|
skip_below_inclusive: bool = True,
|
|
268
261
|
skip_pyobo: bool = False,
|
|
269
|
-
skip_set: Optional[
|
|
262
|
+
skip_set: Optional[set[str]] = None,
|
|
270
263
|
) -> Iterable[str]:
|
|
271
264
|
for prefix, resource in sorted(bioregistry.read_registry().items()):
|
|
272
265
|
if resource.no_own_terms:
|
|
@@ -299,10 +292,10 @@ def iter_helper_helper(
|
|
|
299
292
|
skip_below: Optional[str] = None,
|
|
300
293
|
skip_below_inclusive: bool = True,
|
|
301
294
|
skip_pyobo: bool = False,
|
|
302
|
-
skip_set: Optional[
|
|
295
|
+
skip_set: Optional[set[str]] = None,
|
|
303
296
|
strict: bool = True,
|
|
304
297
|
**kwargs,
|
|
305
|
-
) -> Iterable[
|
|
298
|
+
) -> Iterable[tuple[str, X]]:
|
|
306
299
|
"""Yield all mappings extracted from each database given.
|
|
307
300
|
|
|
308
301
|
:param f: A function that takes a prefix and gives back something that will be used by an outer function.
|
|
@@ -342,13 +335,13 @@ def iter_helper_helper(
|
|
|
342
335
|
logger.warning("[%s] unable to download", prefix)
|
|
343
336
|
if strict and not bioregistry.is_deprecated(prefix):
|
|
344
337
|
raise
|
|
345
|
-
except
|
|
338
|
+
except MissingPrefixError as e:
|
|
346
339
|
logger.warning("[%s] missing prefix: %s", prefix, e)
|
|
347
340
|
if strict and not bioregistry.is_deprecated(prefix):
|
|
348
341
|
raise e
|
|
349
342
|
except subprocess.CalledProcessError:
|
|
350
343
|
logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
|
|
351
|
-
except
|
|
344
|
+
except UnhandledFormatError as e:
|
|
352
345
|
logger.warning("[%s] %s", prefix, e)
|
|
353
346
|
except ValueError as e:
|
|
354
347
|
if _is_xml(e):
|
|
@@ -390,7 +383,7 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
|
|
|
390
383
|
|
|
391
384
|
|
|
392
385
|
def db_output_helper(
|
|
393
|
-
f: Callable[..., Iterable[
|
|
386
|
+
f: Callable[..., Iterable[tuple[str, ...]]],
|
|
394
387
|
db_name: str,
|
|
395
388
|
columns: Sequence[str],
|
|
396
389
|
*,
|
|
@@ -399,7 +392,7 @@ def db_output_helper(
|
|
|
399
392
|
use_gzip: bool = True,
|
|
400
393
|
summary_detailed: Optional[Sequence[int]] = None,
|
|
401
394
|
**kwargs,
|
|
402
|
-
) ->
|
|
395
|
+
) -> list[pathlib.Path]:
|
|
403
396
|
"""Help output database builds.
|
|
404
397
|
|
|
405
398
|
:param f: A function that takes a prefix and gives back something that will be used by an outer function.
|
|
@@ -413,7 +406,7 @@ def db_output_helper(
|
|
|
413
406
|
directory = _prep_dir(directory)
|
|
414
407
|
|
|
415
408
|
c: typing.Counter[str] = Counter()
|
|
416
|
-
c_detailed: typing.Counter[
|
|
409
|
+
c_detailed: typing.Counter[tuple[str, ...]] = Counter()
|
|
417
410
|
|
|
418
411
|
if use_gzip:
|
|
419
412
|
db_path = directory.joinpath(f"{db_name}.tsv.gz")
|
|
@@ -475,7 +468,7 @@ def db_output_helper(
|
|
|
475
468
|
indent=2,
|
|
476
469
|
)
|
|
477
470
|
|
|
478
|
-
rv:
|
|
471
|
+
rv: list[pathlib.Path] = [
|
|
479
472
|
db_metadata_path,
|
|
480
473
|
db_path,
|
|
481
474
|
db_sample_path,
|
pyobo/gilda_utils.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""PyOBO's Gilda utilities."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
4
|
+
from collections.abc import Iterable
|
|
6
5
|
from subprocess import CalledProcessError
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional, Union
|
|
8
7
|
|
|
9
8
|
import bioregistry
|
|
10
9
|
import gilda.api
|
|
@@ -22,7 +21,7 @@ from pyobo import (
|
|
|
22
21
|
get_ids,
|
|
23
22
|
get_obsolete,
|
|
24
23
|
)
|
|
25
|
-
from pyobo.getters import
|
|
24
|
+
from pyobo.getters import NoBuildError
|
|
26
25
|
from pyobo.utils.io import multidict
|
|
27
26
|
|
|
28
27
|
__all__ = [
|
|
@@ -41,7 +40,7 @@ def iter_gilda_prediction_tuples(
|
|
|
41
40
|
grounder: Optional[Grounder] = None,
|
|
42
41
|
identifiers_are_names: bool = False,
|
|
43
42
|
strict: bool = False,
|
|
44
|
-
) -> Iterable[
|
|
43
|
+
) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
|
|
45
44
|
"""Iterate over prediction tuples for a given prefix."""
|
|
46
45
|
if grounder is None:
|
|
47
46
|
grounder = gilda.api.grounder
|
|
@@ -94,7 +93,7 @@ def get_grounder(
|
|
|
94
93
|
prefixes: Union[str, Iterable[str]],
|
|
95
94
|
*,
|
|
96
95
|
unnamed: Optional[Iterable[str]] = None,
|
|
97
|
-
grounder_cls: Optional[
|
|
96
|
+
grounder_cls: Optional[type[Grounder]] = None,
|
|
98
97
|
versions: Union[None, str, Iterable[Union[str, None]]] = None,
|
|
99
98
|
strict: bool = True,
|
|
100
99
|
skip_obsolete: bool = False,
|
|
@@ -115,7 +114,7 @@ def get_grounder(
|
|
|
115
114
|
if len(prefixes) != len(versions):
|
|
116
115
|
raise ValueError
|
|
117
116
|
|
|
118
|
-
terms:
|
|
117
|
+
terms: list[gilda.term.Term] = []
|
|
119
118
|
for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
|
|
120
119
|
try:
|
|
121
120
|
p_terms = list(
|
|
@@ -128,7 +127,7 @@ def get_grounder(
|
|
|
128
127
|
progress=progress,
|
|
129
128
|
)
|
|
130
129
|
)
|
|
131
|
-
except (
|
|
130
|
+
except (NoBuildError, CalledProcessError):
|
|
132
131
|
continue
|
|
133
132
|
else:
|
|
134
133
|
terms.extend(p_terms)
|
|
@@ -251,7 +250,7 @@ def get_gilda_terms(
|
|
|
251
250
|
|
|
252
251
|
|
|
253
252
|
def get_gilda_term_subset(
|
|
254
|
-
source: str, ancestors: Union[str,
|
|
253
|
+
source: str, ancestors: Union[str, list[str]], **kwargs
|
|
255
254
|
) -> Iterable[gilda.term.Term]:
|
|
256
255
|
"""Get a subset of terms."""
|
|
257
256
|
subset = {
|
|
@@ -264,7 +263,7 @@ def get_gilda_term_subset(
|
|
|
264
263
|
yield term
|
|
265
264
|
|
|
266
265
|
|
|
267
|
-
def _ensure_list(s: Union[str,
|
|
266
|
+
def _ensure_list(s: Union[str, list[str]]) -> list[str]:
|
|
268
267
|
if isinstance(s, str):
|
|
269
268
|
return [s]
|
|
270
269
|
return s
|
pyobo/identifier_utils.py
CHANGED
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities for handling prefixes."""
|
|
4
2
|
|
|
5
3
|
from __future__ import annotations
|
|
6
4
|
|
|
7
5
|
import logging
|
|
8
6
|
from functools import wraps
|
|
9
|
-
from typing import Optional, Tuple, Union
|
|
10
7
|
|
|
11
8
|
import bioregistry
|
|
12
9
|
from curies import Reference, ReferenceTuple
|
|
@@ -28,10 +25,15 @@ __all__ = [
|
|
|
28
25
|
logger = logging.getLogger(__name__)
|
|
29
26
|
|
|
30
27
|
|
|
31
|
-
class
|
|
28
|
+
class MissingPrefixError(ValueError):
|
|
32
29
|
"""Raised on a missing prefix."""
|
|
33
30
|
|
|
34
|
-
|
|
31
|
+
reference: Reference | None
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
|
|
35
|
+
):
|
|
36
|
+
"""Initialize the error."""
|
|
35
37
|
self.prefix = prefix
|
|
36
38
|
self.curie = curie
|
|
37
39
|
self.xref = xref
|
|
@@ -50,13 +52,13 @@ class MissingPrefix(ValueError):
|
|
|
50
52
|
return s
|
|
51
53
|
|
|
52
54
|
|
|
53
|
-
def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) ->
|
|
55
|
+
def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
|
|
54
56
|
"""Normalize a namespace and return, if possible."""
|
|
55
57
|
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
56
58
|
if norm_prefix is not None:
|
|
57
59
|
return norm_prefix
|
|
58
60
|
elif strict:
|
|
59
|
-
raise
|
|
61
|
+
raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
|
|
60
62
|
else:
|
|
61
63
|
return None
|
|
62
64
|
|
|
@@ -64,9 +66,7 @@ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True
|
|
|
64
66
|
BAD_CURIES = set()
|
|
65
67
|
|
|
66
68
|
|
|
67
|
-
def normalize_curie(
|
|
68
|
-
curie: str, *, strict: bool = True
|
|
69
|
-
) -> Union[Tuple[str, str], Tuple[None, None]]:
|
|
69
|
+
def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
|
|
70
70
|
"""Parse a string that looks like a CURIE.
|
|
71
71
|
|
|
72
72
|
:param curie: A compact uniform resource identifier (CURIE)
|
pyobo/mocks.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Mocks for PyOBO."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Optional, TypeVar, Union
|
|
6
5
|
from unittest import mock
|
|
7
6
|
|
|
8
7
|
import pandas as pd
|
|
@@ -25,7 +24,7 @@ def get_mock_id_name_mapping(data: Mapping[str, Mapping[str, str]]) -> mock._pat
|
|
|
25
24
|
return _replace_mapping_getter("pyobo.api.names.get_id_name_mapping", data)
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str,
|
|
27
|
+
def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
|
|
29
28
|
"""Mock the :func:`pyobo.extract.get_id_synonyms_mapping` function.
|
|
30
29
|
|
|
31
30
|
:param data: A mapping from prefix to mappings of identifier to lists of synonyms.
|
|
@@ -33,7 +32,7 @@ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) ->
|
|
|
33
32
|
return _replace_mapping_getter("pyobo.api.names.get_id_synonyms_mapping", data)
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str,
|
|
35
|
+
def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch:
|
|
37
36
|
"""Mock the :func:`pyobo.extract.get_id_to_alts` function.
|
|
38
37
|
|
|
39
38
|
:param data: A mapping from prefix to mappings of identifier to lists of alternative identifiers.
|
|
@@ -52,7 +51,7 @@ def _replace_mapping_getter(name: str, data: Mapping[str, Mapping[str, X]]) -> m
|
|
|
52
51
|
|
|
53
52
|
|
|
54
53
|
def get_mock_get_xrefs_df(
|
|
55
|
-
df: Union[
|
|
54
|
+
df: Union[list[tuple[str, str, str, str, str]], pd.DataFrame],
|
|
56
55
|
) -> mock._patch:
|
|
57
56
|
"""Mock the :func:`pyobo.xrefsdb.xrefs_pipeline.get_xref_df` function.
|
|
58
57
|
|
pyobo/normalizer.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Use synonyms from OBO to normalize names."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import Iterable, Mapping
|
|
7
6
|
from dataclasses import dataclass
|
|
8
7
|
from functools import lru_cache
|
|
9
|
-
from typing import
|
|
8
|
+
from typing import Optional, Union
|
|
10
9
|
|
|
11
10
|
import bioregistry
|
|
12
11
|
|
|
@@ -23,29 +22,29 @@ __all__ = [
|
|
|
23
22
|
|
|
24
23
|
logger = logging.getLogger(__name__)
|
|
25
24
|
|
|
26
|
-
NormalizationSuccess =
|
|
27
|
-
NormalizationFailure =
|
|
25
|
+
NormalizationSuccess = tuple[str, str, str]
|
|
26
|
+
NormalizationFailure = tuple[None, None, str]
|
|
28
27
|
NormalizationResult = Union[NormalizationSuccess, NormalizationFailure]
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class Normalizer(ABC):
|
|
32
31
|
"""A normalizer."""
|
|
33
32
|
|
|
34
|
-
id_to_name:
|
|
35
|
-
id_to_synonyms:
|
|
33
|
+
id_to_name: dict[str, str]
|
|
34
|
+
id_to_synonyms: dict[str, list[str]]
|
|
36
35
|
|
|
37
36
|
#: A mapping from all synonyms to the set of identifiers that they point to.
|
|
38
37
|
#: In a perfect world, each would only be a single element.
|
|
39
|
-
synonym_to_identifiers_mapping:
|
|
38
|
+
synonym_to_identifiers_mapping: dict[str, set[str]]
|
|
40
39
|
#: A mapping from normalized names to the actual ones that they came from
|
|
41
|
-
norm_name_to_name:
|
|
40
|
+
norm_name_to_name: dict[str, set[str]]
|
|
42
41
|
|
|
43
42
|
def __init__(
|
|
44
43
|
self,
|
|
45
|
-
id_to_name:
|
|
46
|
-
id_to_synonyms:
|
|
44
|
+
id_to_name: dict[str, str],
|
|
45
|
+
id_to_synonyms: dict[str, list[str]],
|
|
47
46
|
remove_prefix: Optional[str] = None,
|
|
48
|
-
) -> None:
|
|
47
|
+
) -> None:
|
|
49
48
|
"""Initialize the normalizer.
|
|
50
49
|
|
|
51
50
|
:param id_to_name: An identifier to name dictionary.
|
|
@@ -64,7 +63,7 @@ class Normalizer(ABC):
|
|
|
64
63
|
self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping)
|
|
65
64
|
|
|
66
65
|
@classmethod
|
|
67
|
-
def _get_norm_name_to_names(cls, synonyms: Iterable[str]) ->
|
|
66
|
+
def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]:
|
|
68
67
|
return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms)
|
|
69
68
|
|
|
70
69
|
@staticmethod
|
|
@@ -81,7 +80,7 @@ class Normalizer(ABC):
|
|
|
81
80
|
id_to_name: Mapping[str, str],
|
|
82
81
|
id_to_synonyms: Mapping[str, Iterable[str]],
|
|
83
82
|
remove_prefix: Optional[str] = None,
|
|
84
|
-
) -> Iterable[
|
|
83
|
+
) -> Iterable[tuple[str, str]]:
|
|
85
84
|
if remove_prefix is not None:
|
|
86
85
|
remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'
|
|
87
86
|
|
|
@@ -101,7 +100,7 @@ class Normalizer(ABC):
|
|
|
101
100
|
# it might overwrite but this is probably always due to alternate ids
|
|
102
101
|
yield synonym, identifier
|
|
103
102
|
|
|
104
|
-
def get_names(self, query: str) ->
|
|
103
|
+
def get_names(self, query: str) -> list[str]:
|
|
105
104
|
"""Get all names to which the query text maps."""
|
|
106
105
|
norm_text = self._normalize_text(query)
|
|
107
106
|
return list(self.norm_name_to_name.get(norm_text, []))
|
|
@@ -112,7 +111,7 @@ class Normalizer(ABC):
|
|
|
112
111
|
raise NotImplementedError
|
|
113
112
|
|
|
114
113
|
|
|
115
|
-
@lru_cache
|
|
114
|
+
@lru_cache
|
|
116
115
|
def get_normalizer(prefix: str) -> Normalizer:
|
|
117
116
|
"""Get an OBO normalizer."""
|
|
118
117
|
norm_prefix = bioregistry.normalize_prefix(prefix)
|
|
@@ -149,7 +148,8 @@ def ground(prefix: Union[str, Iterable[str]], query: str) -> NormalizationResult
|
|
|
149
148
|
class OboNormalizer(Normalizer):
|
|
150
149
|
"""A utility for normalizing by names."""
|
|
151
150
|
|
|
152
|
-
def __init__(self, prefix: str) -> None:
|
|
151
|
+
def __init__(self, prefix: str) -> None:
|
|
152
|
+
"""Initialize the normalizer by an ontology's Bioregistry prefix."""
|
|
153
153
|
self.prefix = prefix
|
|
154
154
|
self._len_prefix = len(prefix)
|
|
155
155
|
id_to_name = names.get_id_name_mapping(prefix)
|
|
@@ -160,7 +160,7 @@ class OboNormalizer(Normalizer):
|
|
|
160
160
|
remove_prefix=prefix,
|
|
161
161
|
)
|
|
162
162
|
|
|
163
|
-
def __repr__(self) -> str:
|
|
163
|
+
def __repr__(self) -> str:
|
|
164
164
|
return f'OboNormalizer(prefix="{self.prefix}")'
|
|
165
165
|
|
|
166
166
|
def normalize(self, query: str) -> NormalizationResult:
|
|
@@ -188,20 +188,20 @@ class MultiNormalizer:
|
|
|
188
188
|
If you're looking for taxa of exotic plants, you might use:
|
|
189
189
|
|
|
190
190
|
>>> from pyobo.normalizer import MultiNormalizer
|
|
191
|
-
>>> normalizer = MultiNormalizer(prefixes=[
|
|
192
|
-
>>> normalizer.normalize(
|
|
191
|
+
>>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"])
|
|
192
|
+
>>> normalizer.normalize("Homo sapiens")
|
|
193
193
|
('ncbitaxon', '9606', 'Homo sapiens')
|
|
194
|
-
>>> normalizer.normalize(
|
|
194
|
+
>>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI
|
|
195
195
|
('itis', '507501', 'Abies bifolia')
|
|
196
|
-
>>> normalizer.normalize(
|
|
196
|
+
>>> normalizer.normalize("vulcan") # nice try, nerds
|
|
197
197
|
(None, None, None)
|
|
198
198
|
"""
|
|
199
199
|
|
|
200
200
|
#: The normalizers for each prefix
|
|
201
|
-
normalizers:
|
|
201
|
+
normalizers: list[Normalizer]
|
|
202
202
|
|
|
203
203
|
@staticmethod
|
|
204
|
-
def from_prefixes(prefixes:
|
|
204
|
+
def from_prefixes(prefixes: list[str]) -> "MultiNormalizer":
|
|
205
205
|
"""Instantiate normalizers based on the given prefixes, in preferred order.."""
|
|
206
206
|
return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes])
|
|
207
207
|
|
pyobo/obographs.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Convert PyOBO into OBO Graph."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
4
4
|
|
|
5
5
|
import bioregistry
|
|
6
6
|
import curies
|
|
@@ -35,8 +35,8 @@ def parse_results_from_obo(obo: Obo) -> ParseResults:
|
|
|
35
35
|
|
|
36
36
|
def graph_from_obo(obo: Obo) -> Graph:
|
|
37
37
|
"""Get an OBO Graph object from a PyOBO object."""
|
|
38
|
-
nodes:
|
|
39
|
-
edges:
|
|
38
|
+
nodes: list[Node] = []
|
|
39
|
+
edges: list[Edge] = []
|
|
40
40
|
for term in obo:
|
|
41
41
|
nodes.append(_get_class_node(term))
|
|
42
42
|
edges.extend(_iter_edges(term))
|
pyobo/plugins.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Tools for loading entry points."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
5
4
|
from functools import lru_cache
|
|
6
|
-
from typing import Callable,
|
|
5
|
+
from typing import Callable, Optional
|
|
7
6
|
|
|
8
7
|
from .struct import Obo
|
|
9
8
|
|
|
@@ -14,7 +13,7 @@ __all__ = [
|
|
|
14
13
|
]
|
|
15
14
|
|
|
16
15
|
|
|
17
|
-
@lru_cache
|
|
16
|
+
@lru_cache
|
|
18
17
|
def _get_nomenclature_plugins() -> Mapping[str, Callable[[], Obo]]:
|
|
19
18
|
from .sources import ontology_resolver
|
|
20
19
|
|
pyobo/py.typed
ADDED
|
File without changes
|