pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/plugins.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Tools for loading entry points."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
5
4
|
from functools import lru_cache
|
|
6
|
-
from typing import Callable,
|
|
5
|
+
from typing import Callable, Optional
|
|
7
6
|
|
|
8
7
|
from .struct import Obo
|
|
9
8
|
|
|
@@ -14,7 +13,7 @@ __all__ = [
|
|
|
14
13
|
]
|
|
15
14
|
|
|
16
15
|
|
|
17
|
-
@lru_cache
|
|
16
|
+
@lru_cache
|
|
18
17
|
def _get_nomenclature_plugins() -> Mapping[str, Callable[[], Obo]]:
|
|
19
18
|
from .sources import ontology_resolver
|
|
20
19
|
|
pyobo/py.typed
ADDED
|
File without changes
|
pyobo/reader.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""OBO Readers."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
6
5
|
from datetime import datetime
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Optional, Union
|
|
9
8
|
|
|
10
9
|
import bioregistry
|
|
11
10
|
import networkx as nx
|
|
@@ -13,7 +12,7 @@ from more_itertools import pairwise
|
|
|
13
12
|
from tqdm.auto import tqdm
|
|
14
13
|
|
|
15
14
|
from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
|
|
16
|
-
from .identifier_utils import
|
|
15
|
+
from .identifier_utils import MissingPrefixError, normalize_curie
|
|
17
16
|
from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix
|
|
18
17
|
from .struct import (
|
|
19
18
|
Obo,
|
|
@@ -39,7 +38,7 @@ logger = logging.getLogger(__name__)
|
|
|
39
38
|
|
|
40
39
|
# FIXME use bioontologies
|
|
41
40
|
# RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
|
|
42
|
-
RELATION_REMAPPINGS: Mapping[str,
|
|
41
|
+
RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
|
|
43
42
|
"part_of": part_of.pair,
|
|
44
43
|
"has_part": has_part.pair,
|
|
45
44
|
"develops_from": develops_from.pair,
|
|
@@ -75,7 +74,7 @@ def from_obo_path(
|
|
|
75
74
|
return from_obonet(graph, strict=strict, **kwargs)
|
|
76
75
|
|
|
77
76
|
|
|
78
|
-
def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
|
|
77
|
+
def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
|
|
79
78
|
"""Get all of the terms from a OBO graph."""
|
|
80
79
|
_ontology = graph.graph["ontology"]
|
|
81
80
|
ontology = bioregistry.normalize_prefix(_ontology) # probably always okay
|
|
@@ -126,12 +125,12 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
126
125
|
)
|
|
127
126
|
for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict)
|
|
128
127
|
)
|
|
129
|
-
references: Mapping[
|
|
128
|
+
references: Mapping[tuple[str, str], Reference] = {
|
|
130
129
|
reference.pair: reference for reference in reference_it
|
|
131
130
|
}
|
|
132
131
|
|
|
133
132
|
#: CURIEs to typedefs
|
|
134
|
-
typedefs: Mapping[
|
|
133
|
+
typedefs: Mapping[tuple[str, str], TypeDef] = {
|
|
135
134
|
typedef.pair: typedef for typedef in iterate_graph_typedefs(graph, ontology)
|
|
136
135
|
}
|
|
137
136
|
|
|
@@ -152,7 +151,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
152
151
|
|
|
153
152
|
try:
|
|
154
153
|
node_xrefs = list(iterate_node_xrefs(prefix=prefix, data=data, strict=strict))
|
|
155
|
-
except
|
|
154
|
+
except MissingPrefixError as e:
|
|
156
155
|
e.reference = reference
|
|
157
156
|
raise e
|
|
158
157
|
xrefs, provenance = [], []
|
|
@@ -171,7 +170,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
171
170
|
|
|
172
171
|
try:
|
|
173
172
|
alt_ids = list(iterate_node_alt_ids(data, strict=strict))
|
|
174
|
-
except
|
|
173
|
+
except MissingPrefixError as e:
|
|
175
174
|
e.reference = reference
|
|
176
175
|
raise e
|
|
177
176
|
n_alt_ids += len(alt_ids)
|
|
@@ -185,7 +184,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
185
184
|
strict=strict,
|
|
186
185
|
)
|
|
187
186
|
)
|
|
188
|
-
except
|
|
187
|
+
except MissingPrefixError as e:
|
|
189
188
|
e.reference = reference
|
|
190
189
|
raise e
|
|
191
190
|
n_parents += len(parents)
|
|
@@ -220,7 +219,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
220
219
|
strict=strict,
|
|
221
220
|
)
|
|
222
221
|
)
|
|
223
|
-
except
|
|
222
|
+
except MissingPrefixError as e:
|
|
224
223
|
e.reference = reference
|
|
225
224
|
raise e
|
|
226
225
|
for relation, reference in relations_references:
|
|
@@ -278,7 +277,7 @@ def _iter_obo_graph(
|
|
|
278
277
|
graph: nx.MultiDiGraph,
|
|
279
278
|
*,
|
|
280
279
|
strict: bool = True,
|
|
281
|
-
) -> Iterable[
|
|
280
|
+
) -> Iterable[tuple[str, str, Mapping[str, Any]]]:
|
|
282
281
|
"""Iterate over the nodes in the graph with the prefix stripped (if it's there)."""
|
|
283
282
|
for node, data in graph.nodes(data=True):
|
|
284
283
|
prefix, identifier = normalize_curie(node, strict=strict)
|
|
@@ -366,7 +365,8 @@ def iterate_graph_typedefs(
|
|
|
366
365
|
|
|
367
366
|
def get_definition(
|
|
368
367
|
data, *, prefix: str, identifier: str
|
|
369
|
-
) -> Union[
|
|
368
|
+
) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
|
|
369
|
+
"""Extract the definition from the data."""
|
|
370
370
|
definition = data.get("def") # it's allowed not to have a definition
|
|
371
371
|
if not definition:
|
|
372
372
|
return None, None
|
|
@@ -379,7 +379,7 @@ def _extract_definition(
|
|
|
379
379
|
prefix: str,
|
|
380
380
|
identifier: str,
|
|
381
381
|
strict: bool = False,
|
|
382
|
-
) -> Union[
|
|
382
|
+
) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
|
|
383
383
|
"""Extract the definitions."""
|
|
384
384
|
if not s.startswith('"'):
|
|
385
385
|
raise ValueError("definition does not start with a quote")
|
|
@@ -405,7 +405,7 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
|
|
|
405
405
|
return None
|
|
406
406
|
|
|
407
407
|
|
|
408
|
-
def _quote_split(s: str) ->
|
|
408
|
+
def _quote_split(s: str) -> tuple[str, str]:
|
|
409
409
|
s = s.lstrip('"')
|
|
410
410
|
i = _get_first_nonquoted(s)
|
|
411
411
|
if i is None:
|
|
@@ -416,9 +416,7 @@ def _quote_split(s: str) -> Tuple[str, str]:
|
|
|
416
416
|
def _clean_definition(s: str) -> str:
|
|
417
417
|
# if '\t' in s:
|
|
418
418
|
# logger.warning('has tab')
|
|
419
|
-
return (
|
|
420
|
-
s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace("\d", "") # noqa:W605
|
|
421
|
-
)
|
|
419
|
+
return s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "")
|
|
422
420
|
|
|
423
421
|
|
|
424
422
|
def _extract_synonym(
|
|
@@ -516,7 +514,7 @@ HANDLED_PROPERTY_TYPES = {
|
|
|
516
514
|
|
|
517
515
|
def iterate_node_properties(
|
|
518
516
|
data: Mapping[str, Any], *, property_prefix: Optional[str] = None, term=None
|
|
519
|
-
) -> Iterable[
|
|
517
|
+
) -> Iterable[tuple[str, str]]:
|
|
520
518
|
"""Extract properties from a :mod:`obonet` node's data."""
|
|
521
519
|
for prop_value_type in data.get("property_value", []):
|
|
522
520
|
try:
|
|
@@ -568,7 +566,7 @@ def iterate_node_relationships(
|
|
|
568
566
|
prefix: str,
|
|
569
567
|
identifier: str,
|
|
570
568
|
strict: bool = True,
|
|
571
|
-
) -> Iterable[
|
|
569
|
+
) -> Iterable[tuple[Reference, Reference]]:
|
|
572
570
|
"""Extract relationships from a :mod:`obonet` node's data."""
|
|
573
571
|
for s in data.get("relationship", []):
|
|
574
572
|
relation_curie, target_curie = s.split(" ")
|
pyobo/registries/__init__.py
CHANGED
pyobo/registries/metaregistry.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Load the manually curated metaregistry."""
|
|
4
2
|
|
|
5
3
|
import itertools as itt
|
|
6
4
|
import json
|
|
7
5
|
import os
|
|
6
|
+
from collections.abc import Iterable, Mapping
|
|
8
7
|
from functools import lru_cache
|
|
9
8
|
from pathlib import Path
|
|
10
|
-
from typing import Iterable, Mapping, Set, Tuple
|
|
11
9
|
|
|
12
10
|
import bioregistry
|
|
13
11
|
|
|
@@ -25,7 +23,7 @@ def has_no_download(prefix: str) -> bool:
|
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
@lru_cache(maxsize=1)
|
|
28
|
-
def _no_download() ->
|
|
26
|
+
def _no_download() -> set[str]:
|
|
29
27
|
"""Get the list of prefixes not available as OBO."""
|
|
30
28
|
return {
|
|
31
29
|
prefix
|
|
@@ -41,7 +39,7 @@ def curie_has_blacklisted_prefix(curie: str) -> bool:
|
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
@lru_cache(maxsize=1)
|
|
44
|
-
def get_xrefs_prefix_blacklist() ->
|
|
42
|
+
def get_xrefs_prefix_blacklist() -> set[str]:
|
|
45
43
|
"""Get the set of blacklisted xref prefixes."""
|
|
46
44
|
#: Xrefs starting with these prefixes will be ignored
|
|
47
45
|
prefixes = set(
|
|
@@ -65,7 +63,7 @@ def curie_has_blacklisted_suffix(curie: str) -> bool:
|
|
|
65
63
|
|
|
66
64
|
|
|
67
65
|
@lru_cache(maxsize=1)
|
|
68
|
-
def get_xrefs_suffix_blacklist() ->
|
|
66
|
+
def get_xrefs_suffix_blacklist() -> set[str]:
|
|
69
67
|
"""Get the set of blacklisted xref suffixes."""
|
|
70
68
|
#: Xrefs ending with these suffixes will be ignored
|
|
71
69
|
return set(CURATED_REGISTRY["blacklists"]["suffix"])
|
|
@@ -77,7 +75,7 @@ def curie_is_blacklisted(curie: str) -> bool:
|
|
|
77
75
|
|
|
78
76
|
|
|
79
77
|
@lru_cache(maxsize=1)
|
|
80
|
-
def get_xrefs_blacklist() ->
|
|
78
|
+
def get_xrefs_blacklist() -> set[str]:
|
|
81
79
|
"""Get the set of blacklisted xrefs."""
|
|
82
80
|
rv = set()
|
|
83
81
|
for x in CURATED_REGISTRY["blacklists"]["full"]:
|
|
@@ -123,7 +121,7 @@ def remap_prefix(curie: str) -> str:
|
|
|
123
121
|
return curie
|
|
124
122
|
|
|
125
123
|
|
|
126
|
-
def iter_cached_obo() -> Iterable[
|
|
124
|
+
def iter_cached_obo() -> Iterable[tuple[str, str]]:
|
|
127
125
|
"""Iterate over cached OBO paths."""
|
|
128
126
|
for prefix in os.listdir(RAW_DIRECTORY):
|
|
129
127
|
if prefix in GLOBAL_SKIP or has_no_download(prefix) or bioregistry.is_deprecated(prefix):
|
pyobo/resource_utils.py
CHANGED
pyobo/resources/__init__.py
CHANGED
pyobo/resources/ncbitaxon.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Loading of the NCBI Taxonomy names."""
|
|
4
2
|
|
|
5
3
|
import csv
|
|
6
4
|
import gzip
|
|
5
|
+
from collections.abc import Mapping
|
|
7
6
|
from functools import lru_cache
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import
|
|
8
|
+
from typing import Optional, Union
|
|
10
9
|
|
|
11
10
|
import requests
|
|
12
11
|
|
pyobo/resources/ro.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Loading of the relations ontology names."""
|
|
4
2
|
|
|
5
3
|
import csv
|
|
6
4
|
import os
|
|
5
|
+
from collections.abc import Mapping
|
|
7
6
|
from functools import lru_cache
|
|
8
|
-
from typing import Mapping, Tuple
|
|
9
7
|
|
|
10
8
|
import requests
|
|
11
9
|
|
|
@@ -20,7 +18,7 @@ PREFIX = "http://purl.obolibrary.org/obo/"
|
|
|
20
18
|
|
|
21
19
|
|
|
22
20
|
@lru_cache(maxsize=1)
|
|
23
|
-
def load_ro() -> Mapping[
|
|
21
|
+
def load_ro() -> Mapping[tuple[str, str], str]:
|
|
24
22
|
"""Load the relation ontology names."""
|
|
25
23
|
if not os.path.exists(PATH):
|
|
26
24
|
download()
|
pyobo/sources/README.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Sources
|
|
2
|
+
|
|
3
|
+
1. Create a new module in `pyobo.sources` named with the prefix for the resource you're ontologizing
|
|
4
|
+
2. Make sure your resource has a corresponding prefix in [the Bioregistry](https://github.com/biopragmatics/bioregistry)
|
|
5
|
+
3. Subclass the `pyobo.Obo` class to represent your resource
|
|
6
|
+
4. Add your resource to the list in `pyobo.sources.__init__`
|
|
7
|
+
|
|
8
|
+
## What is in scope?
|
|
9
|
+
|
|
10
|
+
1. Biomedical, semantic web, bibliographic, life sciences, and related natural sciences resources are welcome
|
|
11
|
+
2. The source you want to ontologize should be an identifier resource, i.e., it mints its own identifiers. If you want
|
|
12
|
+
to ontologize some database that reuses some other identifier resource's identifiers, then this isn't the right
|
|
13
|
+
place.
|
|
14
|
+
3. Resources that are not possible to download automatically are not in scope for PyOBO. Reproducibility and reusability
|
|
15
|
+
are core values of this software
|
pyobo/sources/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Sources of OBO content."""
|
|
4
2
|
|
|
5
3
|
from class_resolver import ClassResolver
|
|
@@ -12,6 +10,7 @@ from .civic_gene import CIVICGeneGetter
|
|
|
12
10
|
from .complexportal import ComplexPortalGetter
|
|
13
11
|
from .conso import CONSOGetter
|
|
14
12
|
from .cpt import CPTGetter
|
|
13
|
+
from .credit import CreditGetter
|
|
15
14
|
from .cvx import CVXGetter
|
|
16
15
|
from .depmap import DepMapGetter
|
|
17
16
|
from .dictybase_gene import DictybaseGetter
|
|
@@ -69,6 +68,7 @@ __all__ = [
|
|
|
69
68
|
"CVXGetter",
|
|
70
69
|
"ChEMBLCompoundGetter",
|
|
71
70
|
"ComplexPortalGetter",
|
|
71
|
+
"CreditGetter",
|
|
72
72
|
"DepMapGetter",
|
|
73
73
|
"DictybaseGetter",
|
|
74
74
|
"DrugBankGetter",
|
pyobo/sources/agrovoc.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for AGROVOC."""
|
|
4
2
|
|
|
5
3
|
import pystow
|
|
@@ -11,6 +9,8 @@ __all__ = [
|
|
|
11
9
|
"ensure_agrovoc_graph",
|
|
12
10
|
]
|
|
13
11
|
|
|
12
|
+
PREFIX = "agrovoc"
|
|
13
|
+
|
|
14
14
|
|
|
15
15
|
def ensure_agrovoc_graph(version: str) -> Graph:
|
|
16
16
|
"""Download and parse the given version of AGROVOC."""
|
|
@@ -20,5 +20,5 @@ def ensure_agrovoc_graph(version: str) -> Graph:
|
|
|
20
20
|
graph.bind("skosxl", "http://www.w3.org/2008/05/skos-xl#")
|
|
21
21
|
graph.bind("skos", SKOS)
|
|
22
22
|
graph.bind("dcterms", DCTERMS)
|
|
23
|
-
graph.bind(
|
|
23
|
+
graph.bind(PREFIX, "http://aims.fao.org/aos/agrontology#")
|
|
24
24
|
return graph
|
|
@@ -1,16 +1,15 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for the Antibody Registry."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
5
|
+
from typing import Optional
|
|
7
6
|
|
|
8
|
-
import bioversions
|
|
9
7
|
import pandas as pd
|
|
10
8
|
from bioregistry.utils import removeprefix
|
|
11
9
|
from tqdm.auto import tqdm
|
|
12
10
|
|
|
13
11
|
from pyobo import Obo, Term
|
|
12
|
+
from pyobo.api.utils import get_version
|
|
14
13
|
from pyobo.utils.path import ensure_df
|
|
15
14
|
|
|
16
15
|
__all__ = [
|
|
@@ -27,7 +26,7 @@ CHUNKSIZE = 20_000
|
|
|
27
26
|
def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
|
|
28
27
|
"""Get the BioGRID identifiers mapping dataframe."""
|
|
29
28
|
if version is None:
|
|
30
|
-
version =
|
|
29
|
+
version = get_version(PREFIX)
|
|
31
30
|
df = ensure_df(
|
|
32
31
|
PREFIX,
|
|
33
32
|
url=URL,
|
pyobo/sources/biogrid.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Extract and convert BioGRID identifiers."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Mapping
|
|
5
4
|
from functools import partial
|
|
6
|
-
from typing import
|
|
5
|
+
from typing import Optional
|
|
7
6
|
|
|
8
|
-
import bioversions
|
|
9
7
|
import pandas as pd
|
|
10
8
|
|
|
9
|
+
from pyobo.api.utils import get_version
|
|
11
10
|
from pyobo.resources.ncbitaxon import get_ncbitaxon_id
|
|
12
11
|
from pyobo.utils.cache import cached_mapping
|
|
13
12
|
from pyobo.utils.path import ensure_df, prefix_directory_join
|
|
@@ -52,7 +51,7 @@ def _lookup(name: str) -> Optional[str]:
|
|
|
52
51
|
|
|
53
52
|
def get_df() -> pd.DataFrame:
|
|
54
53
|
"""Get the BioGRID identifiers mapping dataframe."""
|
|
55
|
-
version =
|
|
54
|
+
version = get_version("biogrid")
|
|
56
55
|
url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
|
|
57
56
|
df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
|
|
58
57
|
df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
|
|
@@ -65,7 +64,7 @@ def get_df() -> pd.DataFrame:
|
|
|
65
64
|
"cache",
|
|
66
65
|
"xrefs",
|
|
67
66
|
name="ncbigene.tsv",
|
|
68
|
-
version=partial(
|
|
67
|
+
version=partial(get_version, PREFIX),
|
|
69
68
|
),
|
|
70
69
|
header=["biogrid_id", "ncbigene_id"],
|
|
71
70
|
)
|
|
@@ -77,7 +76,8 @@ def get_ncbigene_mapping() -> Mapping[str, str]:
|
|
|
77
76
|
.. code-block:: python
|
|
78
77
|
|
|
79
78
|
from pyobo import get_filtered_xrefs
|
|
80
|
-
|
|
79
|
+
|
|
80
|
+
biogrid_ncbigene_mapping = get_filtered_xrefs("biogrid", "ncbigene")
|
|
81
81
|
"""
|
|
82
82
|
df = get_df()
|
|
83
83
|
df = df.loc[df["IDENTIFIER_TYPE"] == "ENTREZ_GENE", ["BIOGRID_ID", "IDENTIFIER_VALUE"]]
|
pyobo/sources/ccle.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Get the CCLE Cells, provided by cBioPortal."""
|
|
4
2
|
|
|
5
3
|
import tarfile
|
|
4
|
+
from collections.abc import Iterable
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional
|
|
8
7
|
|
|
9
8
|
import pandas as pd
|
|
10
9
|
import pystow
|
|
@@ -25,7 +24,7 @@ class CCLEGetter(Obo):
|
|
|
25
24
|
|
|
26
25
|
ontology = bioregistry_key = PREFIX
|
|
27
26
|
|
|
28
|
-
def __post_init__(self):
|
|
27
|
+
def __post_init__(self):
|
|
29
28
|
self.data_version = VERSION
|
|
30
29
|
|
|
31
30
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
pyobo/sources/cgnc.py
CHANGED
pyobo/sources/chebi.py
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for ChEBI."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Mapping
|
|
6
4
|
|
|
7
5
|
from ..api import get_filtered_properties_mapping, get_filtered_relations_df
|
|
8
6
|
from ..struct import Reference, TypeDef
|
|
@@ -33,7 +31,7 @@ def get_chebi_smiles_id_mapping() -> Mapping[str, str]:
|
|
|
33
31
|
has_role = TypeDef(reference=Reference(prefix="chebi", identifier="has_role"))
|
|
34
32
|
|
|
35
33
|
|
|
36
|
-
def get_chebi_role_to_children() -> Mapping[str,
|
|
34
|
+
def get_chebi_role_to_children() -> Mapping[str, set[tuple[str, str]]]:
|
|
37
35
|
"""Get the ChEBI role to children mapping."""
|
|
38
36
|
df = get_filtered_relations_df("chebi", relation=has_role)
|
|
39
37
|
return multisetdict((role_id, ("chebi", chemical_id)) for chemical_id, _, role_id in df.values)
|
pyobo/sources/chembl.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for ChEMBL.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.chembl -vv``.
|
|
6
4
|
"""
|
|
7
5
|
|
|
8
6
|
import logging
|
|
7
|
+
from collections.abc import Iterable
|
|
9
8
|
from contextlib import closing
|
|
10
|
-
from typing import Iterable
|
|
11
9
|
|
|
12
10
|
import chembl_downloader
|
|
13
11
|
|
pyobo/sources/civic_gene.py
CHANGED
pyobo/sources/complexportal.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for ComplexPortal."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
7
5
|
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from tqdm.auto import tqdm
|
|
@@ -52,7 +50,7 @@ DTYPE = {
|
|
|
52
50
|
}
|
|
53
51
|
|
|
54
52
|
|
|
55
|
-
def _parse_members(s) ->
|
|
53
|
+
def _parse_members(s) -> list[tuple[Reference, str]]:
|
|
56
54
|
if pd.isna(s):
|
|
57
55
|
return []
|
|
58
56
|
|
|
@@ -68,7 +66,7 @@ def _parse_members(s) -> List[Tuple[Reference, str]]:
|
|
|
68
66
|
return rv
|
|
69
67
|
|
|
70
68
|
|
|
71
|
-
def _parse_xrefs(s) ->
|
|
69
|
+
def _parse_xrefs(s) -> list[tuple[Reference, str]]:
|
|
72
70
|
if pd.isna(s):
|
|
73
71
|
return []
|
|
74
72
|
|
pyobo/sources/conso.py
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for CONSO."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
6
4
|
|
|
7
5
|
import pandas as pd
|
|
8
6
|
|
|
@@ -68,7 +66,7 @@ def iter_terms() -> Iterable[Term]:
|
|
|
68
66
|
for _, row in terms_df.iterrows():
|
|
69
67
|
if row["Name"] == "WITHDRAWN":
|
|
70
68
|
continue
|
|
71
|
-
provenance:
|
|
69
|
+
provenance: list[Reference] = []
|
|
72
70
|
for curie in row["References"].split(","):
|
|
73
71
|
curie = curie.strip()
|
|
74
72
|
if not curie:
|
pyobo/sources/cpt.py
CHANGED
pyobo/sources/credit.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Converter for the Contributor Roles Taxonomy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
|
|
8
|
+
from more_itertools import chunked
|
|
9
|
+
|
|
10
|
+
from pyobo.struct import Obo, Term
|
|
11
|
+
from pyobo.utils.path import ensure_path
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CreditGetter",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
|
|
18
|
+
PREFIX = "credit"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CreditGetter(Obo):
|
|
22
|
+
"""An ontology representation of the Contributor Roles Taxonomy."""
|
|
23
|
+
|
|
24
|
+
ontology = PREFIX
|
|
25
|
+
static_version = "2022"
|
|
26
|
+
idspaces = {
|
|
27
|
+
PREFIX: "https://credit.niso.org/contributor-roles/",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
31
|
+
"""Iterate over terms in the ontology."""
|
|
32
|
+
return get_terms(force=force)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_obo(force: bool = False) -> Obo:
|
|
36
|
+
"""Get RGD as OBO."""
|
|
37
|
+
return CreditGetter(force=force)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_terms(force: bool = False) -> list[Term]:
|
|
41
|
+
"""Get terms from the Contributor Roles Taxonomy via GitHub."""
|
|
42
|
+
path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
|
|
43
|
+
with open(path) as f:
|
|
44
|
+
data = json.load(f)
|
|
45
|
+
terms = []
|
|
46
|
+
for x in data:
|
|
47
|
+
name = x["name"].removesuffix(".md").lower()
|
|
48
|
+
|
|
49
|
+
pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
|
|
50
|
+
with open(pp) as f:
|
|
51
|
+
header, *rest = f.read().splitlines()
|
|
52
|
+
name = header = header.removeprefix("# Contributor Roles/")
|
|
53
|
+
dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
|
|
54
|
+
identifier = (
|
|
55
|
+
dd["Canonical URL"]
|
|
56
|
+
.removeprefix("https://credit.niso.org/contributor-roles/")
|
|
57
|
+
.rstrip("/")
|
|
58
|
+
)
|
|
59
|
+
desc = dd["Short definition"]
|
|
60
|
+
terms.append(
|
|
61
|
+
Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
return terms
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
if __name__ == "__main__":
|
|
68
|
+
get_obo(force=True).write_default(write_obo=True)
|
pyobo/sources/cvx.py
CHANGED
pyobo/sources/depmap.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""DepMap cell lines."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from typing import Optional
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
import pystow
|
|
@@ -113,7 +112,7 @@ def ensure(version: str, force: bool = False) -> pd.DataFrame:
|
|
|
113
112
|
url=get_url(version=version),
|
|
114
113
|
name="sample_info.tsv",
|
|
115
114
|
force=force,
|
|
116
|
-
read_csv_kwargs=
|
|
115
|
+
read_csv_kwargs={"sep": ",", "dtype": str},
|
|
117
116
|
)
|
|
118
117
|
|
|
119
118
|
|