pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +31 -32
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +61 -5
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +0 -2
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +1 -33
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +19 -17
- pyobo/identifier_utils.py +10 -10
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +8 -5
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/resources/so.py +55 -0
- pyobo/resources/so.tsv +2604 -0
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +0 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +2 -3
- pyobo/sources/biogrid.py +4 -4
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +57 -20
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +1 -1
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +15 -12
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +9 -8
- pyobo/sources/expasy.py +33 -16
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +5 -6
- pyobo/sources/geonames.py +1 -1
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +30 -26
- pyobo/sources/hgncgenefamily.py +9 -11
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +19 -9
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +13 -9
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +35 -28
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +7 -6
- pyobo/sources/pubchem.py +2 -3
- pyobo/sources/reactome.py +30 -11
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +7 -8
- pyobo/sources/ror.py +3 -2
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +29 -17
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +11 -10
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +20 -9
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +22 -23
- pyobo/struct/struct.py +132 -116
- pyobo/struct/typedef.py +14 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +4 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +3 -4
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +0 -2
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -5
- pyobo/xrefdb/sources/wikidata.py +2 -4
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
- pyobo-0.11.1.dist-info/METADATA +711 -0
- pyobo-0.11.1.dist-info/RECORD +173 -0
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
- pyobo-0.11.1.dist-info/entry_points.txt +2 -0
- pyobo-0.10.12.dist-info/METADATA +0 -499
- pyobo-0.10.12.dist-info/RECORD +0 -169
- pyobo-0.10.12.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0
pyobo/plugins.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Tools for loading entry points."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
5
4
|
from functools import lru_cache
|
|
6
|
-
from typing import Callable,
|
|
5
|
+
from typing import Callable, Optional
|
|
7
6
|
|
|
8
7
|
from .struct import Obo
|
|
9
8
|
|
|
@@ -14,7 +13,7 @@ __all__ = [
|
|
|
14
13
|
]
|
|
15
14
|
|
|
16
15
|
|
|
17
|
-
@lru_cache
|
|
16
|
+
@lru_cache
|
|
18
17
|
def _get_nomenclature_plugins() -> Mapping[str, Callable[[], Obo]]:
|
|
19
18
|
from .sources import ontology_resolver
|
|
20
19
|
|
pyobo/py.typed
ADDED
|
File without changes
|
pyobo/reader.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""OBO Readers."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
4
|
+
from collections.abc import Iterable, Mapping
|
|
6
5
|
from datetime import datetime
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Optional, Union
|
|
9
8
|
|
|
10
9
|
import bioregistry
|
|
11
10
|
import networkx as nx
|
|
@@ -13,7 +12,7 @@ from more_itertools import pairwise
|
|
|
13
12
|
from tqdm.auto import tqdm
|
|
14
13
|
|
|
15
14
|
from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
|
|
16
|
-
from .identifier_utils import
|
|
15
|
+
from .identifier_utils import MissingPrefixError, normalize_curie
|
|
17
16
|
from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix
|
|
18
17
|
from .struct import (
|
|
19
18
|
Obo,
|
|
@@ -39,7 +38,7 @@ logger = logging.getLogger(__name__)
|
|
|
39
38
|
|
|
40
39
|
# FIXME use bioontologies
|
|
41
40
|
# RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
|
|
42
|
-
RELATION_REMAPPINGS: Mapping[str,
|
|
41
|
+
RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
|
|
43
42
|
"part_of": part_of.pair,
|
|
44
43
|
"has_part": has_part.pair,
|
|
45
44
|
"develops_from": develops_from.pair,
|
|
@@ -75,7 +74,7 @@ def from_obo_path(
|
|
|
75
74
|
return from_obonet(graph, strict=strict, **kwargs)
|
|
76
75
|
|
|
77
76
|
|
|
78
|
-
def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
|
|
77
|
+
def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
|
|
79
78
|
"""Get all of the terms from a OBO graph."""
|
|
80
79
|
_ontology = graph.graph["ontology"]
|
|
81
80
|
ontology = bioregistry.normalize_prefix(_ontology) # probably always okay
|
|
@@ -126,12 +125,12 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
126
125
|
)
|
|
127
126
|
for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict)
|
|
128
127
|
)
|
|
129
|
-
references: Mapping[
|
|
128
|
+
references: Mapping[tuple[str, str], Reference] = {
|
|
130
129
|
reference.pair: reference for reference in reference_it
|
|
131
130
|
}
|
|
132
131
|
|
|
133
132
|
#: CURIEs to typedefs
|
|
134
|
-
typedefs: Mapping[
|
|
133
|
+
typedefs: Mapping[tuple[str, str], TypeDef] = {
|
|
135
134
|
typedef.pair: typedef for typedef in iterate_graph_typedefs(graph, ontology)
|
|
136
135
|
}
|
|
137
136
|
|
|
@@ -152,7 +151,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
152
151
|
|
|
153
152
|
try:
|
|
154
153
|
node_xrefs = list(iterate_node_xrefs(prefix=prefix, data=data, strict=strict))
|
|
155
|
-
except
|
|
154
|
+
except MissingPrefixError as e:
|
|
156
155
|
e.reference = reference
|
|
157
156
|
raise e
|
|
158
157
|
xrefs, provenance = [], []
|
|
@@ -171,7 +170,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
171
170
|
|
|
172
171
|
try:
|
|
173
172
|
alt_ids = list(iterate_node_alt_ids(data, strict=strict))
|
|
174
|
-
except
|
|
173
|
+
except MissingPrefixError as e:
|
|
175
174
|
e.reference = reference
|
|
176
175
|
raise e
|
|
177
176
|
n_alt_ids += len(alt_ids)
|
|
@@ -185,7 +184,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
185
184
|
strict=strict,
|
|
186
185
|
)
|
|
187
186
|
)
|
|
188
|
-
except
|
|
187
|
+
except MissingPrefixError as e:
|
|
189
188
|
e.reference = reference
|
|
190
189
|
raise e
|
|
191
190
|
n_parents += len(parents)
|
|
@@ -220,7 +219,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq
|
|
|
220
219
|
strict=strict,
|
|
221
220
|
)
|
|
222
221
|
)
|
|
223
|
-
except
|
|
222
|
+
except MissingPrefixError as e:
|
|
224
223
|
e.reference = reference
|
|
225
224
|
raise e
|
|
226
225
|
for relation, reference in relations_references:
|
|
@@ -278,7 +277,7 @@ def _iter_obo_graph(
|
|
|
278
277
|
graph: nx.MultiDiGraph,
|
|
279
278
|
*,
|
|
280
279
|
strict: bool = True,
|
|
281
|
-
) -> Iterable[
|
|
280
|
+
) -> Iterable[tuple[str, str, Mapping[str, Any]]]:
|
|
282
281
|
"""Iterate over the nodes in the graph with the prefix stripped (if it's there)."""
|
|
283
282
|
for node, data in graph.nodes(data=True):
|
|
284
283
|
prefix, identifier = normalize_curie(node, strict=strict)
|
|
@@ -366,7 +365,8 @@ def iterate_graph_typedefs(
|
|
|
366
365
|
|
|
367
366
|
def get_definition(
|
|
368
367
|
data, *, prefix: str, identifier: str
|
|
369
|
-
) -> Union[
|
|
368
|
+
) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
|
|
369
|
+
"""Extract the definition from the data."""
|
|
370
370
|
definition = data.get("def") # it's allowed not to have a definition
|
|
371
371
|
if not definition:
|
|
372
372
|
return None, None
|
|
@@ -379,7 +379,7 @@ def _extract_definition(
|
|
|
379
379
|
prefix: str,
|
|
380
380
|
identifier: str,
|
|
381
381
|
strict: bool = False,
|
|
382
|
-
) -> Union[
|
|
382
|
+
) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
|
|
383
383
|
"""Extract the definitions."""
|
|
384
384
|
if not s.startswith('"'):
|
|
385
385
|
raise ValueError("definition does not start with a quote")
|
|
@@ -405,7 +405,7 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
|
|
|
405
405
|
return None
|
|
406
406
|
|
|
407
407
|
|
|
408
|
-
def _quote_split(s: str) ->
|
|
408
|
+
def _quote_split(s: str) -> tuple[str, str]:
|
|
409
409
|
s = s.lstrip('"')
|
|
410
410
|
i = _get_first_nonquoted(s)
|
|
411
411
|
if i is None:
|
|
@@ -416,9 +416,7 @@ def _quote_split(s: str) -> Tuple[str, str]:
|
|
|
416
416
|
def _clean_definition(s: str) -> str:
|
|
417
417
|
# if '\t' in s:
|
|
418
418
|
# logger.warning('has tab')
|
|
419
|
-
return (
|
|
420
|
-
s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "") # noqa:W605
|
|
421
|
-
)
|
|
419
|
+
return s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "")
|
|
422
420
|
|
|
423
421
|
|
|
424
422
|
def _extract_synonym(
|
|
@@ -516,7 +514,7 @@ HANDLED_PROPERTY_TYPES = {
|
|
|
516
514
|
|
|
517
515
|
def iterate_node_properties(
|
|
518
516
|
data: Mapping[str, Any], *, property_prefix: Optional[str] = None, term=None
|
|
519
|
-
) -> Iterable[
|
|
517
|
+
) -> Iterable[tuple[str, str]]:
|
|
520
518
|
"""Extract properties from a :mod:`obonet` node's data."""
|
|
521
519
|
for prop_value_type in data.get("property_value", []):
|
|
522
520
|
try:
|
|
@@ -568,7 +566,7 @@ def iterate_node_relationships(
|
|
|
568
566
|
prefix: str,
|
|
569
567
|
identifier: str,
|
|
570
568
|
strict: bool = True,
|
|
571
|
-
) -> Iterable[
|
|
569
|
+
) -> Iterable[tuple[Reference, Reference]]:
|
|
572
570
|
"""Extract relationships from a :mod:`obonet` node's data."""
|
|
573
571
|
for s in data.get("relationship", []):
|
|
574
572
|
relation_curie, target_curie = s.split(" ")
|
pyobo/registries/__init__.py
CHANGED
pyobo/registries/metaregistry.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Load the manually curated metaregistry."""
|
|
4
2
|
|
|
5
3
|
import itertools as itt
|
|
6
4
|
import json
|
|
7
5
|
import os
|
|
6
|
+
from collections.abc import Iterable, Mapping
|
|
8
7
|
from functools import lru_cache
|
|
9
8
|
from pathlib import Path
|
|
10
|
-
from typing import Iterable, Mapping, Set, Tuple
|
|
11
9
|
|
|
12
10
|
import bioregistry
|
|
13
11
|
|
|
@@ -25,7 +23,7 @@ def has_no_download(prefix: str) -> bool:
|
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
@lru_cache(maxsize=1)
|
|
28
|
-
def _no_download() ->
|
|
26
|
+
def _no_download() -> set[str]:
|
|
29
27
|
"""Get the list of prefixes not available as OBO."""
|
|
30
28
|
return {
|
|
31
29
|
prefix
|
|
@@ -41,7 +39,7 @@ def curie_has_blacklisted_prefix(curie: str) -> bool:
|
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
@lru_cache(maxsize=1)
|
|
44
|
-
def get_xrefs_prefix_blacklist() ->
|
|
42
|
+
def get_xrefs_prefix_blacklist() -> set[str]:
|
|
45
43
|
"""Get the set of blacklisted xref prefixes."""
|
|
46
44
|
#: Xrefs starting with these prefixes will be ignored
|
|
47
45
|
prefixes = set(
|
|
@@ -65,7 +63,7 @@ def curie_has_blacklisted_suffix(curie: str) -> bool:
|
|
|
65
63
|
|
|
66
64
|
|
|
67
65
|
@lru_cache(maxsize=1)
|
|
68
|
-
def get_xrefs_suffix_blacklist() ->
|
|
66
|
+
def get_xrefs_suffix_blacklist() -> set[str]:
|
|
69
67
|
"""Get the set of blacklisted xref suffixes."""
|
|
70
68
|
#: Xrefs ending with these suffixes will be ignored
|
|
71
69
|
return set(CURATED_REGISTRY["blacklists"]["suffix"])
|
|
@@ -77,7 +75,7 @@ def curie_is_blacklisted(curie: str) -> bool:
|
|
|
77
75
|
|
|
78
76
|
|
|
79
77
|
@lru_cache(maxsize=1)
|
|
80
|
-
def get_xrefs_blacklist() ->
|
|
78
|
+
def get_xrefs_blacklist() -> set[str]:
|
|
81
79
|
"""Get the set of blacklisted xrefs."""
|
|
82
80
|
rv = set()
|
|
83
81
|
for x in CURATED_REGISTRY["blacklists"]["full"]:
|
|
@@ -123,7 +121,7 @@ def remap_prefix(curie: str) -> str:
|
|
|
123
121
|
return curie
|
|
124
122
|
|
|
125
123
|
|
|
126
|
-
def iter_cached_obo() -> Iterable[
|
|
124
|
+
def iter_cached_obo() -> Iterable[tuple[str, str]]:
|
|
127
125
|
"""Iterate over cached OBO paths."""
|
|
128
126
|
for prefix in os.listdir(RAW_DIRECTORY):
|
|
129
127
|
if prefix in GLOBAL_SKIP or has_no_download(prefix) or bioregistry.is_deprecated(prefix):
|
pyobo/resource_utils.py
CHANGED
pyobo/resources/__init__.py
CHANGED
pyobo/resources/ncbitaxon.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Loading of the NCBI Taxonomy names."""
|
|
4
2
|
|
|
5
3
|
import csv
|
|
6
4
|
import gzip
|
|
5
|
+
from collections.abc import Mapping
|
|
7
6
|
from functools import lru_cache
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import
|
|
8
|
+
from typing import Optional, Union
|
|
10
9
|
|
|
11
10
|
import requests
|
|
12
11
|
|
pyobo/resources/ro.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Loading of the relations ontology names."""
|
|
4
2
|
|
|
5
3
|
import csv
|
|
6
4
|
import os
|
|
5
|
+
from collections.abc import Mapping
|
|
7
6
|
from functools import lru_cache
|
|
8
|
-
from typing import Mapping, Tuple
|
|
9
7
|
|
|
10
8
|
import requests
|
|
11
9
|
|
|
@@ -20,7 +18,7 @@ PREFIX = "http://purl.obolibrary.org/obo/"
|
|
|
20
18
|
|
|
21
19
|
|
|
22
20
|
@lru_cache(maxsize=1)
|
|
23
|
-
def load_ro() -> Mapping[
|
|
21
|
+
def load_ro() -> Mapping[tuple[str, str], str]:
|
|
24
22
|
"""Load the relation ontology names."""
|
|
25
23
|
if not os.path.exists(PATH):
|
|
26
24
|
download()
|
pyobo/resources/so.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Loading of the relations ontology names."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import os
|
|
7
|
+
from functools import lru_cache
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"get_so_name",
|
|
13
|
+
"load_so",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
17
|
+
SO_PATH = os.path.join(HERE, "so.tsv")
|
|
18
|
+
SO_JSON_URL = "https://github.com/The-Sequence-Ontology/SO-Ontologies/raw/refs/heads/master/Ontology_Files/so-simple.json"
|
|
19
|
+
SO_URI_PREFIX = "http://purl.obolibrary.org/obo/SO_"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_so_name(so_id: str) -> str | None:
    """Look up the human-readable name for a Sequence Ontology identifier.

    :param so_id: An unprefixed SO local identifier, e.g., ``0000110``.
    :return: The term's name, or ``None`` if the identifier is not known.
    """
    names = load_so()
    return names.get(so_id)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@lru_cache(maxsize=1)
def load_so() -> dict[str, str]:
    """Load the Sequence Ontology names as a mapping from identifier to name.

    Downloads and caches the TSV file on first use; the parsed mapping is
    then memoized in-process via :func:`functools.lru_cache`.

    :return: A dict from unprefixed SO identifier to term name.
    """
    if not os.path.exists(SO_PATH):
        download_so()
    # Read with an explicit encoding — the cache is written as UTF-8, and
    # relying on the platform default locale encoding can mis-decode names.
    with open(SO_PATH, encoding="utf-8") as file:
        return dict(csv.reader(file, delimiter="\t"))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def download_so() -> None:
    """Download the latest Sequence Ontology release and cache it as a TSV.

    Fetches the simple OBO-graph JSON export of the Sequence Ontology,
    extracts ``(identifier, name)`` pairs for SO terms, and writes them to
    ``SO_PATH`` sorted by numeric identifier.
    """
    # Bound the request so a stalled connection cannot hang indefinitely.
    res_json = requests.get(SO_JSON_URL, timeout=300).json()
    rows = []
    for node in res_json["graphs"][0]["nodes"]:
        uri = node["id"]
        # Skip nodes imported from other ontologies (non-SO URIs).
        if not uri.startswith(SO_URI_PREFIX):
            continue
        identifier = uri.removeprefix(SO_URI_PREFIX)
        # Some nodes (e.g., obsolete terms) have no label; skip them.
        name = node.get("lbl")
        if name:
            rows.append((identifier, name))

    # newline="" is required by the csv module to avoid doubled line
    # endings on Windows; write UTF-8 to match the reader in load_so().
    with open(SO_PATH, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file, delimiter="\t")
        # Sort numerically so the zero-padded identifiers order correctly.
        writer.writerows(sorted(rows, key=lambda row: int(row[0])))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
download_so()
|