pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +31 -32
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +61 -5
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +0 -2
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +1 -33
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +19 -17
- pyobo/identifier_utils.py +10 -10
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +8 -5
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/resources/so.py +55 -0
- pyobo/resources/so.tsv +2604 -0
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +0 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +2 -3
- pyobo/sources/biogrid.py +4 -4
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +57 -20
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +1 -1
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +15 -12
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +9 -8
- pyobo/sources/expasy.py +33 -16
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +5 -6
- pyobo/sources/geonames.py +1 -1
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +30 -26
- pyobo/sources/hgncgenefamily.py +9 -11
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +19 -9
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +13 -9
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +35 -28
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +7 -6
- pyobo/sources/pubchem.py +2 -3
- pyobo/sources/reactome.py +30 -11
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +7 -8
- pyobo/sources/ror.py +3 -2
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +29 -17
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +11 -10
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +20 -9
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +22 -23
- pyobo/struct/struct.py +132 -116
- pyobo/struct/typedef.py +14 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +4 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +3 -4
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +0 -2
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -5
- pyobo/xrefdb/sources/wikidata.py +2 -4
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
- pyobo-0.11.1.dist-info/METADATA +711 -0
- pyobo-0.11.1.dist-info/RECORD +173 -0
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
- pyobo-0.11.1.dist-info/entry_points.txt +2 -0
- pyobo-0.10.12.dist-info/METADATA +0 -499
- pyobo-0.10.12.dist-info/RECORD +0 -169
- pyobo-0.10.12.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0
pyobo/sources/rhea.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for Rhea."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from typing import TYPE_CHECKING, Optional
|
|
7
6
|
|
|
8
7
|
import pystow
|
|
9
8
|
|
|
@@ -71,7 +70,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
|
|
|
71
70
|
version,
|
|
72
71
|
url=RHEA_RDF_GZ_URL,
|
|
73
72
|
force=force,
|
|
74
|
-
parse_kwargs=
|
|
73
|
+
parse_kwargs={"format": "xml"},
|
|
75
74
|
)
|
|
76
75
|
|
|
77
76
|
|
|
@@ -103,10 +102,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
103
102
|
)
|
|
104
103
|
names = {str(identifier): str(name) for _, identifier, name in result}
|
|
105
104
|
|
|
106
|
-
terms:
|
|
107
|
-
master_to_left:
|
|
108
|
-
master_to_right:
|
|
109
|
-
master_to_bi:
|
|
105
|
+
terms: dict[str, Term] = {}
|
|
106
|
+
master_to_left: dict[str, str] = {}
|
|
107
|
+
master_to_right: dict[str, str] = {}
|
|
108
|
+
master_to_bi: dict[str, str] = {}
|
|
110
109
|
|
|
111
110
|
directions = ensure_df(
|
|
112
111
|
PREFIX,
|
pyobo/sources/ror.py
CHANGED
|
@@ -4,7 +4,8 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import zipfile
|
|
7
|
-
from
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from typing import Any
|
|
8
9
|
|
|
9
10
|
import bioregistry
|
|
10
11
|
import zenodo_client
|
|
@@ -62,7 +63,7 @@ class RORGetter(Obo):
|
|
|
62
63
|
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
|
63
64
|
}
|
|
64
65
|
|
|
65
|
-
def __post_init__(self):
|
|
66
|
+
def __post_init__(self):
|
|
66
67
|
self.data_version, _url, _path = _get_info()
|
|
67
68
|
super().__post_init__()
|
|
68
69
|
|
pyobo/sources/selventa/schem.py
CHANGED
pyobo/sources/selventa/scomp.py
CHANGED
pyobo/sources/selventa/sdis.py
CHANGED
pyobo/sources/selventa/sfam.py
CHANGED
pyobo/sources/sgd.py
CHANGED
pyobo/sources/slm.py
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Swisslipids."""
|
|
4
2
|
|
|
5
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
6
4
|
|
|
7
5
|
import pandas as pd
|
|
8
6
|
from tqdm.auto import tqdm
|
|
@@ -79,10 +77,10 @@ def iter_terms(version: str, force: bool = False):
|
|
|
79
77
|
smiles,
|
|
80
78
|
inchi,
|
|
81
79
|
inchikey,
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
80
|
+
chebi_ids,
|
|
81
|
+
lipidmaps_ids,
|
|
82
|
+
hmdb_ids,
|
|
83
|
+
pubmed_ids,
|
|
86
84
|
) in tqdm(
|
|
87
85
|
df[COLUMNS].values, desc=f"[{PREFIX}] generating terms", unit_scale=True, unit="lipid"
|
|
88
86
|
):
|
|
@@ -105,21 +103,35 @@ def iter_terms(version: str, force: bool = False):
|
|
|
105
103
|
inchi = inchi[len("InChI=") :]
|
|
106
104
|
term.append_property(has_inchi, inchi)
|
|
107
105
|
if pd.notna(inchikey):
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
106
|
+
inchikey = inchikey.removeprefix("InChIKey=").strip()
|
|
107
|
+
if inchikey and inchikey != "none":
|
|
108
|
+
try:
|
|
109
|
+
inchi_ref = Reference(prefix="inchikey", identifier=inchikey)
|
|
110
|
+
except ValueError:
|
|
111
|
+
tqdm.write(
|
|
112
|
+
f"[slm:{identifier}] had invalid inchikey reference: ({type(inchikey)}) {inchikey}"
|
|
113
|
+
)
|
|
114
|
+
else:
|
|
115
|
+
term.append_exact_match(inchi_ref)
|
|
116
|
+
for chebi_id in _split(chebi_ids):
|
|
117
|
+
term.append_xref(("chebi", chebi_id))
|
|
118
|
+
for lipidmaps_id in _split(lipidmaps_ids):
|
|
114
119
|
term.append_exact_match(("lipidmaps", lipidmaps_id))
|
|
115
|
-
|
|
120
|
+
for hmdb_id in _split(hmdb_ids):
|
|
116
121
|
term.append_exact_match(("hmdb", hmdb_id))
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
term.append_provenance(("pubmed", pmid))
|
|
122
|
+
for pubmed_id in _split(pubmed_ids):
|
|
123
|
+
term.append_provenance(("pubmed", pubmed_id))
|
|
120
124
|
# TODO how to handle class, parents, and components?
|
|
121
125
|
yield term
|
|
122
126
|
|
|
123
127
|
|
|
128
|
+
def _split(s: str) -> Iterable[str]:
|
|
129
|
+
if pd.notna(s):
|
|
130
|
+
for x in s.split("|"):
|
|
131
|
+
x = x.strip()
|
|
132
|
+
if x:
|
|
133
|
+
yield x
|
|
134
|
+
|
|
135
|
+
|
|
124
136
|
if __name__ == "__main__":
|
|
125
137
|
get_obo().write_default(write_obo=True, use_tqdm=True)
|
pyobo/sources/umls/__init__.py
CHANGED
pyobo/sources/umls/__main__.py
CHANGED
pyobo/sources/umls/umls.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for UMLS.
|
|
4
2
|
|
|
5
3
|
Run with ``python -m pyobo.sources.umls``
|
|
@@ -8,7 +6,7 @@ Run with ``python -m pyobo.sources.umls``
|
|
|
8
6
|
import itertools as itt
|
|
9
7
|
import operator
|
|
10
8
|
from collections import defaultdict
|
|
11
|
-
from
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
12
10
|
|
|
13
11
|
import bioregistry
|
|
14
12
|
import pandas as pd
|
|
@@ -67,7 +65,7 @@ def get_obo() -> Obo:
|
|
|
67
65
|
return UMLSGetter()
|
|
68
66
|
|
|
69
67
|
|
|
70
|
-
def get_semantic_types() -> Mapping[str,
|
|
68
|
+
def get_semantic_types() -> Mapping[str, set[str]]:
|
|
71
69
|
"""Get UMLS semantic types for each term."""
|
|
72
70
|
dd = defaultdict(set)
|
|
73
71
|
with open_umls_semantic_types() as file:
|
pyobo/sources/uniprot/uniprot.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for UniProt."""
|
|
4
2
|
|
|
3
|
+
from collections.abc import Iterable
|
|
5
4
|
from operator import attrgetter
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
6
|
+
from typing import Optional, cast
|
|
8
7
|
|
|
9
8
|
from tqdm.auto import tqdm
|
|
10
9
|
|
|
@@ -57,6 +56,7 @@ class UniProtGetter(Obo):
|
|
|
57
56
|
gene_product_of,
|
|
58
57
|
molecularly_interacts_with,
|
|
59
58
|
derives_from,
|
|
59
|
+
located_in,
|
|
60
60
|
]
|
|
61
61
|
|
|
62
62
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -82,7 +82,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
82
82
|
pubmeds,
|
|
83
83
|
pdbs,
|
|
84
84
|
proteome,
|
|
85
|
-
|
|
85
|
+
gene_ids,
|
|
86
86
|
rhea_curies,
|
|
87
87
|
go_components,
|
|
88
88
|
go_functions,
|
|
@@ -94,13 +94,14 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
94
94
|
description = description.removeprefix("FUNCTION: ")
|
|
95
95
|
term = Term(
|
|
96
96
|
reference=Reference(prefix=PREFIX, identifier=uniprot_id, name=accession),
|
|
97
|
-
definition=description or None,
|
|
97
|
+
# definition=description or None,
|
|
98
98
|
)
|
|
99
99
|
term.set_species(taxonomy_id)
|
|
100
|
-
if
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
100
|
+
if gene_ids:
|
|
101
|
+
for gene_id in gene_ids.split(";"):
|
|
102
|
+
term.append_relationship(
|
|
103
|
+
gene_product_of, Reference(prefix="ncbigene", identifier=gene_id.strip())
|
|
104
|
+
)
|
|
104
105
|
|
|
105
106
|
# TODO add type=Reference(prefix="xsd", identifier="boolean")
|
|
106
107
|
term.append_property("reviewed", "true")
|
|
@@ -154,7 +155,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
154
155
|
yield term
|
|
155
156
|
|
|
156
157
|
|
|
157
|
-
def _parse_go(go_terms) ->
|
|
158
|
+
def _parse_go(go_terms) -> list[Reference]:
|
|
158
159
|
rv = []
|
|
159
160
|
if go_terms:
|
|
160
161
|
for go_term in go_terms.split(";"):
|
|
@@ -27,7 +27,8 @@ DR Cross-reference to external Optional; once or more
|
|
|
27
27
|
|
|
28
28
|
import itertools as itt
|
|
29
29
|
from collections import defaultdict
|
|
30
|
-
from
|
|
30
|
+
from collections.abc import Iterable, Mapping
|
|
31
|
+
from typing import Optional
|
|
31
32
|
|
|
32
33
|
from tqdm.auto import tqdm
|
|
33
34
|
|
|
@@ -63,18 +64,18 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
63
64
|
path = ensure_path(PREFIX, url=URL, force=force)
|
|
64
65
|
with open(path) as file:
|
|
65
66
|
lines = list(file)
|
|
66
|
-
it: Iterable[
|
|
67
|
+
it: Iterable[tuple[str, str]] = ((line[:2], line[2:].strip()) for line in lines[47:-5])
|
|
67
68
|
for i, (_, term_lines) in enumerate(itt.groupby(it, key=lambda p: p[0] == "//")):
|
|
68
69
|
term = _parse(i, term_lines)
|
|
69
70
|
if term:
|
|
70
71
|
yield term
|
|
71
72
|
|
|
72
73
|
|
|
73
|
-
def _parse(i, lines: Iterable[
|
|
74
|
-
dd_:
|
|
74
|
+
def _parse(i, lines: Iterable[tuple[str, str]]) -> Optional[Term]:
|
|
75
|
+
dd_: defaultdict[str, list[str]] = defaultdict(list)
|
|
75
76
|
for key, value in lines:
|
|
76
77
|
dd_[key].append(value)
|
|
77
|
-
dd: Mapping[str,
|
|
78
|
+
dd: Mapping[str, list[str]] = dict(dd_)
|
|
78
79
|
|
|
79
80
|
if "//" in dd:
|
|
80
81
|
return None
|
pyobo/sources/utils.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Utilities for converters."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
|
-
from
|
|
4
|
+
from collections.abc import Mapping
|
|
7
5
|
|
|
8
6
|
from ..utils.io import multisetdict
|
|
9
7
|
|
|
@@ -15,7 +13,7 @@ __all__ = [
|
|
|
15
13
|
logger = logging.getLogger(__name__)
|
|
16
14
|
|
|
17
15
|
|
|
18
|
-
def get_go_mapping(path: str, prefix: str) -> Mapping[str,
|
|
16
|
+
def get_go_mapping(path: str, prefix: str) -> Mapping[str, set[tuple[str, str]]]:
|
|
19
17
|
"""Get a GO mapping file."""
|
|
20
18
|
with open(path) as file:
|
|
21
19
|
return multisetdict(
|
|
@@ -23,7 +21,7 @@ def get_go_mapping(path: str, prefix: str) -> Mapping[str, Set[Tuple[str, str]]]
|
|
|
23
21
|
)
|
|
24
22
|
|
|
25
23
|
|
|
26
|
-
def process_go_mapping_line(line: str, prefix: str) ->
|
|
24
|
+
def process_go_mapping_line(line: str, prefix: str) -> tuple[str, tuple[str, str]]:
|
|
27
25
|
"""Process a GO mapping line."""
|
|
28
26
|
line1 = line[len(f"{prefix}:") :]
|
|
29
27
|
line2, go_id = line1.rsplit(";", 1)
|
pyobo/sources/wikipathways.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for WikiPathways."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
import urllib.error
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable
|
|
8
6
|
|
|
9
7
|
from .gmt_utils import parse_wikipathways_gmt
|
|
10
8
|
from ..constants import SPECIES_REMAPPING
|
pyobo/sources/zfin.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Converter for ZFIN."""
|
|
4
2
|
|
|
5
3
|
import logging
|
|
6
4
|
from collections import defaultdict
|
|
7
|
-
from
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Optional
|
|
8
7
|
|
|
9
8
|
from tqdm.auto import tqdm
|
|
10
9
|
|
|
10
|
+
from pyobo.resources.so import get_so_name
|
|
11
11
|
from pyobo.struct import (
|
|
12
12
|
Obo,
|
|
13
13
|
Reference,
|
|
@@ -114,7 +114,9 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
114
114
|
)
|
|
115
115
|
df["sequence_ontology_id"] = df["sequence_ontology_id"].map(lambda x: x[len("SO:") :])
|
|
116
116
|
so = {
|
|
117
|
-
sequence_ontology_id: Reference
|
|
117
|
+
sequence_ontology_id: Reference(
|
|
118
|
+
prefix="SO", identifier=sequence_ontology_id, name=get_so_name(sequence_ontology_id)
|
|
119
|
+
)
|
|
118
120
|
for sequence_ontology_id in df["sequence_ontology_id"].unique()
|
|
119
121
|
}
|
|
120
122
|
for _, reference in sorted(so.items()):
|
|
@@ -136,17 +138,26 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
136
138
|
term.append_alt(alt_id)
|
|
137
139
|
entrez_id = entrez_mappings.get(identifier)
|
|
138
140
|
if entrez_id:
|
|
139
|
-
|
|
141
|
+
try:
|
|
142
|
+
ncbigene_ref = Reference(prefix="ncbigene", identifier=entrez_id)
|
|
143
|
+
except ValueError:
|
|
144
|
+
tqdm.write(f"[zfin] invalid NCBI gene: {entrez_id}")
|
|
145
|
+
else:
|
|
146
|
+
term.append_exact_match(ncbigene_ref)
|
|
140
147
|
for uniprot_id in uniprot_mappings.get(identifier, []):
|
|
141
|
-
term.append_relationship(
|
|
148
|
+
term.append_relationship(
|
|
149
|
+
has_gene_product, Reference(prefix="uniprot", identifier=uniprot_id)
|
|
150
|
+
)
|
|
142
151
|
for hgnc_id in human_orthologs.get(identifier, []):
|
|
143
|
-
term.append_relationship(orthologous, Reference
|
|
152
|
+
term.append_relationship(orthologous, Reference(prefix="hgnc", identifier=hgnc_id))
|
|
144
153
|
for mgi_curie in mouse_orthologs.get(identifier, []):
|
|
145
|
-
mouse_ortholog = Reference.from_curie(mgi_curie
|
|
154
|
+
mouse_ortholog = Reference.from_curie(mgi_curie)
|
|
146
155
|
if mouse_ortholog:
|
|
147
156
|
term.append_relationship(orthologous, mouse_ortholog)
|
|
148
157
|
for flybase_id in fly_orthologs.get(identifier, []):
|
|
149
|
-
term.append_relationship(
|
|
158
|
+
term.append_relationship(
|
|
159
|
+
orthologous, Reference(prefix="flybase", identifier=flybase_id)
|
|
160
|
+
)
|
|
150
161
|
|
|
151
162
|
yield term
|
|
152
163
|
|
pyobo/ssg/__init__.py
CHANGED
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
import itertools as itt
|
|
4
4
|
from collections import defaultdict
|
|
5
|
+
from collections.abc import Sequence
|
|
5
6
|
from operator import attrgetter
|
|
6
7
|
from pathlib import Path
|
|
7
|
-
from typing import Optional,
|
|
8
|
+
from typing import Optional, Union
|
|
8
9
|
|
|
9
10
|
import bioregistry
|
|
10
11
|
from bioregistry.constants import BIOREGISTRY_DEFAULT_BASE_URL
|
|
@@ -37,7 +38,7 @@ def make_site(
|
|
|
37
38
|
metaregistry_metaprefix: Optional[str] = None,
|
|
38
39
|
metaregistry_name: Optional[str] = None,
|
|
39
40
|
metaregistry_base_url: Optional[str] = None,
|
|
40
|
-
show_properties_in_manifest: Optional[Sequence[
|
|
41
|
+
show_properties_in_manifest: Optional[Sequence[tuple[str, str]]] = None,
|
|
41
42
|
) -> None:
|
|
42
43
|
"""Make a website in the given directory.
|
|
43
44
|
|
pyobo/struct/__init__.py
CHANGED
pyobo/struct/reference.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
"""Data structures for OBO."""
|
|
4
2
|
|
|
5
|
-
from typing import Optional
|
|
3
|
+
from typing import Optional
|
|
6
4
|
|
|
7
5
|
import bioregistry
|
|
8
6
|
import curies
|
|
9
7
|
from curies.api import ExpansionError
|
|
10
|
-
from pydantic import Field,
|
|
8
|
+
from pydantic import Field, field_validator, model_validator
|
|
11
9
|
|
|
12
10
|
from .utils import obo_escape
|
|
11
|
+
from ..constants import GLOBAL_CHECK_IDS
|
|
13
12
|
from ..identifier_utils import normalize_curie
|
|
14
13
|
|
|
15
14
|
__all__ = [
|
|
@@ -23,7 +22,7 @@ class Reference(curies.Reference):
|
|
|
23
22
|
|
|
24
23
|
name: Optional[str] = Field(default=None, description="the name of the reference")
|
|
25
24
|
|
|
26
|
-
@
|
|
25
|
+
@field_validator("prefix")
|
|
27
26
|
def validate_prefix(cls, v): # noqa
|
|
28
27
|
"""Validate the prefix for this reference."""
|
|
29
28
|
norm_prefix = bioregistry.normalize_prefix(v)
|
|
@@ -41,19 +40,19 @@ class Reference(curies.Reference):
|
|
|
41
40
|
"""Get the preferred curie for this reference."""
|
|
42
41
|
return f"{self.preferred_prefix}:{self.identifier}"
|
|
43
42
|
|
|
44
|
-
@
|
|
43
|
+
@model_validator(mode="before")
|
|
45
44
|
def validate_identifier(cls, values): # noqa
|
|
46
45
|
"""Validate the identifier."""
|
|
47
46
|
prefix, identifier = values.get("prefix"), values.get("identifier")
|
|
48
47
|
if not prefix or not identifier:
|
|
49
48
|
return values
|
|
50
|
-
|
|
51
|
-
if
|
|
49
|
+
resource = bioregistry.get_resource(prefix)
|
|
50
|
+
if resource is None:
|
|
52
51
|
raise ExpansionError(f"Unknown prefix: {prefix}")
|
|
53
|
-
values["prefix"] =
|
|
54
|
-
values["identifier"] =
|
|
55
|
-
|
|
56
|
-
|
|
52
|
+
values["prefix"] = resource.prefix
|
|
53
|
+
values["identifier"] = resource.standardize_identifier(identifier)
|
|
54
|
+
if GLOBAL_CHECK_IDS and not resource.is_valid_identifier(values["identifier"]):
|
|
55
|
+
raise ValueError(f"non-standard identifier: {resource.prefix}:{values['identifier']}")
|
|
57
56
|
return values
|
|
58
57
|
|
|
59
58
|
@classmethod
|
|
@@ -62,7 +61,7 @@ class Reference(curies.Reference):
|
|
|
62
61
|
from ..api import get_name
|
|
63
62
|
|
|
64
63
|
name = get_name(prefix, identifier)
|
|
65
|
-
return cls(prefix
|
|
64
|
+
return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name})
|
|
66
65
|
|
|
67
66
|
@property
|
|
68
67
|
def bioregistry_link(self) -> str:
|
|
@@ -118,13 +117,13 @@ class Reference(curies.Reference):
|
|
|
118
117
|
return None
|
|
119
118
|
if name is None and auto:
|
|
120
119
|
return cls.auto(prefix=prefix, identifier=identifier)
|
|
121
|
-
return cls(prefix
|
|
120
|
+
return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name})
|
|
122
121
|
|
|
123
122
|
@property
|
|
124
123
|
def _escaped_identifier(self):
|
|
125
124
|
return obo_escape(self.identifier)
|
|
126
125
|
|
|
127
|
-
def __str__(self):
|
|
126
|
+
def __str__(self):
|
|
128
127
|
identifier_lower = self.identifier.lower()
|
|
129
128
|
if identifier_lower.startswith(f"{self.prefix.lower()}:"):
|
|
130
129
|
rv = identifier_lower
|
|
@@ -134,7 +133,7 @@ class Reference(curies.Reference):
|
|
|
134
133
|
rv = f"{rv} ! {self.name}"
|
|
135
134
|
return rv
|
|
136
135
|
|
|
137
|
-
def __hash__(self):
|
|
136
|
+
def __hash__(self):
|
|
138
137
|
return hash((self.__class__, self.prefix, self.identifier))
|
|
139
138
|
|
|
140
139
|
|
|
@@ -145,32 +144,32 @@ class Referenced:
|
|
|
145
144
|
|
|
146
145
|
@property
|
|
147
146
|
def prefix(self):
|
|
148
|
-
"""The prefix of the typedef."""
|
|
147
|
+
"""The prefix of the typedef."""
|
|
149
148
|
return self.reference.prefix
|
|
150
149
|
|
|
151
150
|
@property
|
|
152
151
|
def name(self):
|
|
153
|
-
"""The name of the typedef."""
|
|
152
|
+
"""The name of the typedef."""
|
|
154
153
|
return self.reference.name
|
|
155
154
|
|
|
156
155
|
@property
|
|
157
156
|
def identifier(self) -> str:
|
|
158
|
-
"""The local unique identifier for this typedef."""
|
|
157
|
+
"""The local unique identifier for this typedef."""
|
|
159
158
|
return self.reference.identifier
|
|
160
159
|
|
|
161
160
|
@property
|
|
162
161
|
def curie(self) -> str:
|
|
163
|
-
"""The CURIE for this typedef."""
|
|
162
|
+
"""The CURIE for this typedef."""
|
|
164
163
|
return self.reference.curie
|
|
165
164
|
|
|
166
165
|
@property
|
|
167
166
|
def preferred_curie(self) -> str:
|
|
168
|
-
"""The preferred CURIE for this typedef."""
|
|
167
|
+
"""The preferred CURIE for this typedef."""
|
|
169
168
|
return self.reference.preferred_curie
|
|
170
169
|
|
|
171
170
|
@property
|
|
172
|
-
def pair(self) ->
|
|
173
|
-
"""The pair of namespace/identifier."""
|
|
171
|
+
def pair(self) -> tuple[str, str]:
|
|
172
|
+
"""The pair of namespace/identifier."""
|
|
174
173
|
return self.reference.pair
|
|
175
174
|
|
|
176
175
|
@property
|