pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- {pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
- pyobo/apps/__init__.py +0 -3
- pyobo/apps/cli.py +0 -24
- pyobo/apps/gilda/__init__.py +0 -3
- pyobo/apps/gilda/__main__.py +0 -8
- pyobo/apps/gilda/app.py +0 -48
- pyobo/apps/gilda/cli.py +0 -36
- pyobo/apps/gilda/templates/base.html +0 -33
- pyobo/apps/gilda/templates/home.html +0 -11
- pyobo/apps/gilda/templates/matches.html +0 -32
- pyobo/apps/mapper/__init__.py +0 -3
- pyobo/apps/mapper/__main__.py +0 -11
- pyobo/apps/mapper/cli.py +0 -37
- pyobo/apps/mapper/mapper.py +0 -187
- pyobo/apps/mapper/templates/base.html +0 -35
- pyobo/apps/mapper/templates/mapper_home.html +0 -64
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.1.dist-info/RECORD +0 -173
- pyobo-0.11.1.dist-info/WHEEL +0 -5
- pyobo-0.11.1.dist-info/top_level.txt +0 -1
pyobo/sources/reactome.py
CHANGED
|
@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
|
|
|
11
11
|
from ..api import get_id_multirelations_mapping
|
|
12
12
|
from ..constants import SPECIES_REMAPPING
|
|
13
13
|
from ..resources.ncbitaxon import get_ncbitaxon_id
|
|
14
|
-
from ..struct import Obo, Reference, Term, from_species, has_participant
|
|
14
|
+
from ..struct import Obo, Reference, Term, from_species, has_citation, has_participant
|
|
15
15
|
from ..utils.io import multidict
|
|
16
16
|
from ..utils.path import ensure_df
|
|
17
17
|
|
|
@@ -31,18 +31,13 @@ class ReactomeGetter(Obo):
|
|
|
31
31
|
"""An ontology representation of the Reactome pathway database."""
|
|
32
32
|
|
|
33
33
|
ontology = bioversions_key = PREFIX
|
|
34
|
-
typedefs = [from_species, has_participant]
|
|
34
|
+
typedefs = [from_species, has_participant, has_citation]
|
|
35
35
|
|
|
36
36
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
37
37
|
"""Iterate over terms in the ontology."""
|
|
38
38
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
def get_obo(force: bool = False) -> Obo:
|
|
42
|
-
"""Get Reactome OBO."""
|
|
43
|
-
return ReactomeGetter(force=force)
|
|
44
|
-
|
|
45
|
-
|
|
46
41
|
def ensure_participant_df(version: str, force: bool = False) -> pd.DataFrame:
|
|
47
42
|
"""Get the pathway uniprot participant dataframe."""
|
|
48
43
|
uniprot_pathway_url = f"https://reactome.org/download/{version}/UniProt2Reactome_All_Levels.txt"
|
|
@@ -76,11 +71,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
76
71
|
for reactome_id, name, species_name, taxonomy_id in it:
|
|
77
72
|
terms[reactome_id] = term = Term(
|
|
78
73
|
reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
|
|
79
|
-
provenance=[
|
|
80
|
-
Reference(prefix="pubmed", identifier=pubmed_id)
|
|
81
|
-
for pubmed_id in provenance_d.get(reactome_id, [])
|
|
82
|
-
],
|
|
83
74
|
)
|
|
75
|
+
for pubmed_id in provenance_d.get(reactome_id, []):
|
|
76
|
+
term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
|
|
77
|
+
|
|
84
78
|
if not taxonomy_id or pd.isna(taxonomy_id):
|
|
85
79
|
raise ValueError(f"unmapped species: {species_name}")
|
|
86
80
|
|
pyobo/sources/rgd.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from typing import Optional
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
from tqdm.auto import tqdm
|
|
@@ -10,10 +9,11 @@ from tqdm.auto import tqdm
|
|
|
10
9
|
from pyobo.struct import (
|
|
11
10
|
Obo,
|
|
12
11
|
Reference,
|
|
13
|
-
Synonym,
|
|
14
12
|
SynonymTypeDef,
|
|
15
13
|
Term,
|
|
14
|
+
default_reference,
|
|
16
15
|
from_species,
|
|
16
|
+
has_citation,
|
|
17
17
|
has_gene_product,
|
|
18
18
|
transcribes_to,
|
|
19
19
|
)
|
|
@@ -22,8 +22,8 @@ from pyobo.utils.path import ensure_df
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
PREFIX = "rgd"
|
|
24
24
|
|
|
25
|
-
old_symbol_type = SynonymTypeDef
|
|
26
|
-
old_name_type = SynonymTypeDef
|
|
25
|
+
old_symbol_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_symbol"))
|
|
26
|
+
old_name_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_name"))
|
|
27
27
|
|
|
28
28
|
# NOTE unigene id was discontinue in January 18th, 2021 dump
|
|
29
29
|
|
|
@@ -73,7 +73,7 @@ class RGDGetter(Obo):
|
|
|
73
73
|
"""An ontology representation of RGD's rat gene nomenclature."""
|
|
74
74
|
|
|
75
75
|
bioversions_key = ontology = PREFIX
|
|
76
|
-
typedefs = [from_species, transcribes_to, has_gene_product]
|
|
76
|
+
typedefs = [from_species, transcribes_to, has_gene_product, has_citation]
|
|
77
77
|
synonym_typedefs = [old_name_type, old_symbol_type]
|
|
78
78
|
|
|
79
79
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -81,11 +81,6 @@ class RGDGetter(Obo):
|
|
|
81
81
|
return get_terms(force=force, version=self._version_or_raise)
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
def get_obo(force: bool = False) -> Obo:
|
|
85
|
-
"""Get RGD as OBO."""
|
|
86
|
-
return RGDGetter(force=force)
|
|
87
|
-
|
|
88
|
-
|
|
89
84
|
namespace_to_column = [
|
|
90
85
|
("ensembl", "ENSEMBL_ID"),
|
|
91
86
|
("uniprot", "UNIPROT_ID"),
|
|
@@ -93,7 +88,7 @@ namespace_to_column = [
|
|
|
93
88
|
]
|
|
94
89
|
|
|
95
90
|
|
|
96
|
-
def get_terms(force: bool = False, version:
|
|
91
|
+
def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
|
|
97
92
|
"""Get RGD terms."""
|
|
98
93
|
df = ensure_df(
|
|
99
94
|
PREFIX,
|
|
@@ -124,11 +119,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
124
119
|
old_names = row["OLD_NAME"]
|
|
125
120
|
if old_names and pd.notna(old_names):
|
|
126
121
|
for old_name in old_names.split(";"):
|
|
127
|
-
term.append_synonym(
|
|
122
|
+
term.append_synonym(old_name, type=old_name_type)
|
|
128
123
|
old_symbols = row["OLD_SYMBOL"]
|
|
129
124
|
if old_symbols and pd.notna(old_symbols):
|
|
130
125
|
for old_symbol in old_symbols.split(";"):
|
|
131
|
-
term.append_synonym(
|
|
126
|
+
term.append_synonym(old_symbol, type=old_symbol_type)
|
|
132
127
|
for prefix, key in namespace_to_column:
|
|
133
128
|
xref_ids = str(row[key])
|
|
134
129
|
if xref_ids and pd.notna(xref_ids):
|
|
@@ -136,7 +131,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
136
131
|
if xref_id == "nan":
|
|
137
132
|
continue
|
|
138
133
|
if prefix == "uniprot":
|
|
139
|
-
term.
|
|
134
|
+
term.annotate_object(
|
|
140
135
|
has_gene_product, Reference(prefix=prefix, identifier=xref_id)
|
|
141
136
|
)
|
|
142
137
|
elif prefix == "ensembl":
|
|
@@ -144,11 +139,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
144
139
|
# second one is reverse strand
|
|
145
140
|
term.append_xref(Reference(prefix=prefix, identifier=xref_id))
|
|
146
141
|
elif xref_id.startswith("ENSMUST"):
|
|
147
|
-
term.
|
|
142
|
+
term.annotate_object(
|
|
148
143
|
transcribes_to, Reference(prefix=prefix, identifier=xref_id)
|
|
149
144
|
)
|
|
150
145
|
elif xref_id.startswith("ENSMUSP"):
|
|
151
|
-
term.
|
|
146
|
+
term.annotate_object(
|
|
152
147
|
has_gene_product, Reference(prefix=prefix, identifier=xref_id)
|
|
153
148
|
)
|
|
154
149
|
else:
|
pyobo/sources/rhea.py
CHANGED
|
@@ -2,23 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from typing import TYPE_CHECKING,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
6
6
|
|
|
7
7
|
import pystow
|
|
8
8
|
|
|
9
9
|
from pyobo.api.utils import get_version
|
|
10
|
-
from pyobo.struct import Obo, Reference, Term
|
|
11
|
-
from pyobo.struct
|
|
12
|
-
TypeDef,
|
|
13
|
-
enabled_by,
|
|
14
|
-
has_bidirectional_reaction,
|
|
15
|
-
has_input,
|
|
16
|
-
has_left_to_right_reaction,
|
|
17
|
-
has_output,
|
|
18
|
-
has_participant,
|
|
19
|
-
has_right_to_left_reaction,
|
|
20
|
-
reaction_enabled_by_molecular_function,
|
|
21
|
-
)
|
|
10
|
+
from pyobo.struct import Obo, Reference, Term, TypeDef
|
|
11
|
+
from pyobo.struct import typedef as v
|
|
22
12
|
from pyobo.utils.path import ensure_df
|
|
23
13
|
|
|
24
14
|
if TYPE_CHECKING:
|
|
@@ -32,6 +22,16 @@ logger = logging.getLogger(__name__)
|
|
|
32
22
|
PREFIX = "rhea"
|
|
33
23
|
RHEA_RDF_GZ_URL = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"
|
|
34
24
|
|
|
25
|
+
has_left_to_right_reaction = TypeDef.default(
|
|
26
|
+
PREFIX, "hasLeftToRightReaction", name="has left to right reaction", is_metadata_tag=True
|
|
27
|
+
).append_xref(v.has_left_to_right_reaction)
|
|
28
|
+
has_right_to_left_reaction = TypeDef.default(
|
|
29
|
+
PREFIX, "hasRightToLeftReaction", name="has right to left reaction", is_metadata_tag=True
|
|
30
|
+
).append_xref(v.has_right_to_left_reaction)
|
|
31
|
+
has_bidirectional_reaction = TypeDef.default(
|
|
32
|
+
PREFIX, "hasBidirectionalReaction", name="has bidirectional reaction", is_metadata_tag=True
|
|
33
|
+
).append_xref(v.has_bidirectional_reaction)
|
|
34
|
+
|
|
35
35
|
|
|
36
36
|
class RheaGetter(Obo):
|
|
37
37
|
"""An ontology representation of Rhea's chemical reaction database."""
|
|
@@ -41,11 +41,11 @@ class RheaGetter(Obo):
|
|
|
41
41
|
has_left_to_right_reaction,
|
|
42
42
|
has_bidirectional_reaction,
|
|
43
43
|
has_right_to_left_reaction,
|
|
44
|
-
enabled_by,
|
|
45
|
-
has_input,
|
|
46
|
-
has_output,
|
|
47
|
-
has_participant,
|
|
48
|
-
reaction_enabled_by_molecular_function,
|
|
44
|
+
v.enabled_by,
|
|
45
|
+
v.has_input,
|
|
46
|
+
v.has_output,
|
|
47
|
+
v.has_participant,
|
|
48
|
+
v.reaction_enabled_by_molecular_function,
|
|
49
49
|
]
|
|
50
50
|
|
|
51
51
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -53,16 +53,13 @@ class RheaGetter(Obo):
|
|
|
53
53
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
def
|
|
57
|
-
"""Get Rhea as OBO."""
|
|
58
|
-
return RheaGetter(force=force)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdflib.Graph":
|
|
56
|
+
def ensure_rhea_rdf(version: str | None = None, force: bool = False) -> "rdflib.Graph":
|
|
62
57
|
"""Get the Rhea RDF graph."""
|
|
63
58
|
# see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
|
|
64
59
|
if version is None:
|
|
65
60
|
version = get_version(PREFIX)
|
|
61
|
+
if version is None:
|
|
62
|
+
raise ValueError
|
|
66
63
|
return pystow.ensure_rdf(
|
|
67
64
|
"pyobo",
|
|
68
65
|
"raw",
|
|
@@ -100,7 +97,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
100
97
|
}
|
|
101
98
|
"""
|
|
102
99
|
)
|
|
103
|
-
names = {
|
|
100
|
+
names = {
|
|
101
|
+
str(identifier): str(name)
|
|
102
|
+
for _, identifier, name in cast(Iterable[tuple[Any, str, str]], result)
|
|
103
|
+
}
|
|
104
104
|
|
|
105
105
|
terms: dict[str, Term] = {}
|
|
106
106
|
master_to_left: dict[str, str] = {}
|
|
@@ -145,8 +145,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
145
145
|
?compound rh:chebi|rh:underlyingChebi|(rh:reactivePart/rh:chebi) ?chebi .
|
|
146
146
|
}
|
|
147
147
|
"""
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
results = cast(Iterable[tuple[int, str, str]], graph.query(sparql))
|
|
149
|
+
for master_rhea_id_int, side_uri, chebi_uri in results:
|
|
150
|
+
master_rhea_id = str(master_rhea_id_int)
|
|
150
151
|
chebi_reference = Reference(
|
|
151
152
|
prefix="chebi", identifier=chebi_uri[len("http://purl.obolibrary.org/obo/CHEBI_") :]
|
|
152
153
|
)
|
|
@@ -159,10 +160,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
159
160
|
right_rhea_id = master_to_left[master_rhea_id]
|
|
160
161
|
else:
|
|
161
162
|
raise ValueError(f"Invalid side: {side_uri}")
|
|
162
|
-
terms[master_rhea_id].
|
|
163
|
-
terms[master_to_bi[master_rhea_id]].
|
|
164
|
-
terms[left_rhea_id].append_relationship(has_input, chebi_reference)
|
|
165
|
-
terms[right_rhea_id].append_relationship(has_output, chebi_reference)
|
|
163
|
+
terms[master_rhea_id].annotate_object(v.has_participant, chebi_reference)
|
|
164
|
+
terms[master_to_bi[master_rhea_id]].annotate_object(v.has_participant, chebi_reference)
|
|
165
|
+
terms[left_rhea_id].append_relationship(v.has_input, chebi_reference)
|
|
166
|
+
terms[right_rhea_id].append_relationship(v.has_output, chebi_reference)
|
|
166
167
|
|
|
167
168
|
hierarchy = ensure_df(
|
|
168
169
|
PREFIX,
|
|
@@ -181,8 +182,8 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
181
182
|
("reactome", "rhea2reactome", None),
|
|
182
183
|
("macie", "rhea2macie", None),
|
|
183
184
|
("metacyc", "rhea2metacyc", None),
|
|
184
|
-
("go", "rhea2go", reaction_enabled_by_molecular_function),
|
|
185
|
-
("uniprot", "rhea2uniprot", enabled_by),
|
|
185
|
+
("go", "rhea2go", v.reaction_enabled_by_molecular_function),
|
|
186
|
+
("uniprot", "rhea2uniprot", v.enabled_by),
|
|
186
187
|
]:
|
|
187
188
|
xref_df = ensure_df(
|
|
188
189
|
PREFIX,
|
|
@@ -202,7 +203,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
202
203
|
)
|
|
203
204
|
continue
|
|
204
205
|
target_reference = Reference(prefix=xref_prefix, identifier=xref_id)
|
|
205
|
-
if
|
|
206
|
+
if relation is not None:
|
|
206
207
|
terms[directional_rhea_id].append_relationship(relation, target_reference)
|
|
207
208
|
else:
|
|
208
209
|
terms[directional_rhea_id].append_xref(target_reference)
|
|
@@ -223,11 +224,11 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
223
224
|
_iubmb,
|
|
224
225
|
) in ec_df.values:
|
|
225
226
|
terms[directional_rhea_id].append_relationship(
|
|
226
|
-
enabled_by, Reference(prefix="
|
|
227
|
+
v.enabled_by, Reference(prefix="ec", identifier=ec)
|
|
227
228
|
)
|
|
228
229
|
|
|
229
230
|
yield from terms.values()
|
|
230
231
|
|
|
231
232
|
|
|
232
233
|
if __name__ == "__main__":
|
|
233
|
-
RheaGetter
|
|
234
|
+
RheaGetter.cli(["--owl"])
|
pyobo/sources/ror.py
CHANGED
|
@@ -3,16 +3,18 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
|
+
import logging
|
|
6
7
|
import zipfile
|
|
7
8
|
from collections.abc import Iterable
|
|
8
9
|
from typing import Any
|
|
9
10
|
|
|
10
11
|
import bioregistry
|
|
11
12
|
import zenodo_client
|
|
13
|
+
from pydantic import ValidationError
|
|
12
14
|
from tqdm.auto import tqdm
|
|
13
15
|
|
|
14
16
|
from pyobo.struct import Obo, Reference, Term
|
|
15
|
-
from pyobo.struct.struct import acronym
|
|
17
|
+
from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, acronym
|
|
16
18
|
from pyobo.struct.typedef import (
|
|
17
19
|
has_homepage,
|
|
18
20
|
has_part,
|
|
@@ -23,11 +25,13 @@ from pyobo.struct.typedef import (
|
|
|
23
25
|
see_also,
|
|
24
26
|
)
|
|
25
27
|
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
26
29
|
PREFIX = "ror"
|
|
27
30
|
ROR_ZENODO_RECORD_ID = "10086202"
|
|
28
31
|
|
|
29
32
|
# Constants
|
|
30
|
-
ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
|
|
33
|
+
ORG_CLASS = Reference(prefix="OBI", identifier="0000245", name="organization")
|
|
34
|
+
CITY_CLASS = Reference(prefix="ENVO", identifier="00000856", name="city")
|
|
31
35
|
|
|
32
36
|
RMAP = {
|
|
33
37
|
"Related": see_also,
|
|
@@ -52,16 +56,7 @@ class RORGetter(Obo):
|
|
|
52
56
|
ontology = bioregistry_key = PREFIX
|
|
53
57
|
typedefs = [has_homepage, *RMAP.values()]
|
|
54
58
|
synonym_typedefs = [acronym]
|
|
55
|
-
|
|
56
|
-
"ror": "https://ror.org/",
|
|
57
|
-
"geonames": "https://www.geonames.org/",
|
|
58
|
-
"ENVO": "http://purl.obolibrary.org/obo/ENVO_",
|
|
59
|
-
"BFO": "http://purl.obolibrary.org/obo/BFO_",
|
|
60
|
-
"RO": "http://purl.obolibrary.org/obo/RO_",
|
|
61
|
-
"OBI": "http://purl.obolibrary.org/obo/OBI_",
|
|
62
|
-
"OMO": "http://purl.obolibrary.org/obo/OMO_",
|
|
63
|
-
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
|
64
|
-
}
|
|
59
|
+
root_terms = [CITY_CLASS, ORG_CLASS]
|
|
65
60
|
|
|
66
61
|
def __post_init__(self):
|
|
67
62
|
self.data_version, _url, _path = _get_info()
|
|
@@ -69,26 +64,40 @@ class RORGetter(Obo):
|
|
|
69
64
|
|
|
70
65
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
71
66
|
"""Iterate over terms in the ontology."""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"
|
|
82
|
-
"
|
|
67
|
+
yield CHARLIE_TERM
|
|
68
|
+
yield HUMAN_TERM
|
|
69
|
+
yield Term(reference=ORG_CLASS)
|
|
70
|
+
yield Term(reference=CITY_CLASS)
|
|
71
|
+
yield from ROR_ORGANIZATION_TYPE_TO_OBI.values()
|
|
72
|
+
yield from iterate_ror_terms(force=force)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
ROR_ORGANIZATION_TYPE_TO_OBI: dict[str, Term] = {
|
|
76
|
+
"Education": Term.default(PREFIX, "education", "educational organization"),
|
|
77
|
+
"Facility": Term.default(PREFIX, "facility", "facility"),
|
|
78
|
+
"Company": Term.default(PREFIX, "company", "company"),
|
|
79
|
+
"Government": Term.default(PREFIX, "government", "government organization"),
|
|
80
|
+
"Healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
|
|
81
|
+
"Archive": Term.default(PREFIX, "archive", "archival organization"),
|
|
82
|
+
"Nonprofit": Term.default(PREFIX, "healthcare", "nonprofit organization")
|
|
83
|
+
.append_xref(Reference(prefix="ICO", identifier="0000048"))
|
|
84
|
+
.append_xref(Reference(prefix="GSSO", identifier="004615")),
|
|
83
85
|
}
|
|
86
|
+
for _k, v in ROR_ORGANIZATION_TYPE_TO_OBI.items():
|
|
87
|
+
v.append_parent(ORG_CLASS)
|
|
88
|
+
v.append_contributor(CHARLIE_TERM)
|
|
89
|
+
v.append_comment(PYOBO_INJECTED)
|
|
90
|
+
|
|
84
91
|
_MISSED_ORG_TYPES: set[str] = set()
|
|
85
92
|
|
|
86
93
|
|
|
87
94
|
def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
88
95
|
"""Iterate over terms in ROR."""
|
|
89
|
-
|
|
90
|
-
unhandled_xref_prefixes = set()
|
|
91
|
-
|
|
96
|
+
_version, _source_uri, records = get_latest(force=force)
|
|
97
|
+
unhandled_xref_prefixes: set[str] = set()
|
|
98
|
+
|
|
99
|
+
seen_geonames_references = set()
|
|
100
|
+
for record in tqdm(records, unit_scale=True, unit="record", desc=f"{PREFIX} v{_version}"):
|
|
92
101
|
identifier = record["id"].removeprefix("https://ror.org/")
|
|
93
102
|
name = record["name"]
|
|
94
103
|
name = NAME_REMAPPING.get(name, name)
|
|
@@ -103,13 +112,14 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
|
103
112
|
type="Instance",
|
|
104
113
|
definition=description,
|
|
105
114
|
)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
115
|
+
for organization_type in organization_types:
|
|
116
|
+
if organization_type == "Other":
|
|
117
|
+
term.append_parent(ORG_CLASS)
|
|
118
|
+
else:
|
|
119
|
+
term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])
|
|
110
120
|
|
|
111
121
|
for link in record.get("links", []):
|
|
112
|
-
term.
|
|
122
|
+
term.annotate_uri(has_homepage, link)
|
|
113
123
|
|
|
114
124
|
if name.startswith("The "):
|
|
115
125
|
term.append_synonym(name.removeprefix("The "))
|
|
@@ -120,23 +130,29 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
|
120
130
|
RMAP[relationship["type"]], Reference(prefix=PREFIX, identifier=target_id)
|
|
121
131
|
)
|
|
122
132
|
|
|
123
|
-
|
|
133
|
+
if record.get("status") != "active":
|
|
134
|
+
term.is_obsolete = True
|
|
124
135
|
|
|
125
136
|
for address in record.get("addresses", []):
|
|
126
137
|
city = address.get("geonames_city")
|
|
127
138
|
if not city:
|
|
128
139
|
continue
|
|
129
|
-
|
|
130
|
-
|
|
140
|
+
geonames_reference = Reference(
|
|
141
|
+
prefix="geonames", identifier=str(city["id"]), name=city["city"]
|
|
131
142
|
)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
143
|
+
seen_geonames_references.add(geonames_reference)
|
|
144
|
+
term.append_relationship(RMAP["Located in"], geonames_reference)
|
|
145
|
+
|
|
146
|
+
for label_dict in record.get("labels", []):
|
|
147
|
+
label = label_dict["label"]
|
|
148
|
+
label = label.strip().replace("\n", " ")
|
|
149
|
+
language = label_dict["iso639"]
|
|
150
|
+
term.append_synonym(label, language=language)
|
|
136
151
|
if label.startswith("The "):
|
|
137
|
-
term.append_synonym(label.removeprefix("The "))
|
|
152
|
+
term.append_synonym(label.removeprefix("The "), language=language)
|
|
138
153
|
|
|
139
154
|
for synonym in record.get("aliases", []):
|
|
155
|
+
synonym = synonym.strip().replace("\n", " ")
|
|
140
156
|
term.append_synonym(synonym)
|
|
141
157
|
if synonym.startswith("The "):
|
|
142
158
|
term.append_synonym(synonym.removeprefix("The "))
|
|
@@ -162,10 +178,21 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
|
162
178
|
if isinstance(identifiers, str):
|
|
163
179
|
identifiers = [identifiers]
|
|
164
180
|
for xref_id in identifiers:
|
|
165
|
-
|
|
181
|
+
xref_id = xref_id.replace(" ", "")
|
|
182
|
+
try:
|
|
183
|
+
xref = Reference(prefix=norm_prefix, identifier=xref_id)
|
|
184
|
+
except ValidationError:
|
|
185
|
+
tqdm.write(f"[{term.curie}] invalid xref: {norm_prefix}:{xref_id}")
|
|
186
|
+
else:
|
|
187
|
+
term.append_xref(xref)
|
|
166
188
|
|
|
167
189
|
yield term
|
|
168
190
|
|
|
191
|
+
for geonames_ref in sorted(seen_geonames_references):
|
|
192
|
+
geonames_term = Term(reference=geonames_ref, type="Instance")
|
|
193
|
+
geonames_term.append_parent(CITY_CLASS)
|
|
194
|
+
yield geonames_term
|
|
195
|
+
|
|
169
196
|
|
|
170
197
|
def _get_info(*, force: bool = False):
|
|
171
198
|
client = zenodo_client.Zenodo()
|
|
@@ -193,7 +220,7 @@ def get_latest(*, force: bool = False):
|
|
|
193
220
|
|
|
194
221
|
def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
|
|
195
222
|
"""Get a mapping of ROR ids to GeoNames IDs for countries."""
|
|
196
|
-
from pyobo.sources.geonames import get_city_to_country
|
|
223
|
+
from pyobo.sources.geonames.geonames import get_city_to_country
|
|
197
224
|
|
|
198
225
|
city_to_country = get_city_to_country()
|
|
199
226
|
rv = {}
|
|
@@ -207,4 +234,4 @@ def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
|
|
|
207
234
|
|
|
208
235
|
|
|
209
236
|
if __name__ == "__main__":
|
|
210
|
-
RORGetter
|
|
237
|
+
RORGetter.cli()
|
|
File without changes
|
pyobo/sources/selventa/schem.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Selventa chemicals.
|
|
2
2
|
|
|
3
|
-
.. seealso::
|
|
3
|
+
.. seealso::
|
|
4
|
+
|
|
5
|
+
https://github.com/pyobo/pyobo/issues/27
|
|
4
6
|
"""
|
|
5
7
|
|
|
6
8
|
from collections.abc import Iterable
|
|
@@ -29,11 +31,6 @@ class SCHEMGetter(Obo):
|
|
|
29
31
|
return iter_terms(force=force)
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
def get_obo(*, force: bool = False) -> Obo:
|
|
33
|
-
"""Get Selventa chemical as OBO."""
|
|
34
|
-
return SCHEMGetter(force=force)
|
|
35
|
-
|
|
36
|
-
|
|
37
34
|
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
38
35
|
"""Iterate over selventa chemical terms."""
|
|
39
36
|
df = ensure_df(PREFIX, url=URL, skiprows=8, force=force)
|
|
@@ -45,4 +42,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
45
42
|
|
|
46
43
|
|
|
47
44
|
if __name__ == "__main__":
|
|
48
|
-
|
|
45
|
+
SCHEMGetter.cli()
|
pyobo/sources/selventa/scomp.py
CHANGED
|
@@ -26,11 +26,6 @@ class SCOMPGetter(Obo):
|
|
|
26
26
|
return iter_terms(force=force)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def get_obo(*, force: bool = False) -> Obo:
|
|
30
|
-
"""Get Selventa Complexes as OBO."""
|
|
31
|
-
return SCOMPGetter(force=force)
|
|
32
|
-
|
|
33
|
-
|
|
34
29
|
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
35
30
|
"""Iterate over selventa complex terms."""
|
|
36
31
|
df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
|
|
@@ -54,4 +49,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
54
49
|
|
|
55
50
|
|
|
56
51
|
if __name__ == "__main__":
|
|
57
|
-
|
|
52
|
+
SCOMPGetter.cli()
|
pyobo/sources/selventa/sdis.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Selventa diseases.
|
|
2
2
|
|
|
3
|
-
.. seealso::
|
|
3
|
+
.. seealso::
|
|
4
|
+
|
|
5
|
+
https://github.com/pyobo/pyobo/issues/26
|
|
4
6
|
"""
|
|
5
7
|
|
|
6
8
|
from collections.abc import Iterable
|
|
@@ -29,11 +31,6 @@ class SDISGetter(Obo):
|
|
|
29
31
|
return iter_terms(force=force)
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
def get_obo(*, force: bool = False) -> Obo:
|
|
33
|
-
"""Get Selventa Diseases as OBO."""
|
|
34
|
-
return SDISGetter(force=force)
|
|
35
|
-
|
|
36
|
-
|
|
37
34
|
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
38
35
|
"""Iterate over selventa disease terms."""
|
|
39
36
|
df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
|
|
@@ -48,4 +45,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
48
45
|
|
|
49
46
|
|
|
50
47
|
if __name__ == "__main__":
|
|
51
|
-
|
|
48
|
+
SDISGetter.cli()
|
pyobo/sources/selventa/sfam.py
CHANGED
|
@@ -26,11 +26,6 @@ class SFAMGetter(Obo):
|
|
|
26
26
|
return iter_terms(force=force)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def get_obo(*, force: bool = False) -> Obo:
|
|
30
|
-
"""Get Selventa Families as OBO."""
|
|
31
|
-
return SFAMGetter(force=force)
|
|
32
|
-
|
|
33
|
-
|
|
34
29
|
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
35
30
|
"""Iterate over selventa family terms."""
|
|
36
31
|
df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
|
|
@@ -52,4 +47,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
52
47
|
|
|
53
48
|
|
|
54
49
|
if __name__ == "__main__":
|
|
55
|
-
|
|
50
|
+
SFAMGetter.cli()
|
pyobo/sources/sgd.py
CHANGED
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from urllib.parse import unquote_plus
|
|
5
5
|
|
|
6
|
+
from pystow.utils import read_tarfile_csv
|
|
7
|
+
|
|
6
8
|
from ..struct import Obo, Reference, Synonym, Term, from_species
|
|
7
|
-
from ..utils.path import
|
|
9
|
+
from ..utils.path import ensure_path
|
|
8
10
|
|
|
9
11
|
__all__ = [
|
|
10
12
|
"SGDGetter",
|
|
@@ -31,24 +33,17 @@ class SGDGetter(Obo):
|
|
|
31
33
|
yield from get_terms(self, force=force)
|
|
32
34
|
|
|
33
35
|
|
|
34
|
-
def get_obo(force: bool = False) -> Obo:
|
|
35
|
-
"""Get SGD as OBO."""
|
|
36
|
-
return SGDGetter(force=force)
|
|
37
|
-
|
|
38
|
-
|
|
39
36
|
def get_terms(ontology: Obo, force: bool = False) -> Iterable[Term]:
|
|
40
37
|
"""Get SGD terms."""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
path = ensure_path(PREFIX, url=URL, version=ontology._version_or_raise, force=force)
|
|
39
|
+
df = read_tarfile_csv(
|
|
40
|
+
path,
|
|
44
41
|
inner_path=INNER_PATH,
|
|
45
42
|
sep="\t",
|
|
46
43
|
skiprows=18,
|
|
47
44
|
header=None,
|
|
48
45
|
names=HEADER,
|
|
49
|
-
force=force,
|
|
50
46
|
dtype=str,
|
|
51
|
-
version=ontology._version_or_raise,
|
|
52
47
|
)
|
|
53
48
|
df = df[df["feature"] == "gene"]
|
|
54
49
|
for data in df["data"]:
|