pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/complexportal.py
CHANGED
|
@@ -7,7 +7,16 @@ import pandas as pd
|
|
|
7
7
|
from tqdm.auto import tqdm
|
|
8
8
|
|
|
9
9
|
from pyobo.resources.ncbitaxon import get_ncbitaxon_name
|
|
10
|
-
from pyobo.struct import
|
|
10
|
+
from pyobo.struct import (
|
|
11
|
+
Obo,
|
|
12
|
+
Reference,
|
|
13
|
+
Synonym,
|
|
14
|
+
Term,
|
|
15
|
+
_parse_str_or_curie_or_uri,
|
|
16
|
+
from_species,
|
|
17
|
+
has_citation,
|
|
18
|
+
has_part,
|
|
19
|
+
)
|
|
11
20
|
from pyobo.utils.path import ensure_df
|
|
12
21
|
|
|
13
22
|
__all__ = [
|
|
@@ -96,13 +105,14 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
|
|
|
96
105
|
xref = xref.replace("protein ontology:PR_", "PR:")
|
|
97
106
|
xref = xref.replace("rhea:rhea ", "rhea:")
|
|
98
107
|
xref = xref.replace("rhea:Rhea ", "rhea:")
|
|
108
|
+
xref = xref.replace("rhea:RHEA ", "rhea:")
|
|
99
109
|
xref = xref.replace("rhea:RHEA:rhea", "rhea:")
|
|
100
110
|
xref = xref.replace("rhea:RHEA: ", "rhea:")
|
|
101
111
|
xref = xref.replace("rhea:RHEA:rhea ", "rhea:")
|
|
102
112
|
xref = xref.replace("intenz:RHEA:", "rhea:")
|
|
103
|
-
xref = xref.replace("eccode::", "
|
|
104
|
-
xref = xref.replace("eccode:EC:", "
|
|
105
|
-
xref = xref.replace("intenz:EC:", "
|
|
113
|
+
xref = xref.replace("eccode::", "ec:")
|
|
114
|
+
xref = xref.replace("eccode:EC:", "ec:")
|
|
115
|
+
xref = xref.replace("intenz:EC:", "ec:")
|
|
106
116
|
xref = xref.replace("eccode:RHEA:", "rhea:")
|
|
107
117
|
xref = xref.replace("efo:MONDO:", "MONDO:")
|
|
108
118
|
xref = xref.replace("omim:MIM:", "omim:")
|
|
@@ -125,7 +135,7 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
|
|
|
125
135
|
xref_curie = _clean_intenz(xref_curie)
|
|
126
136
|
|
|
127
137
|
try:
|
|
128
|
-
reference =
|
|
138
|
+
reference = _parse_str_or_curie_or_uri(xref_curie)
|
|
129
139
|
except ValueError:
|
|
130
140
|
logger.warning("can not parse CURIE: %s", xref_curie)
|
|
131
141
|
continue
|
|
@@ -146,18 +156,13 @@ class ComplexPortalGetter(Obo):
|
|
|
146
156
|
"""An ontology representation of the Complex Portal."""
|
|
147
157
|
|
|
148
158
|
bioversions_key = ontology = PREFIX
|
|
149
|
-
typedefs = [from_species, has_part]
|
|
159
|
+
typedefs = [from_species, has_part, has_citation]
|
|
150
160
|
|
|
151
161
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
152
162
|
"""Iterate over terms in the ontology."""
|
|
153
163
|
return get_terms(version=self._version_or_raise)
|
|
154
164
|
|
|
155
165
|
|
|
156
|
-
def get_obo(force: bool = False) -> Obo:
|
|
157
|
-
"""Get the ComplexPortal OBO."""
|
|
158
|
-
return ComplexPortalGetter(force=force)
|
|
159
|
-
|
|
160
|
-
|
|
161
166
|
def get_df(version: str, force: bool = False) -> pd.DataFrame:
|
|
162
167
|
"""Get a combined ComplexPortal dataframe."""
|
|
163
168
|
url_base = f"ftp://ftp.ebi.ac.uk/pub/databases/intact/complex/{version}/complextab"
|
|
@@ -222,29 +227,24 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
222
227
|
taxonomy_name,
|
|
223
228
|
members,
|
|
224
229
|
) in it:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
230
|
+
term = Term(
|
|
231
|
+
reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
|
|
232
|
+
definition=definition.strip() if pd.notna(definition) else None,
|
|
233
|
+
synonyms=[Synonym(name=alias) for alias in aliases],
|
|
234
|
+
)
|
|
228
235
|
for reference, note in xrefs:
|
|
229
236
|
if note == "identity":
|
|
230
|
-
|
|
237
|
+
term.append_xref(reference)
|
|
231
238
|
elif note == "see-also" and reference.prefix == "pubmed":
|
|
232
|
-
|
|
239
|
+
term.append_provenance(reference)
|
|
233
240
|
elif (note, reference.prefix) not in unhandled_xref_type:
|
|
234
241
|
logger.debug(f"unhandled xref type: {note} / {reference.prefix}")
|
|
235
242
|
unhandled_xref_type.add((note, reference.prefix))
|
|
236
243
|
|
|
237
|
-
term = Term(
|
|
238
|
-
reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
|
|
239
|
-
definition=definition.strip() if pd.notna(definition) else None,
|
|
240
|
-
synonyms=synonyms,
|
|
241
|
-
xrefs=_xrefs,
|
|
242
|
-
provenance=provenance,
|
|
243
|
-
)
|
|
244
244
|
term.set_species(identifier=taxonomy_id, name=taxonomy_name)
|
|
245
245
|
|
|
246
246
|
for reference, _count in members:
|
|
247
|
-
term.
|
|
247
|
+
term.annotate_object(has_part, reference)
|
|
248
248
|
|
|
249
249
|
yield term
|
|
250
250
|
|
pyobo/sources/conso.py
CHANGED
|
@@ -4,7 +4,7 @@ from collections.abc import Iterable
|
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
|
-
from ..struct import Obo, Reference, Synonym, Term
|
|
7
|
+
from ..struct import Obo, Reference, Synonym, Term, _parse_str_or_curie_or_uri, has_citation
|
|
8
8
|
from ..utils.io import multidict
|
|
9
9
|
from ..utils.path import ensure_df
|
|
10
10
|
|
|
@@ -25,36 +25,28 @@ class CONSOGetter(Obo):
|
|
|
25
25
|
|
|
26
26
|
ontology = PREFIX
|
|
27
27
|
dynamic_version = True
|
|
28
|
+
typedefs = [has_citation]
|
|
28
29
|
|
|
29
30
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
30
31
|
"""Iterate over terms in the ontology."""
|
|
31
32
|
return iter_terms()
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
def get_obo() -> Obo:
|
|
35
|
-
"""Get CONSO as OBO."""
|
|
36
|
-
return CONSOGetter()
|
|
37
|
-
|
|
38
|
-
|
|
39
35
|
def iter_terms() -> Iterable[Term]:
|
|
40
36
|
"""Get CONSO terms."""
|
|
41
37
|
terms_df = ensure_df(PREFIX, url=TERMS_URL)
|
|
42
38
|
|
|
43
39
|
synonyms_df = ensure_df(PREFIX, url=SYNONYMS_URL)
|
|
44
40
|
synonyms_df["reference"] = synonyms_df["reference"].map(
|
|
45
|
-
lambda s: [
|
|
41
|
+
lambda s: [_parse_str_or_curie_or_uri(s)] if pd.notna(s) and s != "?" else [],
|
|
46
42
|
)
|
|
47
|
-
synonyms_df["specificity"] = synonyms_df["specificity"].map(
|
|
48
|
-
lambda s: "EXACT" if pd.isna(s) or s == "?" else s
|
|
49
|
-
)
|
|
50
|
-
|
|
51
43
|
synonyms = multidict(
|
|
52
44
|
(
|
|
53
45
|
identifier,
|
|
54
46
|
Synonym(
|
|
55
47
|
name=synonym,
|
|
56
48
|
provenance=provenance,
|
|
57
|
-
specificity=specificity,
|
|
49
|
+
specificity=None if pd.isna(specificity) or specificity == "?" else specificity,
|
|
58
50
|
),
|
|
59
51
|
)
|
|
60
52
|
for identifier, synonym, provenance, specificity in synonyms_df.values
|
|
@@ -66,21 +58,21 @@ def iter_terms() -> Iterable[Term]:
|
|
|
66
58
|
for _, row in terms_df.iterrows():
|
|
67
59
|
if row["Name"] == "WITHDRAWN":
|
|
68
60
|
continue
|
|
69
|
-
|
|
70
|
-
for curie in row["References"].split(","):
|
|
71
|
-
curie = curie.strip()
|
|
72
|
-
if not curie:
|
|
73
|
-
continue
|
|
74
|
-
reference = Reference.from_curie(curie)
|
|
75
|
-
if reference is not None:
|
|
76
|
-
provenance.append(reference)
|
|
61
|
+
|
|
77
62
|
identifier = row["Identifier"]
|
|
78
|
-
|
|
63
|
+
term = Term(
|
|
79
64
|
reference=Reference(prefix=PREFIX, identifier=identifier, name=row["Name"]),
|
|
80
65
|
definition=row["Description"],
|
|
81
|
-
provenance=provenance,
|
|
82
66
|
synonyms=synonyms.get(identifier, []),
|
|
83
67
|
)
|
|
68
|
+
for curie in row["References"].split(","):
|
|
69
|
+
curie = curie.strip()
|
|
70
|
+
if not curie:
|
|
71
|
+
continue
|
|
72
|
+
reference = _parse_str_or_curie_or_uri(curie)
|
|
73
|
+
if reference is not None:
|
|
74
|
+
term.append_provenance(reference)
|
|
75
|
+
yield term
|
|
84
76
|
|
|
85
77
|
|
|
86
78
|
if __name__ == "__main__":
|
pyobo/sources/cpt.py
CHANGED
|
File without changes
|
pyobo/sources/credit.py
CHANGED
|
@@ -23,20 +23,12 @@ class CreditGetter(Obo):
|
|
|
23
23
|
|
|
24
24
|
ontology = PREFIX
|
|
25
25
|
static_version = "2022"
|
|
26
|
-
idspaces = {
|
|
27
|
-
PREFIX: "https://credit.niso.org/contributor-roles/",
|
|
28
|
-
}
|
|
29
26
|
|
|
30
27
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
31
28
|
"""Iterate over terms in the ontology."""
|
|
32
29
|
return get_terms(force=force)
|
|
33
30
|
|
|
34
31
|
|
|
35
|
-
def get_obo(force: bool = False) -> Obo:
|
|
36
|
-
"""Get RGD as OBO."""
|
|
37
|
-
return CreditGetter(force=force)
|
|
38
|
-
|
|
39
|
-
|
|
40
32
|
def get_terms(force: bool = False) -> list[Term]:
|
|
41
33
|
"""Get terms from the Contributor Roles Taxonomy via GitHub."""
|
|
42
34
|
path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
|
|
@@ -65,4 +57,4 @@ def get_terms(force: bool = False) -> list[Term]:
|
|
|
65
57
|
|
|
66
58
|
|
|
67
59
|
if __name__ == "__main__":
|
|
68
|
-
|
|
60
|
+
CreditGetter.cli()
|
pyobo/sources/cvx.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
"""Converter for CVX."""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from collections import defaultdict
|
|
4
5
|
from collections.abc import Iterable
|
|
5
6
|
|
|
6
7
|
import pandas as pd
|
|
7
8
|
|
|
8
|
-
from pyobo import Obo, Reference, Term
|
|
9
|
+
from pyobo import Obo, Reference, Term, TypeDef, default_reference
|
|
10
|
+
from pyobo.struct.struct import acronym
|
|
9
11
|
|
|
10
12
|
__all__ = [
|
|
11
13
|
"CVXGetter",
|
|
@@ -13,6 +15,12 @@ __all__ = [
|
|
|
13
15
|
|
|
14
16
|
cvx_url = "https://www2a.cdc.gov/vaccines/iis/iisstandards/downloads/cvx.txt"
|
|
15
17
|
PREFIX = "cvx"
|
|
18
|
+
STATUS = TypeDef(
|
|
19
|
+
reference=default_reference(PREFIX, "status", name="has status"), is_metadata_tag=True
|
|
20
|
+
)
|
|
21
|
+
NONVACCINE = TypeDef(reference=default_reference(PREFIX, "nonvaccine"), is_metadata_tag=True)
|
|
22
|
+
|
|
23
|
+
ACRONYM_RE = re.compile("^[A-Z]+$")
|
|
16
24
|
|
|
17
25
|
|
|
18
26
|
class CVXGetter(Obo):
|
|
@@ -20,6 +28,8 @@ class CVXGetter(Obo):
|
|
|
20
28
|
|
|
21
29
|
ontology = PREFIX
|
|
22
30
|
dynamic_version = True
|
|
31
|
+
synonym_typedefs = [acronym]
|
|
32
|
+
typedefs = [STATUS, NONVACCINE]
|
|
23
33
|
|
|
24
34
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
25
35
|
"""Iterate over terms in the ontology."""
|
|
@@ -71,8 +81,20 @@ def iter_terms() -> Iterable[Term]:
|
|
|
71
81
|
reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
|
|
72
82
|
is_obsolete=is_obsolete,
|
|
73
83
|
)
|
|
74
|
-
if
|
|
75
|
-
|
|
84
|
+
if (
|
|
85
|
+
short_name.casefold()
|
|
86
|
+
== full_name.casefold()
|
|
87
|
+
.replace("virus vaccine", "")
|
|
88
|
+
.replace("vaccine", "")
|
|
89
|
+
.replace(" ", " ")
|
|
90
|
+
.strip()
|
|
91
|
+
):
|
|
92
|
+
pass
|
|
93
|
+
elif short_name != full_name:
|
|
94
|
+
if ACRONYM_RE.match(short_name):
|
|
95
|
+
term.append_exact_synonym(short_name, type=acronym.reference)
|
|
96
|
+
else:
|
|
97
|
+
term.append_synonym(short_name)
|
|
76
98
|
if pd.notna(notes):
|
|
77
99
|
term.append_comment(notes)
|
|
78
100
|
if is_obsolete:
|
|
@@ -80,9 +102,9 @@ def iter_terms() -> Iterable[Term]:
|
|
|
80
102
|
if replacement_identifier:
|
|
81
103
|
term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
|
|
82
104
|
if pd.notna(status):
|
|
83
|
-
term.
|
|
105
|
+
term.annotate_string(STATUS, status)
|
|
84
106
|
if pd.notna(nonvaccine):
|
|
85
|
-
term.
|
|
107
|
+
term.annotate_boolean(NONVACCINE, nonvaccine)
|
|
86
108
|
terms[cvx] = term
|
|
87
109
|
|
|
88
110
|
for child, parents in dd.items():
|
pyobo/sources/depmap.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""DepMap cell lines."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
import pandas as pd
|
|
7
6
|
import pystow
|
|
@@ -10,7 +9,6 @@ from pyobo import Obo, Reference, Term
|
|
|
10
9
|
from pyobo.struct.typedef import exact_match
|
|
11
10
|
|
|
12
11
|
__all__ = [
|
|
13
|
-
"get_obo",
|
|
14
12
|
"DepMapGetter",
|
|
15
13
|
]
|
|
16
14
|
|
|
@@ -30,28 +28,24 @@ class DepMapGetter(Obo):
|
|
|
30
28
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
31
29
|
|
|
32
30
|
|
|
33
|
-
def
|
|
34
|
-
"""Get DepMap cell lines as OBO."""
|
|
35
|
-
return DepMapGetter(force=force)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def get_url(version: Optional[str] = None) -> str:
|
|
31
|
+
def get_url(version: str | None = None) -> str:
|
|
39
32
|
"""Get the URL for the given version of the DepMap cell line metadata file.
|
|
40
33
|
|
|
41
34
|
:param version: The version of the data
|
|
35
|
+
|
|
42
36
|
:returns: The URL as a string for downloading the data
|
|
43
37
|
|
|
44
38
|
.. warning::
|
|
45
39
|
|
|
46
|
-
This does not currently take the version into account. Need to write a crawler
|
|
47
|
-
to access.
|
|
40
|
+
This does not currently take the version into account. Need to write a crawler
|
|
41
|
+
since data is not easy to access.
|
|
48
42
|
"""
|
|
49
43
|
#: This is the DepMap Public 21Q2 version. There isn't a way to do this automatically without writing a crawler
|
|
50
44
|
url = "https://ndownloader.figshare.com/files/27902376"
|
|
51
45
|
return url
|
|
52
46
|
|
|
53
47
|
|
|
54
|
-
def _fix_mangled_int(x: str) ->
|
|
48
|
+
def _fix_mangled_int(x: str) -> str | None:
|
|
55
49
|
return str(int(float(x))) if pd.notna(x) else None
|
|
56
50
|
|
|
57
51
|
|
|
@@ -74,7 +68,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
|
74
68
|
columns
|
|
75
69
|
].values:
|
|
76
70
|
if pd.isna(name):
|
|
77
|
-
|
|
71
|
+
if pd.notna(sname):
|
|
72
|
+
name, sname = sname, None
|
|
73
|
+
else:
|
|
74
|
+
name = None
|
|
78
75
|
term = Term.from_triple(PREFIX, identifier, name)
|
|
79
76
|
if pd.notna(sname):
|
|
80
77
|
term.append_synonym(sname)
|
pyobo/sources/dictybase_gene.py
CHANGED
|
@@ -9,7 +9,7 @@ from collections.abc import Iterable
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from tqdm.auto import tqdm
|
|
11
11
|
|
|
12
|
-
from pyobo.struct import Obo,
|
|
12
|
+
from pyobo.struct import Obo, Term, from_species, has_gene_product
|
|
13
13
|
from pyobo.utils.path import ensure_df
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
@@ -41,11 +41,6 @@ class DictybaseGetter(Obo):
|
|
|
41
41
|
return get_terms(force=force)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
def get_obo(force: bool = False) -> Obo:
|
|
45
|
-
"""Get dictyBase Gene as OBO."""
|
|
46
|
-
return DictybaseGetter(force=force)
|
|
47
|
-
|
|
48
|
-
|
|
49
44
|
def get_terms(force: bool = False) -> Iterable[Term]:
|
|
50
45
|
"""Get terms."""
|
|
51
46
|
# TODO the mappings file has actually no uniprot at all, and requires text mining
|
|
@@ -67,7 +62,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
|
|
|
67
62
|
term.append_synonym(synonym.strip())
|
|
68
63
|
if synonyms and pd.notna(synonyms):
|
|
69
64
|
for synonym in synonyms.split(","):
|
|
70
|
-
term.append_synonym(
|
|
65
|
+
term.append_synonym(synonym.strip())
|
|
71
66
|
# for uniprot_id in uniprot_mappings.get(identifier, []):
|
|
72
67
|
# if not uniprot_id or pd.isna(uniprot_id) or uniprot_id in {"unknown", "pseudogene"}:
|
|
73
68
|
# continue
|
|
@@ -8,17 +8,17 @@ import itertools as itt
|
|
|
8
8
|
import logging
|
|
9
9
|
from collections.abc import Iterable, Mapping
|
|
10
10
|
from functools import lru_cache
|
|
11
|
-
from typing import Any
|
|
11
|
+
from typing import Any
|
|
12
12
|
from xml.etree import ElementTree
|
|
13
13
|
|
|
14
14
|
import pystow
|
|
15
15
|
from tqdm.auto import tqdm
|
|
16
16
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
17
|
+
from ...getters import NoBuildError
|
|
18
|
+
from ...struct import Obo, Reference, Term
|
|
19
|
+
from ...struct.typedef import has_inchi, has_salt, has_smiles
|
|
20
|
+
from ...utils.cache import cached_pickle
|
|
21
|
+
from ...utils.path import prefix_directory_join
|
|
22
22
|
|
|
23
23
|
__all__ = [
|
|
24
24
|
"DrugBankGetter",
|
|
@@ -40,11 +40,6 @@ class DrugBankGetter(Obo):
|
|
|
40
40
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
def get_obo(force: bool = False) -> Obo:
|
|
44
|
-
"""Get DrugBank as OBO."""
|
|
45
|
-
return DrugBankGetter(force=force)
|
|
46
|
-
|
|
47
|
-
|
|
48
43
|
def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
49
44
|
"""Iterate over DrugBank terms in OBO."""
|
|
50
45
|
for drug_info in iterate_drug_info(version, force=force):
|
|
@@ -120,13 +115,13 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
|
|
|
120
115
|
if identifier:
|
|
121
116
|
term.append_xref(Reference(prefix=xref_prefix, identifier=identifier))
|
|
122
117
|
|
|
123
|
-
for
|
|
124
|
-
identifier = drug_info.get(
|
|
118
|
+
for key, typedef_ in [("smiles", has_smiles), ("inchi", has_inchi)]:
|
|
119
|
+
identifier = drug_info.get(key)
|
|
125
120
|
if identifier:
|
|
126
|
-
term.
|
|
121
|
+
term.annotate_string(typedef_, identifier)
|
|
127
122
|
|
|
128
123
|
for salt in drug_info.get("salts", []):
|
|
129
|
-
term.
|
|
124
|
+
term.annotate_object(
|
|
130
125
|
has_salt,
|
|
131
126
|
Reference(
|
|
132
127
|
prefix="drugbank.salt",
|
|
@@ -139,7 +134,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
|
|
|
139
134
|
|
|
140
135
|
|
|
141
136
|
@lru_cache
|
|
142
|
-
def get_xml_root(version:
|
|
137
|
+
def get_xml_root(version: str | None = None) -> ElementTree.Element:
|
|
143
138
|
"""Get the DrugBank XML parser root.
|
|
144
139
|
|
|
145
140
|
Takes between 35-60 seconds.
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Run with ``python -m pyobo.sources.drugbank.drugbank_salt``
|
|
4
4
|
|
|
5
|
-
Get relations between drugbank salts and drugbank parents with
|
|
6
|
-
|
|
5
|
+
Get relations between drugbank salts and drugbank parents with ``pyobo relations
|
|
6
|
+
drugbank --relation obo:has_salt`` or
|
|
7
7
|
|
|
8
8
|
.. code-block:: python
|
|
9
9
|
|
|
@@ -16,7 +16,7 @@ import logging
|
|
|
16
16
|
from collections.abc import Iterable
|
|
17
17
|
|
|
18
18
|
from .drugbank import iterate_drug_info
|
|
19
|
-
from
|
|
19
|
+
from ...struct import Obo, Reference, Term
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
22
22
|
"DrugBankSaltGetter",
|
|
@@ -38,11 +38,6 @@ class DrugBankSaltGetter(Obo):
|
|
|
38
38
|
return iter_terms(version=self._version_or_raise, force=force)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
def get_obo(force: bool = False) -> Obo:
|
|
42
|
-
"""Get DrugBank Salts as OBO."""
|
|
43
|
-
return DrugBankSaltGetter(force=force)
|
|
44
|
-
|
|
45
|
-
|
|
46
41
|
def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
|
|
47
42
|
"""Iterate over DrugBank Salt terms in OBO."""
|
|
48
43
|
for drug_info in iterate_drug_info(version, force=force):
|
pyobo/sources/drugcentral.py
CHANGED
|
@@ -7,6 +7,7 @@ from contextlib import closing
|
|
|
7
7
|
|
|
8
8
|
import bioregistry
|
|
9
9
|
import psycopg2
|
|
10
|
+
from pydantic import ValidationError
|
|
10
11
|
from tqdm.auto import tqdm
|
|
11
12
|
|
|
12
13
|
from pyobo.struct import Obo, Reference, Synonym, Term
|
|
@@ -32,18 +33,13 @@ class DrugCentralGetter(Obo):
|
|
|
32
33
|
"""An ontology representation of the DrugCentral database."""
|
|
33
34
|
|
|
34
35
|
ontology = bioversions_key = PREFIX
|
|
35
|
-
typedefs = [exact_match]
|
|
36
|
+
typedefs = [exact_match, has_inchi, has_smiles]
|
|
36
37
|
|
|
37
38
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
38
39
|
"""Iterate over terms in the ontology."""
|
|
39
40
|
return iter_terms()
|
|
40
41
|
|
|
41
42
|
|
|
42
|
-
def get_obo(force: bool = False) -> Obo:
|
|
43
|
-
"""Get DrugCentral OBO."""
|
|
44
|
-
return DrugCentralGetter(force=force)
|
|
45
|
-
|
|
46
|
-
|
|
47
43
|
def iter_terms() -> Iterable[Term]:
|
|
48
44
|
"""Iterate over DrugCentral terms."""
|
|
49
45
|
with closing(psycopg2.connect(**PARAMS)) as conn:
|
|
@@ -71,10 +67,18 @@ def iter_terms() -> Iterable[Term]:
|
|
|
71
67
|
if xref_prefix_norm == "pdb.ligand":
|
|
72
68
|
# there is a weird invalid escaped \W appearing in pdb ligand ids
|
|
73
69
|
identifier = identifier.strip()
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
Reference(prefix=xref_prefix_norm, identifier=identifier)
|
|
77
|
-
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
xref = Reference(prefix=xref_prefix_norm, identifier=identifier)
|
|
73
|
+
except ValidationError:
|
|
74
|
+
# TODO mmsl is systematically incorrect, figure this out
|
|
75
|
+
if xref_prefix_norm != "mmsl":
|
|
76
|
+
tqdm.write(
|
|
77
|
+
f"[drugcentral:{drugcentral_id}] had invalid xref: {prefix}:{identifier}"
|
|
78
|
+
)
|
|
79
|
+
continue
|
|
80
|
+
else:
|
|
81
|
+
xrefs[str(drugcentral_id)].append(xref)
|
|
78
82
|
with closing(conn.cursor()) as cur:
|
|
79
83
|
cur.execute("SELECT id, name FROM public.synonyms")
|
|
80
84
|
synonyms: defaultdict[str, list[Synonym]] = defaultdict(list)
|
|
@@ -85,16 +89,16 @@ def iter_terms() -> Iterable[Term]:
|
|
|
85
89
|
drugcentral_id = str(drugcentral_id)
|
|
86
90
|
term = Term(
|
|
87
91
|
reference=Reference(prefix=PREFIX, identifier=drugcentral_id, name=name),
|
|
88
|
-
definition=definition,
|
|
92
|
+
definition=definition.replace("\n", " ") if definition else None,
|
|
89
93
|
synonyms=synonyms.get(drugcentral_id, []),
|
|
90
94
|
xrefs=xrefs.get(drugcentral_id, []),
|
|
91
95
|
)
|
|
92
96
|
if inchi_key:
|
|
93
97
|
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
|
|
94
98
|
if smiles:
|
|
95
|
-
term.
|
|
99
|
+
term.annotate_string(has_smiles, smiles)
|
|
96
100
|
if inchi:
|
|
97
|
-
term.
|
|
101
|
+
term.annotate_string(has_inchi, inchi)
|
|
98
102
|
if cas:
|
|
99
103
|
term.append_exact_match(Reference(prefix="cas", identifier=cas))
|
|
100
104
|
yield term
|