pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -113
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +108 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +183 -161
- pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +196 -118
- pyobo/gilda_utils.py +79 -200
- pyobo/identifier_utils/__init__.py +41 -0
- pyobo/identifier_utils/api.py +296 -0
- pyobo/identifier_utils/model.py +130 -0
- pyobo/identifier_utils/preprocessing.json +812 -0
- pyobo/identifier_utils/preprocessing.py +61 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +43 -39
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1358 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +0 -5
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +3 -8
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +10 -3
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +270 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1413 -643
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +13 -11
- pyobo/utils/io.py +17 -31
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +76 -70
- pyobo/version.py +3 -3
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
- pyobo-0.12.0.dist-info/RECORD +202 -0
- pyobo-0.12.0.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/uniprot/uniprot.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"""Converter for UniProt."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
|
-
from operator import attrgetter
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
5
|
+
from typing import cast
|
|
7
6
|
|
|
8
7
|
from tqdm.auto import tqdm
|
|
9
8
|
|
|
@@ -11,7 +10,17 @@ from pyobo import Obo, Reference
|
|
|
11
10
|
from pyobo.api.utils import get_version
|
|
12
11
|
from pyobo.constants import RAW_MODULE
|
|
13
12
|
from pyobo.identifier_utils import standardize_ec
|
|
14
|
-
from pyobo.struct import
|
|
13
|
+
from pyobo.struct import (
|
|
14
|
+
Term,
|
|
15
|
+
TypeDef,
|
|
16
|
+
_parse_str_or_curie_or_uri,
|
|
17
|
+
default_reference,
|
|
18
|
+
derives_from,
|
|
19
|
+
enables,
|
|
20
|
+
from_species,
|
|
21
|
+
has_citation,
|
|
22
|
+
participates_in,
|
|
23
|
+
)
|
|
15
24
|
from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
|
|
16
25
|
from pyobo.utils.io import open_reader
|
|
17
26
|
|
|
@@ -43,6 +52,7 @@ PARAMS = {
|
|
|
43
52
|
"query": QUERY,
|
|
44
53
|
"fields": FIELDS,
|
|
45
54
|
}
|
|
55
|
+
IS_REVIEWED = TypeDef(reference=default_reference(PREFIX, "reviewed"), is_metadata_tag=True)
|
|
46
56
|
|
|
47
57
|
|
|
48
58
|
class UniProtGetter(Obo):
|
|
@@ -57,6 +67,8 @@ class UniProtGetter(Obo):
|
|
|
57
67
|
molecularly_interacts_with,
|
|
58
68
|
derives_from,
|
|
59
69
|
located_in,
|
|
70
|
+
IS_REVIEWED,
|
|
71
|
+
has_citation,
|
|
60
72
|
]
|
|
61
73
|
|
|
62
74
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -64,12 +76,7 @@ class UniProtGetter(Obo):
|
|
|
64
76
|
yield from iter_terms(version=self._version_or_raise)
|
|
65
77
|
|
|
66
78
|
|
|
67
|
-
def
|
|
68
|
-
"""Get UniProt as OBO."""
|
|
69
|
-
return UniProtGetter(force=force)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
79
|
+
def iter_terms(version: str | None = None) -> Iterable[Term]:
|
|
73
80
|
"""Iterate over UniProt Terms."""
|
|
74
81
|
with open_reader(ensure(version=version)) as reader:
|
|
75
82
|
_ = next(reader) # header
|
|
@@ -89,7 +96,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
89
96
|
go_processes,
|
|
90
97
|
bindings,
|
|
91
98
|
description,
|
|
92
|
-
) in tqdm(reader, desc="
|
|
99
|
+
) in tqdm(reader, desc=f"[{PREFIX}] mapping", unit_scale=True):
|
|
93
100
|
if description:
|
|
94
101
|
description = description.removeprefix("FUNCTION: ")
|
|
95
102
|
term = Term(
|
|
@@ -99,19 +106,19 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
99
106
|
term.set_species(taxonomy_id)
|
|
100
107
|
if gene_ids:
|
|
101
108
|
for gene_id in gene_ids.split(";"):
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
109
|
+
if gene_id := gene_id.strip():
|
|
110
|
+
term.annotate_object(
|
|
111
|
+
gene_product_of, Reference(prefix="ncbigene", identifier=gene_id)
|
|
112
|
+
)
|
|
105
113
|
|
|
106
|
-
|
|
107
|
-
term.append_property("reviewed", "true")
|
|
114
|
+
term.annotate_boolean(IS_REVIEWED, True)
|
|
108
115
|
|
|
109
116
|
for go_process_ref in _parse_go(go_processes):
|
|
110
|
-
term.
|
|
117
|
+
term.annotate_object(participates_in, go_process_ref)
|
|
111
118
|
for go_function_ref in _parse_go(go_functions):
|
|
112
|
-
term.
|
|
119
|
+
term.annotate_object(enables, go_function_ref)
|
|
113
120
|
for go_component_ref in _parse_go(go_components):
|
|
114
|
-
term.
|
|
121
|
+
term.annotate_object(located_in, go_component_ref)
|
|
115
122
|
|
|
116
123
|
if proteome:
|
|
117
124
|
uniprot_proteome_id = proteome.split(":")[0]
|
|
@@ -122,11 +129,11 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
122
129
|
|
|
123
130
|
if rhea_curies:
|
|
124
131
|
for rhea_curie in rhea_curies.split(" "):
|
|
125
|
-
term.
|
|
132
|
+
term.annotate_object(
|
|
126
133
|
# FIXME this needs a different relation than enables
|
|
127
134
|
# see https://github.com/biopragmatics/pyobo/pull/168#issuecomment-1918680152
|
|
128
135
|
enables,
|
|
129
|
-
cast(Reference,
|
|
136
|
+
cast(Reference, _parse_str_or_curie_or_uri(rhea_curie, strict=True)),
|
|
130
137
|
)
|
|
131
138
|
|
|
132
139
|
if bindings:
|
|
@@ -136,22 +143,23 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
|
|
|
136
143
|
if part.startswith("/ligand_id"):
|
|
137
144
|
curie = part.removeprefix('/ligand_id="').rstrip('"')
|
|
138
145
|
binding_references.add(
|
|
139
|
-
cast(Reference,
|
|
146
|
+
cast(Reference, _parse_str_or_curie_or_uri(curie, strict=True))
|
|
140
147
|
)
|
|
141
|
-
for binding_reference in sorted(binding_references
|
|
142
|
-
term.
|
|
148
|
+
for binding_reference in sorted(binding_references):
|
|
149
|
+
term.annotate_object(molecularly_interacts_with, binding_reference)
|
|
143
150
|
|
|
144
151
|
if ecs:
|
|
145
152
|
for ec in ecs.split(";"):
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
153
|
+
if ec := ec.strip():
|
|
154
|
+
term.annotate_object(
|
|
155
|
+
enables, Reference(prefix="ec", identifier=standardize_ec(ec))
|
|
156
|
+
)
|
|
149
157
|
for pubmed in pubmeds.split(";"):
|
|
150
|
-
if pubmed:
|
|
151
|
-
term.append_provenance(Reference(prefix="pubmed", identifier=pubmed
|
|
158
|
+
if pubmed := pubmed.strip():
|
|
159
|
+
term.append_provenance(Reference(prefix="pubmed", identifier=pubmed))
|
|
152
160
|
for pdb in pdbs.split(";"):
|
|
153
|
-
if pdb:
|
|
154
|
-
term.append_xref(Reference(prefix="pdb", identifier=pdb
|
|
161
|
+
if pdb := pdb.strip():
|
|
162
|
+
term.append_xref(Reference(prefix="pdb", identifier=pdb))
|
|
155
163
|
yield term
|
|
156
164
|
|
|
157
165
|
|
|
@@ -164,10 +172,10 @@ def _parse_go(go_terms) -> list[Reference]:
|
|
|
164
172
|
return rv
|
|
165
173
|
|
|
166
174
|
|
|
167
|
-
def ensure(version:
|
|
175
|
+
def ensure(version: str | None = None, force: bool = False) -> Path:
|
|
168
176
|
"""Ensure the reviewed uniprot names are available."""
|
|
169
177
|
if version is None:
|
|
170
|
-
version = get_version("uniprot")
|
|
178
|
+
version = get_version("uniprot", strict=True)
|
|
171
179
|
return RAW_MODULE.ensure(
|
|
172
180
|
PREFIX,
|
|
173
181
|
version,
|
|
@@ -1,38 +1,13 @@
|
|
|
1
|
-
"""Converter for UniProt PTMs.
|
|
2
|
-
|
|
3
|
-
Line code Content Occurrence in an entry
|
|
4
|
-
--------- --------------------------- ------------------------------
|
|
5
|
-
ID Identifier (FT description) Once; starts a PTM entry
|
|
6
|
-
AC Accession (PTM-xxxx) Once
|
|
7
|
-
FT Feature key Once
|
|
8
|
-
TG Target Once; two targets separated by
|
|
9
|
-
a dash in case of intrachain
|
|
10
|
-
crosslinks
|
|
11
|
-
PA Position of the modification Optional; once
|
|
12
|
-
on the amino acid
|
|
13
|
-
PP Position of the modification Optional; once
|
|
14
|
-
in the polypeptide
|
|
15
|
-
CF Correction formula Optional; once
|
|
16
|
-
MM Monoisotopic mass difference Optional; once
|
|
17
|
-
MA Average mass difference Optional; once
|
|
18
|
-
LC Cellular location Optional; once; alternatives
|
|
19
|
-
can be proposed
|
|
20
|
-
TR Taxonomic range Optional; once or more
|
|
21
|
-
KW Keyword Optional; once or more
|
|
22
|
-
DR Cross-reference to external Optional; once or more
|
|
23
|
-
databases
|
|
24
|
-
// Terminator Once; ends an entr
|
|
25
|
-
|
|
26
|
-
"""
|
|
1
|
+
"""Converter for UniProt PTMs."""
|
|
27
2
|
|
|
28
3
|
import itertools as itt
|
|
29
4
|
from collections import defaultdict
|
|
30
5
|
from collections.abc import Iterable, Mapping
|
|
31
|
-
from typing import Optional
|
|
32
6
|
|
|
33
7
|
from tqdm.auto import tqdm
|
|
34
8
|
|
|
35
9
|
from pyobo import Obo, Reference, Term
|
|
10
|
+
from pyobo.struct import _parse_str_or_curie_or_uri
|
|
36
11
|
from pyobo.utils.path import ensure_path
|
|
37
12
|
|
|
38
13
|
__all__ = [
|
|
@@ -54,11 +29,6 @@ class UniProtPtmGetter(Obo):
|
|
|
54
29
|
yield from iter_terms(force=force)
|
|
55
30
|
|
|
56
31
|
|
|
57
|
-
def get_obo(force: bool = False) -> Obo:
|
|
58
|
-
"""Get UniProt PTMs as OBO."""
|
|
59
|
-
return UniProtPtmGetter(force=force)
|
|
60
|
-
|
|
61
|
-
|
|
62
32
|
def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
63
33
|
"""Iterate over UniProt PTM Terms."""
|
|
64
34
|
path = ensure_path(PREFIX, url=URL, force=force)
|
|
@@ -71,7 +41,7 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
|
|
|
71
41
|
yield term
|
|
72
42
|
|
|
73
43
|
|
|
74
|
-
def _parse(i, lines: Iterable[tuple[str, str]]) ->
|
|
44
|
+
def _parse(i, lines: Iterable[tuple[str, str]]) -> Term | None:
|
|
75
45
|
dd_: defaultdict[str, list[str]] = defaultdict(list)
|
|
76
46
|
for key, value in lines:
|
|
77
47
|
dd_[key].append(value)
|
|
@@ -97,7 +67,7 @@ def _parse(i, lines: Iterable[tuple[str, str]]) -> Optional[Term]:
|
|
|
97
67
|
if line.startswith(y):
|
|
98
68
|
line = x + line[len(y) :]
|
|
99
69
|
|
|
100
|
-
ref =
|
|
70
|
+
ref = _parse_str_or_curie_or_uri(line.replace("; ", ":"))
|
|
101
71
|
if ref:
|
|
102
72
|
term.append_xref(ref)
|
|
103
73
|
else:
|
pyobo/sources/utils.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections.abc import Mapping
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
from ..utils.io import multisetdict
|
|
7
8
|
|
|
@@ -13,9 +14,9 @@ __all__ = [
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
def get_go_mapping(path:
|
|
17
|
+
def get_go_mapping(path: Path, prefix: str) -> Mapping[str, set[tuple[str, str]]]:
|
|
17
18
|
"""Get a GO mapping file."""
|
|
18
|
-
with open(
|
|
19
|
+
with path.open() as file:
|
|
19
20
|
return multisetdict(
|
|
20
21
|
process_go_mapping_line(line.strip(), prefix=prefix) for line in file if line[0] != "!"
|
|
21
22
|
)
|
pyobo/sources/wikipathways.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Converter for WikiPathways."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
import urllib.error
|
|
5
4
|
from collections.abc import Iterable
|
|
6
5
|
|
|
6
|
+
from pystow.utils import DownloadError
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
7
9
|
from .gmt_utils import parse_wikipathways_gmt
|
|
8
10
|
from ..constants import SPECIES_REMAPPING
|
|
9
11
|
from ..struct import Obo, Reference, Term, from_species
|
|
@@ -51,21 +53,16 @@ class WikiPathwaysGetter(Obo):
|
|
|
51
53
|
return iter_terms(version=self._version_or_raise)
|
|
52
54
|
|
|
53
55
|
|
|
54
|
-
def get_obo() -> Obo:
|
|
55
|
-
"""Get WikiPathways as OBO."""
|
|
56
|
-
return WikiPathwaysGetter()
|
|
57
|
-
|
|
58
|
-
|
|
59
56
|
def iter_terms(version: str) -> Iterable[Term]:
|
|
60
57
|
"""Get WikiPathways terms."""
|
|
61
58
|
base_url = f"http://data.wikipathways.org/{version}/gmt/wikipathways-{version}-gmt"
|
|
62
59
|
|
|
63
|
-
for species_code, taxonomy_id in _PATHWAY_INFO:
|
|
60
|
+
for species_code, taxonomy_id in tqdm(_PATHWAY_INFO, desc=f"[{PREFIX}]", unit="species"):
|
|
64
61
|
url = f"{base_url}-{species_code}.gmt"
|
|
65
62
|
try:
|
|
66
63
|
path = ensure_path(PREFIX, url=url, version=version)
|
|
67
|
-
except
|
|
68
|
-
|
|
64
|
+
except DownloadError as e:
|
|
65
|
+
tqdm.write(f"[{PREFIX}] {e}")
|
|
69
66
|
continue
|
|
70
67
|
species_code = species_code.replace("_", " ")
|
|
71
68
|
taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
|
|
@@ -74,7 +71,7 @@ def iter_terms(version: str) -> Iterable[Term]:
|
|
|
74
71
|
term = Term(reference=Reference(prefix=PREFIX, identifier=identifier, name=name))
|
|
75
72
|
term.set_species(taxonomy_id, taxonomy_name)
|
|
76
73
|
for ncbigene_id in genes:
|
|
77
|
-
term.
|
|
74
|
+
term.annotate_object(
|
|
78
75
|
has_participant,
|
|
79
76
|
Reference(prefix="ncbigene", identifier=ncbigene_id),
|
|
80
77
|
)
|
pyobo/sources/zfin.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
from collections.abc import Iterable
|
|
6
|
-
from typing import Optional
|
|
7
6
|
|
|
8
7
|
from tqdm.auto import tqdm
|
|
9
8
|
|
|
@@ -12,6 +11,7 @@ from pyobo.struct import (
|
|
|
12
11
|
Obo,
|
|
13
12
|
Reference,
|
|
14
13
|
Term,
|
|
14
|
+
_parse_str_or_curie_or_uri,
|
|
15
15
|
from_species,
|
|
16
16
|
has_gene_product,
|
|
17
17
|
orthologous,
|
|
@@ -48,11 +48,6 @@ class ZFINGetter(Obo):
|
|
|
48
48
|
return get_terms(force=force, version=self._version_or_raise)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def get_obo(force: bool = False) -> Obo:
|
|
52
|
-
"""Get ZFIN OBO."""
|
|
53
|
-
return ZFINGetter(force=force)
|
|
54
|
-
|
|
55
|
-
|
|
56
51
|
MARKERS_COLUMNS = [
|
|
57
52
|
"zfin_id",
|
|
58
53
|
"name",
|
|
@@ -62,7 +57,7 @@ MARKERS_COLUMNS = [
|
|
|
62
57
|
]
|
|
63
58
|
|
|
64
59
|
|
|
65
|
-
def get_terms(force: bool = False, version:
|
|
60
|
+
def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
|
|
66
61
|
"""Get terms."""
|
|
67
62
|
alt_ids_df = ensure_df(
|
|
68
63
|
PREFIX,
|
|
@@ -73,7 +68,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
73
68
|
names=["alt", "zfin_id"],
|
|
74
69
|
version=version,
|
|
75
70
|
)
|
|
76
|
-
primary_to_alt_ids = defaultdict(set)
|
|
71
|
+
primary_to_alt_ids: defaultdict[str, set[str]] = defaultdict(set)
|
|
77
72
|
for alt_id, zfin_id in alt_ids_df.values:
|
|
78
73
|
primary_to_alt_ids[zfin_id].add(alt_id)
|
|
79
74
|
|
|
@@ -135,7 +130,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
135
130
|
# Entity type is redundant of identifier
|
|
136
131
|
# term.append_property("type", entity_type)
|
|
137
132
|
for alt_id in primary_to_alt_ids[identifier]:
|
|
138
|
-
term.append_alt(alt_id)
|
|
133
|
+
term.append_alt(Reference(prefix=PREFIX, identifier=alt_id))
|
|
139
134
|
entrez_id = entrez_mappings.get(identifier)
|
|
140
135
|
if entrez_id:
|
|
141
136
|
try:
|
|
@@ -151,7 +146,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
|
|
|
151
146
|
for hgnc_id in human_orthologs.get(identifier, []):
|
|
152
147
|
term.append_relationship(orthologous, Reference(prefix="hgnc", identifier=hgnc_id))
|
|
153
148
|
for mgi_curie in mouse_orthologs.get(identifier, []):
|
|
154
|
-
mouse_ortholog =
|
|
149
|
+
mouse_ortholog = _parse_str_or_curie_or_uri(mgi_curie)
|
|
155
150
|
if mouse_ortholog:
|
|
156
151
|
term.append_relationship(orthologous, mouse_ortholog)
|
|
157
152
|
for flybase_id in fly_orthologs.get(identifier, []):
|
pyobo/ssg/__init__.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
"""Static site generator."""
|
|
2
2
|
|
|
3
|
-
import itertools as itt
|
|
4
3
|
from collections import defaultdict
|
|
5
4
|
from collections.abc import Sequence
|
|
6
5
|
from operator import attrgetter
|
|
7
6
|
from pathlib import Path
|
|
8
|
-
from typing import Optional, Union
|
|
9
7
|
|
|
10
8
|
import bioregistry
|
|
11
9
|
from bioregistry.constants import BIOREGISTRY_DEFAULT_BASE_URL
|
|
@@ -31,21 +29,22 @@ index_template = environment.get_template("index.html")
|
|
|
31
29
|
|
|
32
30
|
def make_site(
|
|
33
31
|
obo: Obo,
|
|
34
|
-
directory:
|
|
32
|
+
directory: str | Path,
|
|
35
33
|
use_subdirectories: bool = True,
|
|
36
34
|
manifest: bool = False,
|
|
37
|
-
resource:
|
|
38
|
-
metaregistry_metaprefix:
|
|
39
|
-
metaregistry_name:
|
|
40
|
-
metaregistry_base_url:
|
|
41
|
-
show_properties_in_manifest:
|
|
35
|
+
resource: bioregistry.Resource | None = None,
|
|
36
|
+
metaregistry_metaprefix: str | None = None,
|
|
37
|
+
metaregistry_name: str | None = None,
|
|
38
|
+
metaregistry_base_url: str | None = None,
|
|
39
|
+
show_properties_in_manifest: Sequence[tuple[str, str]] | None = None,
|
|
42
40
|
) -> None:
|
|
43
41
|
"""Make a website in the given directory.
|
|
44
42
|
|
|
45
43
|
:param obo: The ontology to generate a site for
|
|
46
44
|
:param directory: The directory in which to generate the site
|
|
47
|
-
:param use_subdirectories: If true, creates directories for each
|
|
48
|
-
inside. If false, creates HTML files
|
|
45
|
+
:param use_subdirectories: If true, creates directories for each
|
|
46
|
+
term/property/typedef with an index.html inside. If false, creates HTML files
|
|
47
|
+
named with the identifiers.
|
|
49
48
|
:param manifest: If true, lists all entries on the homepage.
|
|
50
49
|
:param resource: A custom resource
|
|
51
50
|
"""
|
|
@@ -65,15 +64,12 @@ def make_site(
|
|
|
65
64
|
if resource is None:
|
|
66
65
|
raise KeyError
|
|
67
66
|
|
|
67
|
+
terms = [term for term in obo if term.prefix == obo.ontology]
|
|
68
|
+
|
|
68
69
|
if not manifest:
|
|
69
70
|
_manifest = None
|
|
70
71
|
else:
|
|
71
|
-
_manifest = sorted(
|
|
72
|
-
(term for term in itt.chain(obo, obo.typedefs or []) if term.prefix == obo.ontology),
|
|
73
|
-
key=attrgetter("identifier"),
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
terms = [term for term in obo if term.prefix == obo.ontology]
|
|
72
|
+
_manifest = sorted(terms, key=attrgetter("identifier"))
|
|
77
73
|
|
|
78
74
|
directory.joinpath("index.html").write_text(
|
|
79
75
|
index_template.render(
|
pyobo/ssg/base.html
CHANGED
|
File without changes
|
pyobo/ssg/index.html
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Metadata
|
|
9
9
|
</h5>
|
|
10
10
|
<div class="card-body">
|
|
11
|
-
<p>{{ resource.get_description() }}</p>
|
|
11
|
+
<p>{{ resource.get_description(use_markdown=True) }}</p>
|
|
12
12
|
<dl>
|
|
13
13
|
{% if obo.data_version %}
|
|
14
14
|
<dt>Data Version</dt>
|
|
@@ -64,6 +64,30 @@
|
|
|
64
64
|
</div>
|
|
65
65
|
{% endif %}
|
|
66
66
|
|
|
67
|
+
{% if obo.typedefs %}
|
|
68
|
+
<div class="card" style="margin-top: 1em;">
|
|
69
|
+
<h5 class="card-header">
|
|
70
|
+
Type Definitions
|
|
71
|
+
</h5>
|
|
72
|
+
<table class="table table-striped">
|
|
73
|
+
<thead>
|
|
74
|
+
<tr>
|
|
75
|
+
<th>CURIE</th>
|
|
76
|
+
<th>Name</th>
|
|
77
|
+
</tr>
|
|
78
|
+
</thead>
|
|
79
|
+
<tbody>
|
|
80
|
+
{%- for typedef in obo.typedefs %}
|
|
81
|
+
<tr>
|
|
82
|
+
<td>{{ typedef.curie }}</td>
|
|
83
|
+
<td>{% if typedef.name %}{{ typedef.name }}{% endif %}</td>
|
|
84
|
+
</tr>
|
|
85
|
+
{%- endfor %}
|
|
86
|
+
</tbody>
|
|
87
|
+
</table>
|
|
88
|
+
</div>
|
|
89
|
+
{% endif %}
|
|
90
|
+
|
|
67
91
|
{% if manifest %}
|
|
68
92
|
<div class="card" style="margin-top: 1em;">
|
|
69
93
|
<h5 class="card-header">
|
|
@@ -85,7 +109,7 @@
|
|
|
85
109
|
{% if term.is_metadata_tag %}
|
|
86
110
|
Property
|
|
87
111
|
{% else %}
|
|
88
|
-
{{ term.
|
|
112
|
+
{{ term.type }}
|
|
89
113
|
{% endif %}
|
|
90
114
|
</td>
|
|
91
115
|
<td align="right"><a href="{{ term.identifier }}">{{ term.identifier }}</a></td>
|
|
@@ -96,16 +120,5 @@
|
|
|
96
120
|
</tbody>
|
|
97
121
|
</table>
|
|
98
122
|
</div>
|
|
99
|
-
{% elif obo.typedefs %}
|
|
100
|
-
<div class="card" style="margin-top: 1em;">
|
|
101
|
-
<h5 class="card-header">
|
|
102
|
-
Relation Definitions
|
|
103
|
-
</h5>
|
|
104
|
-
<ul class="list-group list-group-flush">
|
|
105
|
-
{% for typedef in obo.typedefs %}
|
|
106
|
-
<li class="list-group-item">{{ typedef }}</li>
|
|
107
|
-
{% endfor %}
|
|
108
|
-
</ul>
|
|
109
|
-
</div>
|
|
110
123
|
{% endif %}
|
|
111
124
|
{%- endblock content %}
|
pyobo/ssg/term.html
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
{% block title %}{{ term.name or term.identifier }}{% endblock title %}
|
|
4
4
|
|
|
5
|
+
{% macro display_value(v) %}
|
|
6
|
+
{% if v.prefix %}
|
|
7
|
+
<a href="{{ v.bioregistry_link }}">{{ v.curie }}</a>
|
|
8
|
+
{% elif v.datatype.identifier == "anyURI" %}
|
|
9
|
+
<a href="{{ v.value }}">{{ v.value }}</a>
|
|
10
|
+
{% else %}
|
|
11
|
+
{{ v.value }}
|
|
12
|
+
{% endif %}
|
|
13
|
+
{% endmacro %}
|
|
14
|
+
|
|
5
15
|
{% block content -%}
|
|
6
16
|
<div class="card">
|
|
7
17
|
<h5 class="card-header">
|
|
@@ -69,11 +79,11 @@
|
|
|
69
79
|
</dt>
|
|
70
80
|
<dd>
|
|
71
81
|
{% if values|length == 1 %}
|
|
72
|
-
{{ values[0] }}
|
|
82
|
+
{{ display_value(values[0]) }}
|
|
73
83
|
{% else %}
|
|
74
84
|
<ul>
|
|
75
85
|
{% for value in values %}
|
|
76
|
-
<li>{{ value }}</li>
|
|
86
|
+
<li>{{ display_value(value) }}</li>
|
|
77
87
|
{% endfor %}
|
|
78
88
|
</ul>
|
|
79
89
|
{% endif %}
|
pyobo/ssg/typedef.html
CHANGED
|
File without changes
|
pyobo/struct/__init__.py
CHANGED
|
@@ -1,23 +1,31 @@
|
|
|
1
1
|
"""Data structures for OBO."""
|
|
2
2
|
|
|
3
|
-
from .reference import
|
|
4
|
-
|
|
3
|
+
from .reference import (
|
|
4
|
+
OBOLiteral,
|
|
5
|
+
Reference,
|
|
6
|
+
Referenced,
|
|
7
|
+
_parse_str_or_curie_or_uri,
|
|
8
|
+
default_reference,
|
|
9
|
+
)
|
|
10
|
+
from .struct import (
|
|
11
|
+
CHARLIE_TERM,
|
|
12
|
+
HUMAN_TERM,
|
|
13
|
+
PYOBO_INJECTED,
|
|
5
14
|
Obo,
|
|
6
15
|
Synonym,
|
|
7
|
-
SynonymSpecificities,
|
|
8
|
-
SynonymSpecificity,
|
|
9
16
|
SynonymTypeDef,
|
|
10
17
|
Term,
|
|
18
|
+
TypeDef,
|
|
11
19
|
make_ad_hoc_ontology,
|
|
12
20
|
)
|
|
13
|
-
from .
|
|
14
|
-
|
|
15
|
-
TypeDef,
|
|
21
|
+
from .struct_utils import Annotation, Stanza
|
|
22
|
+
from .typedef import (
|
|
16
23
|
derives_from,
|
|
17
24
|
enables,
|
|
18
25
|
from_species,
|
|
19
26
|
gene_product_member_of,
|
|
20
|
-
|
|
27
|
+
has_category,
|
|
28
|
+
has_citation,
|
|
21
29
|
has_gene_product,
|
|
22
30
|
has_member,
|
|
23
31
|
has_part,
|
|
@@ -32,3 +40,41 @@ from .typedef import ( # noqa: F401
|
|
|
32
40
|
transcribes_to,
|
|
33
41
|
translates_to,
|
|
34
42
|
)
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"CHARLIE_TERM",
|
|
46
|
+
"HUMAN_TERM",
|
|
47
|
+
"PYOBO_INJECTED",
|
|
48
|
+
"Annotation",
|
|
49
|
+
"OBOLiteral",
|
|
50
|
+
"Obo",
|
|
51
|
+
"Reference",
|
|
52
|
+
"Referenced",
|
|
53
|
+
"Stanza",
|
|
54
|
+
"Synonym",
|
|
55
|
+
"SynonymTypeDef",
|
|
56
|
+
"Term",
|
|
57
|
+
"TypeDef",
|
|
58
|
+
"_parse_str_or_curie_or_uri",
|
|
59
|
+
"default_reference",
|
|
60
|
+
"derives_from",
|
|
61
|
+
"enables",
|
|
62
|
+
"from_species",
|
|
63
|
+
"gene_product_member_of",
|
|
64
|
+
"has_category",
|
|
65
|
+
"has_citation",
|
|
66
|
+
"has_gene_product",
|
|
67
|
+
"has_member",
|
|
68
|
+
"has_part",
|
|
69
|
+
"has_participant",
|
|
70
|
+
"is_a",
|
|
71
|
+
"make_ad_hoc_ontology",
|
|
72
|
+
"member_of",
|
|
73
|
+
"orthologous",
|
|
74
|
+
"part_of",
|
|
75
|
+
"participates_in",
|
|
76
|
+
"species_specific",
|
|
77
|
+
"superclass_of",
|
|
78
|
+
"transcribes_to",
|
|
79
|
+
"translates_to",
|
|
80
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Functional OWL interface."""
|