pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/sources/ncbi/ncbi_gc.py
ADDED

@@ -0,0 +1,162 @@
+"""Convert NCBI Genetic Codes to an ontology.
+
+.. seealso::
+
+    https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes
+"""
+
+from collections.abc import Iterable
+
+from pyobo import default_reference
+from pyobo.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, Obo, Reference, Term, TypeDef
+from pyobo.struct.typedef import comment, has_contributor, see_also, term_replaced_by
+from pyobo.utils.path import ensure_path
+
+PREFIX = "ncbi.gc"
+URI_PREFIX = (
+    "https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes#SG"
+)
+URL = "ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt"
+VERSION = "4.6"
+
+GC_ROOT = default_reference(prefix=PREFIX, identifier="root", name="genetic code translation table")
+NCBITAXON_ROOT = Reference(prefix="NCBITaxon", identifier="1", name="root")
+
+has_gc_code = TypeDef(
+    reference=default_reference(
+        prefix=PREFIX,
+        identifier="hasGeneticCodeTranslationTable",
+        name="has genetic code translation table",
+    ),
+    definition="Connects a taxonomy term to a genetic code translation table",
+    domain=NCBITAXON_ROOT,
+    range=GC_ROOT,
+).append_contributor(CHARLIE_TERM)
+
+NUCLEAR_GENETIC_CODE = default_reference(
+    prefix=PREFIX, identifier="nuclear-genetic-code", name="nuclear genetic code translation table"
+)
+MITOCHONDRIAL_GENETIC_CODE = default_reference(
+    prefix=PREFIX,
+    identifier="mitochondrial-genetic-code",
+    name="mitochondrial genetic code translation table",
+)
+PLASTID_GENETIC_CODE = default_reference(
+    prefix=PREFIX, identifier="plastid-genetic-code", name="plastid genetic code translation table"
+)
+NUCLEUS = Reference(prefix="GO", identifier="0005634", name="nucleus")
+MITOCHONDIA = Reference(prefix="GO", identifier="0005739", name="mitochondrion")
+PLASTID = Reference(prefix="GO", identifier="0009536", name="plastid")
+
+CATEGORY_TO_CELLULAR_COMPONENT = {
+    NUCLEAR_GENETIC_CODE: NUCLEUS,
+    MITOCHONDRIAL_GENETIC_CODE: MITOCHONDIA,
+    PLASTID_GENETIC_CODE: PLASTID,
+}
+CATEGORY_TO_TABLES = {
+    NUCLEAR_GENETIC_CODE: [12, 31, 6, 28, 10, 27, 29, 26, 30, 15],
+    MITOCHONDRIAL_GENETIC_CODE: [14, 13, 16, 9, 5, 4, 22, 23, 21, 2, 3, 24],
+    PLASTID_GENETIC_CODE: [11, 32],
+}
+TABLE_TO_CATEGORY = {
+    str(value): key for key, values in CATEGORY_TO_TABLES.items() for value in values
+}
+
+
+class NCBIGCGetter(Obo):
+    """Get terms in GC."""
+
+    ontology = PREFIX
+    static_version = VERSION
+    root_terms = [GC_ROOT]
+    typedefs = [has_gc_code, has_contributor, see_also, comment, term_replaced_by]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms()
+
+
+def get_terms() -> Iterable[Term]:
+    """Get terms for GC."""
+    yield CHARLIE_TERM
+    yield Term(reference=NCBITAXON_ROOT)
+    yield HUMAN_TERM
+
+    path = ensure_path(PREFIX, url=URL)
+    # first, remove comment lines
+    lines = [
+        line.strip()
+        for line in path.read_text().splitlines()
+        if not line.startswith("--") and line.strip()
+    ]
+
+    lines = lines[1:-2]
+    entries: list[dict[str, str]] = []
+    entry: dict[str, str] = {}
+    for line in lines:
+        # start a new entry
+        if line == "{":
+            if entry:
+                entries.append(entry)
+            entry = {}
+        elif line == "},":
+            pass
+        else:
+            key, data = line.split(" ", 1)
+            if key == "name":
+                data = data.lstrip('"')
+                if data.startswith("SGC"):
+                    key = "symbol"
+                entry[key] = data.rstrip(",").rstrip().rstrip('"')
+            elif key == "id":
+                entry["identifier"] = data.rstrip(",").rstrip()
+
+    yield (
+        Term(
+            reference=GC_ROOT,
+            definition="A table for translating codons into amino acids. This can change for "
+            "different taxa, or be different in different organelles that include genetic information.",
+        )
+        .append_contributor(CHARLIE_TERM)
+        .append_comment(PYOBO_INJECTED)
+    )
+
+    for reference in CATEGORY_TO_TABLES:
+        term = Term(reference=reference)
+        term.append_parent(GC_ROOT)
+        term.append_contributor(CHARLIE_TERM)
+        term.append_comment(PYOBO_INJECTED)
+        if substructure := CATEGORY_TO_CELLULAR_COMPONENT.get(reference):
+            term.append_see_also(substructure)
+        yield term
+
+    for entry in entries:
+        identifier = entry["identifier"]
+        term = Term.from_triple(PREFIX, identifier, entry["name"])
+        term.append_parent(TABLE_TO_CATEGORY.get(identifier, GC_ROOT))
+        # TODO if symbol is available, what does it mean?
+        yield term
+
+    yield (
+        Term(
+            reference=Reference(prefix=PREFIX, identifier="7"),
+            is_obsolete=True,
+        )
+        .append_replaced_by(Reference(prefix=PREFIX, identifier="4"))
+        .append_comment("Kinetoplast code now merged in code id 4, as of 1995.")
+    )
+    yield (
+        Term(
+            reference=Reference(prefix=PREFIX, identifier="8"),
+            is_obsolete=True,
+        )
+        .append_replaced_by(Reference(prefix=PREFIX, identifier="1"))
+        .append_comment("all plant chloroplast differences due to RNA edit, as of 1995.")
+    )
+
+    for cellular_component in CATEGORY_TO_CELLULAR_COMPONENT.values():
+        yield Term(reference=cellular_component)
+
+
+if __name__ == "__main__":
+    NCBIGCGetter.cli()
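Usage sketch (not part of the diff): the new `ncbi.gc` source can presumably be driven the same way as the other getters in this release, by instantiating the `Obo` subclass directly, which is the pattern that replaces the removed `get_obo()` helpers shown later in this diff. The no-argument constructor and `term.name` access are assumptions not confirmed here.

```python
# Minimal sketch: iterate the new NCBI genetic code source.
# Assumes NCBIGCGetter() can be constructed without arguments, like the
# other getters in this diff (e.g., the removed `return NCBIGeneGetter(force=force)`).
from pyobo.sources.ncbi.ncbi_gc import NCBIGCGetter

ontology = NCBIGCGetter()
for term in ontology.iter_terms():
    # term.curie is used by the source code itself; term.name is assumed
    print(term.curie, term.name)
```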
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py}
CHANGED

@@ -7,8 +7,8 @@ import bioregistry
 import pandas as pd
 from tqdm.auto import tqdm
 
-from
-from
+from ...struct import Obo, Reference, Term, from_species
+from ...utils.path import ensure_df
 
 __all__ = [
     "NCBIGeneGetter",

@@ -34,7 +34,7 @@ CONSORTIUM_SPECIES_MAPPING = {
 }
 
 GENE_INFO_URL = "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz"
-#: Columns
+#: Columns for gene_info.gz that are used
 GENE_INFO_COLUMNS = [
     "#tax_id",
     "GeneID",

@@ -93,11 +93,6 @@ class NCBIGeneGetter(Obo):
         return get_terms(force=force)
 
 
-def get_obo(force: bool = False) -> Obo:
-    """Get Entrez as OBO."""
-    return NCBIGeneGetter(force=force)
-
-
 def get_gene_info_df(force: bool = False) -> pd.DataFrame:
     """Get the gene info dataframe."""
     return ensure_df(

@@ -111,17 +106,16 @@ def get_gene_info_df(force: bool = False) -> pd.DataFrame:
     )
 
 
-
-
-
-for xrefs in df[df[
-
-
+def _get_xref_mapping() -> list[str]:
+    namespaces: set[str] = set()
+    df = get_gene_info_df()
+    for xrefs in df[df["dbXrefs"].notna()]["dbXrefs"]:
+        for xref in xrefs.split("|"):
+            namespaces.add(xref.split(":")[0])
+    return sorted(namespaces, key=str.casefold)
 
-print('namespaces:')
-print(*sorted(namespaces), sep='\n')
-"""
 
+# this was retrieved from :func:`_get_xref_mapping`
 xref_mapping = {
     "APHIDBASE",
     "ASAP",

@@ -157,7 +151,12 @@ xref_mapping = {x.lower() for x in xref_mapping}
 
 
 def get_terms(force: bool = False) -> Iterable[Term]:
-    """Get Entrez terms.
+    """Get Entrez terms.
+
+    :param force: should re-download be forced?
+
+    :yields: terms for each line
+    """
     df = get_gene_info_df(force=force)
 
     it = tqdm(

@@ -192,4 +191,4 @@ def get_terms(force: bool = False) -> Iterable[Term]:
 
 
 if __name__ == "__main__":
-
+    NCBIGeneGetter.cli()
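Sketch (not part of the diff): the scratch block that previously printed xref namespaces is replaced by a private helper, and the new comment says the hard-coded `xref_mapping` block was retrieved from it. The import path below follows the renamed module and is an assumption; the helper is internal and not exported.

```python
# Sketch: regenerate the namespace prefixes listed in xref_mapping,
# per the new comment "this was retrieved from :func:`_get_xref_mapping`".
from pyobo.sources.ncbi.ncbigene import _get_xref_mapping  # private helper

for namespace in _get_xref_mapping():
    print(namespace)
```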
pyobo/sources/nih_reporter.py
ADDED

@@ -0,0 +1,60 @@
+"""A source for NIH RePORTER projects."""
+
+from collections.abc import Iterable
+
+import pandas as pd
+from nih_reporter_downloader import get_projects_df
+
+from pyobo import Reference
+from pyobo.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, Obo, Term, default_reference
+
+__all__ = [
+    "NIHReporterGetter",
+]
+
+PREFIX = "nihreporter.project"
+PROJECTS_SUBSET = [
+    "APPLICATION_ID",
+    "PROJECT_TITLE",
+]
+
+PROJECT_TERM = (
+    Term(reference=default_reference(PREFIX, "project", name="project"))
+    .append_contributor(CHARLIE_TERM)
+    .append_comment(PYOBO_INJECTED)
+)
+
+
+class NIHReporterGetter(Obo):
+    """An ontology representation of NIH RePORTER."""
+
+    ontology = PREFIX
+    dynamic_version = True
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        yield CHARLIE_TERM
+        yield HUMAN_TERM
+        yield PROJECT_TERM
+        yield from iterate_nih_reporter_projects()
+
+
+def iterate_nih_reporter_projects() -> Iterable[Term]:
+    """Iterate over NIH RePORTER projects."""
+    projects_df = get_projects_df()
+    for identifier, name in projects_df[PROJECTS_SUBSET].values:
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX,
+                identifier=str(identifier),
+                name=name.replace("\n", " ") if pd.notna(name) else None,
+            ),
+            type="Instance",
+        )
+        term.append_parent(PROJECT_TERM)
+        # TODO there is a lot more information that can be added here
+        yield term
+
+
+if __name__ == "__main__":
+    NIHReporterGetter.cli()
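Sketch (not part of the diff): each NIH RePORTER project is emitted as an instance term under a single "project" root. The snippet below assumes the `nih_reporter_downloader` dependency is installed, its download succeeds, and the getter can be instantiated without arguments like the others in this diff.

```python
# Sketch: peek at a few NIH RePORTER project terms.
from itertools import islice

from pyobo.sources.nih_reporter import NIHReporterGetter

for term in islice(NIHReporterGetter().iter_terms(), 5):
    print(term.curie, term.name)
```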
pyobo/sources/nlm/nlm_catalog.py
ADDED

@@ -0,0 +1,48 @@
+"""Converter for NLM Providers."""
+
+from collections.abc import Iterable
+
+from pyobo.sources.nlm.utils import (
+    JOURNAL_TERM,
+    PREFIX_CATALOG,
+    PUBLISHED_IN,
+    PUBLISHER_TERM,
+    get_journals,
+    get_publishers,
+)
+from pyobo.struct import CHARLIE_TERM, HUMAN_TERM, Obo, Term
+from pyobo.struct.typedef import exact_match, has_end_date, has_start_date
+
+__all__ = [
+    "NLMCatalogGetter",
+]
+
+
+class NLMCatalogGetter(Obo):
+    """An ontology representation of NLM Providers."""
+
+    bioversions_key = ontology = PREFIX_CATALOG
+    dynamic_version = True
+    typedefs = [PUBLISHED_IN, has_end_date, has_start_date, exact_match]
+    root_terms = [JOURNAL_TERM.reference, PUBLISHER_TERM.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over journal terms for NLM Catalog."""
+        yield from get_terms(force=force)
+
+
+def get_terms(*, force: bool = False) -> Iterable[Term]:
+    """Get NLM catalog terms."""
+    yield JOURNAL_TERM
+    yield PUBLISHER_TERM
+    yield CHARLIE_TERM
+    yield HUMAN_TERM
+
+    journal_id_to_publisher_key = get_publishers(force=force)
+    yield from sorted(set(journal_id_to_publisher_key.values()))
+
+    yield from get_journals(force=force, journal_id_to_publisher_key=journal_id_to_publisher_key)
+
+
+if __name__ == "__main__":
+    NLMCatalogGetter.cli()
pyobo/sources/nlm/nlm_publisher.py
ADDED

@@ -0,0 +1,36 @@
+"""Converter for NLM Providers."""
+
+from collections.abc import Iterable
+
+from pyobo.sources.nlm.utils import PREFIX_PUBLISHER, PUBLISHER_TERM, get_publishers
+from pyobo.struct import CHARLIE_TERM, HUMAN_TERM, Obo, Term
+
+__all__ = [
+    "NLMPublisherGetter",
+]
+
+
+class NLMPublisherGetter(Obo):
+    """An ontology representation of NLM Publishers."""
+
+    bioversions_key = ontology = PREFIX_PUBLISHER
+    dynamic_version = True
+    root_terms = [PUBLISHER_TERM.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over gene terms for NLM Catalog."""
+        yield from get_terms(force=force)
+
+
+def get_terms(*, force: bool = False) -> Iterable[Term]:
+    """Get NLM publisher terms."""
+    yield PUBLISHER_TERM
+    yield CHARLIE_TERM
+    yield HUMAN_TERM
+
+    journal_id_to_publisher_key = get_publishers(force=force)
+    yield from sorted(set(journal_id_to_publisher_key.values()))
+
+
+if __name__ == "__main__":
+    NLMPublisherGetter.cli()
pyobo/sources/nlm/utils.py
ADDED

@@ -0,0 +1,116 @@
+"""Utilities for NLM."""
+
+from collections.abc import Iterable
+from xml.etree import ElementTree
+
+from tqdm import tqdm
+
+from pyobo import Reference, Term, TypeDef, default_reference, ensure_path
+from pyobo.struct.struct import CHARLIE_TERM, PYOBO_INJECTED
+from pyobo.struct.typedef import has_end_date, has_start_date
+from pyobo.utils.path import ensure_df
+
+PREFIX_CATALOG = "nlm"
+PREFIX_PUBLISHER = "nlm.publisher"
+
+CATALOG_TO_PUBLISHER = "https://ftp.ncbi.nlm.nih.gov/pubmed/xmlprovidernames.txt"
+JOURNAL_INFO_PATH = "https://ftp.ncbi.nlm.nih.gov/pubmed/jourcache.xml"
+PUBLISHED_IN = TypeDef(
+    reference=default_reference(PREFIX_CATALOG, "published_in", name="published in"),
+    xrefs=[
+        Reference(prefix="biolink", identifier="published_in"),
+        Reference(prefix="uniprot.core", identifier="publishedIn"),
+    ],
+)
+JOURNAL_TERM = (
+    Term(reference=default_reference(PREFIX_CATALOG, "journal", name="journal"))
+    .append_exact_match(Reference(prefix="SIO", identifier="000160"))
+    .append_exact_match(Reference(prefix="FBCV", identifier="0000787"))
+    .append_exact_match(Reference(prefix="MI", identifier="0885"))
+    .append_exact_match(Reference(prefix="bibo", identifier="Journal"))
+    .append_exact_match(Reference(prefix="uniprot.core", identifier="Journal"))
+    .append_contributor(CHARLIE_TERM)
+    .append_comment(PYOBO_INJECTED)
+)
+PUBLISHER_TERM = (
+    Term(reference=default_reference(PREFIX_CATALOG, "publisher", name="publisher"))
+    .append_exact_match(Reference(prefix="biolink", identifier="publisher"))
+    .append_exact_match(Reference(prefix="schema", identifier="publisher"))
+    .append_exact_match(Reference(prefix="uniprot.core", identifier="publisher"))
+    .append_contributor(CHARLIE_TERM)
+    .append_comment(PYOBO_INJECTED)
+)
+
+
+def get_publishers(*, force: bool = False) -> dict[str, Term]:
+    """Get NLM publishers."""
+    journal_to_publisher_df = ensure_df(
+        PREFIX_CATALOG, url=CATALOG_TO_PUBLISHER, sep="|", force=force, dtype=str
+    )
+    journal_id_to_publisher_key: dict[str, Term] = {
+        journal_id: Term(
+            reference=Reference(prefix=PREFIX_PUBLISHER, identifier=identifier, name=name),
+            type="Instance",
+        ).append_parent(PUBLISHER_TERM)
+        for journal_id, identifier, name in journal_to_publisher_df.values
+    }
+    return journal_id_to_publisher_key
+
+
+def get_journals(
+    *, force: bool = False, journal_id_to_publisher_key: dict[str, Term] | None = None
+) -> Iterable[Term]:
+    """Get NLM Catalog terms."""
+    path = ensure_path(PREFIX_CATALOG, url=JOURNAL_INFO_PATH, force=force)
+    root = ElementTree.parse(path).getroot()
+
+    if journal_id_to_publisher_key is None:
+        journal_id_to_publisher_key = get_publishers(force=force)
+    elements = root.findall("Journal")
+    for element in elements:
+        if term := _process_journal(element, journal_id_to_publisher_key):
+            yield term
+
+
+def _process_journal(element, journal_id_to_publisher_key: dict[str, Term]) -> Term | None:
+    # TODO enrich with context from https://ftp.ncbi.nlm.nih.gov/pubmed/J_Entrez.txt and https://ftp.ncbi.nlm.nih.gov/pubmed/J_Medline.txt
+
+    nlm_id = element.findtext("NlmUniqueID")
+    name = element.findtext("Name")
+
+    if not nlm_id.isnumeric():
+        # TODO investigate these records, which all appear to have IDs that
+        # end in R like 17410670R (Proceedings of the staff meetings. Honolulu. Clinic)
+        # which corresponds to https://www.ncbi.nlm.nih.gov/nlmcatalog/287649
+        return None
+
+    issns = [(issn.text, issn.attrib["type"]) for issn in element.findall("Issn")]
+    # ActivityFlag is either "0" or "1"
+    term = Term(
+        reference=Reference(prefix=PREFIX_CATALOG, identifier=nlm_id, name=name),
+        type="Instance",
+    )
+    term.append_parent(JOURNAL_TERM)
+    for synonym in element.findall("Alias"):
+        term.append_synonym(synonym.text)
+    for issn, _issn_type in issns:
+        if issn.isnumeric():
+            issn = issn[:4] + "-" + issn[4:]
+
+        # TODO include ISSN type, this is important
+        # to determine a "canonical" one
+        term.append_xref(Reference(prefix="issn", identifier=issn))
+    if start_year := element.findtext("StartYear"):
+        if len(start_year) != 4:
+            tqdm.write(f"[{term.curie}] invalid start year: {start_year}")
+        else:
+            term.annotate_year(has_start_date, start_year)
+    if end_year := element.findtext("EndYear"):
+        if len(end_year) != 4:
+            tqdm.write(f"[{term.curie}] invalid end year: {end_year}")
+        else:
+            term.annotate_year(has_end_date, end_year)
+    # FIXME this whole thing needs reinvestigating
+    if publisher_reference := journal_id_to_publisher_key.get(term.identifier):
+        term.annotate_object(PUBLISHED_IN, publisher_reference.reference)
+    return term
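Sketch (not part of the diff): both NLM getters above share `get_publishers()` from this utilities module, which maps an NLM journal identifier to an instance-typed publisher `Term`. A quick way to inspect that mapping, assuming the module is used directly:

```python
# Sketch: inspect the journal-to-publisher mapping shared by the nlm and
# nlm.publisher sources. Keys are NLM journal identifiers; values are Terms.
from pyobo.sources.nlm.utils import get_publishers

journal_id_to_publisher = get_publishers()
journal_id, publisher_term = next(iter(journal_id_to_publisher.items()))
print(journal_id, publisher_term.curie)
```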
pyobo/sources/npass.py
CHANGED

@@ -6,7 +6,7 @@ from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
 
-from ..struct import Obo, Reference,
+from ..struct import Obo, Reference, Term
 from ..utils.path import ensure_df
 
 __all__ = [

@@ -32,11 +32,6 @@ class NPASSGetter(Obo):
         return iter_terms(force=force, version=self._version_or_raise)
 
 
-def get_obo(force: bool = False) -> Obo:
-    """Get NPASS as OBO."""
-    return NPASSGetter()
-
-
 def get_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get the NPASS chemical nomenclature."""
     base_url = f"https://bidd.group/NPASS/downloadFiles/NPASSv{version}_download"

@@ -71,7 +66,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         # TODO check that the first is always the parent compound?
         if pd.notna(pubchem_compound_ids):
             pubchem_compound_ids = [
-
+                zz
+                for xx in pubchem_compound_ids.split(";")
+                for yy in xx.strip().split(",")
+                if (zz := yy.strip())
             ]
             if len(pubchem_compound_ids) > 1:
                 logger.debug("multiple cids for %s: %s", identifier, pubchem_compound_ids)

@@ -82,7 +80,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 
         for synonym in [iupac]:
             if pd.notna(synonym):
-                term.append_synonym(
+                term.append_synonym(synonym)
 
         yield term
 
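Worked example (not part of the diff, hypothetical cell value): the nested comprehension added above splits a raw NPASS PubChem CID cell on both `;` and `,` and drops empty fragments.

```python
# What the new comprehension does to a made-up raw value.
raw = "1234; 5678,91011, "
pubchem_compound_ids = [
    zz
    for xx in raw.split(";")
    for yy in xx.strip().split(",")
    if (zz := yy.strip())
]
print(pubchem_compound_ids)  # ['1234', '5678', '91011']
```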
pyobo/sources/omim_ps.py
CHANGED

@@ -11,10 +11,9 @@ __all__ = [
     "OMIMPSGetter",
 ]
 
-
 logger = logging.getLogger(__name__)
 PREFIX = "omim.ps"
-URL = "https://omim.org/phenotypicSeriesTitles/
+URL = "https://omim.org/phenotypicSeriesTitles/"
 
 
 class OMIMPSGetter(Obo):

@@ -25,8 +24,16 @@ class OMIMPSGetter(Obo):
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         soup = get_soup(URL, user_agent="Mozilla/5.0")
-
-
+        content = soup.find(id="mimContent")
+        if content is None:
+            raise ValueError("omim.ps failed - scraper could not find id='mimContent' in HTML")
+        table = content.find("table")  # type:ignore[attr-defined]
+        if table is None:
+            raise ValueError("omim.ps failed - scraper could not find table in HTML")
+        tbody = table.find("tbody")
+        if tbody is None:
+            raise ValueError("omim.ps failed - scraper could not find table body in HTML")
+        for row in tbody.find_all("tr"):
             anchor = row.find("td").find("a")
             name = anchor.text.strip()
             identifier = anchor.attrs["href"][len("/phenotypicSeries/") :]
pyobo/sources/pathbank.py
CHANGED

@@ -5,6 +5,7 @@ from __future__ import annotations
 import logging
 from collections import defaultdict
 from collections.abc import Iterable, Mapping
+from itertools import chain
 
 import pandas as pd
 from tqdm.auto import tqdm

@@ -77,11 +78,6 @@ class PathBankGetter(Obo):
         return iter_terms(force=force, version=self._version_or_raise)
 
 
-def get_obo(force: bool = False) -> Obo:
-    """Get PathBank as OBO."""
-    return PathBankGetter(force=force)
-
-
 def get_proteins_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get the proteins dataframe."""
     proteins_df = ensure_df(

@@ -165,9 +161,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             # but there are weird parser errors
         )
         term.append_exact_match(Reference(prefix="smpdb", identifier=smpdb_id))
-        term.
-
-
+        term.annotate_string(has_category, subject.lower().replace(" ", "_"))
+        for participant in chain(smpdb_id_to_proteins[smpdb_id], smpdb_id_to_metabolites[smpdb_id]):
+            term.append_relationship(has_participant, participant)
         yield term
 
 
pyobo/sources/{pfam.py → pfam/pfam.py}
CHANGED

@@ -4,8 +4,8 @@ from collections.abc import Iterable
 
 import pandas as pd
 
-from
-from
+from ...struct import Obo, Reference, Term
+from ...utils.path import ensure_df
 
 __all__ = [
     "PfamGetter",

@@ -47,11 +47,6 @@ class PfamGetter(Obo):
         return iter_terms(self._version_or_raise, force=force)
 
 
-def get_obo(force: bool = False) -> Obo:
-    """Get PFAM as OBO."""
-    return PfamGetter(force=force)
-
-
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Iterate PFAM terms."""
     df = get_pfam_clan_df(version=version, force=force)

@@ -67,4 +62,4 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 
 
 if __name__ == "__main__":
-
+    PfamGetter.cli()
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py}
CHANGED

@@ -5,7 +5,7 @@ from collections.abc import Iterable
 from tqdm.auto import tqdm
 
 from .pfam import get_pfam_clan_df
-from
+from ...struct import Obo, Reference, Term
 
 __all__ = [
     "PfamClanGetter",

@@ -25,11 +25,6 @@ class PfamClanGetter(Obo):
         return iter_terms(version=self._version_or_raise, force=force)
 
 
-def get_obo(force: bool = False) -> Obo:
-    """Get PFAM Clans as OBO."""
-    return PfamClanGetter(force=force)
-
-
 # TODO could get definitions from ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam33.0/Pfam-C.gz
 
 

@@ -46,4 +41,4 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 
 
 if __name__ == "__main__":
-
+    PfamClanGetter.cli()