pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/xrefdb/xrefs_pipeline.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
"""Pipeline for extracting all xrefs from OBO documents available."""
|
|
2
|
-
|
|
3
|
-
import gzip
|
|
4
|
-
import itertools as itt
|
|
5
|
-
import logging
|
|
6
|
-
from collections.abc import Iterable
|
|
7
|
-
from typing import Optional, cast
|
|
8
|
-
|
|
9
|
-
import bioregistry
|
|
10
|
-
import networkx as nx
|
|
11
|
-
import pandas as pd
|
|
12
|
-
from tqdm.auto import tqdm
|
|
13
|
-
|
|
14
|
-
from .sources import iter_xref_plugins
|
|
15
|
-
from .. import get_xrefs_df
|
|
16
|
-
from ..api import (
|
|
17
|
-
get_id_definition_mapping,
|
|
18
|
-
get_id_name_mapping,
|
|
19
|
-
get_id_species_mapping,
|
|
20
|
-
get_id_synonyms_mapping,
|
|
21
|
-
get_id_to_alts,
|
|
22
|
-
get_metadata,
|
|
23
|
-
get_properties_df,
|
|
24
|
-
get_relations_df,
|
|
25
|
-
get_typedef_df,
|
|
26
|
-
)
|
|
27
|
-
from ..constants import SOURCE_ID, SOURCE_PREFIX, TARGET_ID, TARGET_PREFIX
|
|
28
|
-
from ..getters import iter_helper, iter_helper_helper
|
|
29
|
-
from ..sources import ncbigene, pubchem
|
|
30
|
-
from ..utils.path import ensure_path
|
|
31
|
-
|
|
32
|
-
logger = logging.getLogger(__name__)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# TODO a normal graph can easily be turned into a directed graph where each
|
|
36
|
-
# edge points from low priority to higher priority, then the graph can
|
|
37
|
-
# be reduced to a set of star graphs and ultimately to a single dictionary
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get_graph_from_xref_df(df: pd.DataFrame) -> nx.Graph:
    """Build an undirected graph of CURIEs from a cross-reference dataframe.

    Every distinct (prefix, identifier) pair appearing in the source or the
    target columns becomes a node keyed by its CURIE, carrying ``prefix`` and
    ``identifier`` as node attributes. Every row of the dataframe becomes an
    edge between the source CURIE and the target CURIE, annotated with the
    row's provenance.

    :param df: A dataframe whose rows unpack, in order, to source prefix,
        source identifier, target prefix, target identifier, and provenance
    :returns: The populated graph
    """
    graph = nx.Graph()

    source_pairs = df[[SOURCE_PREFIX, SOURCE_ID]].drop_duplicates().values
    target_pairs = df[[TARGET_PREFIX, TARGET_ID]].drop_duplicates().values
    pairs = tqdm(
        itt.chain(source_pairs, target_pairs),
        desc="loading curies",
        unit_scale=True,
    )
    for prefix, identifier in pairs:
        graph.add_node(_to_curie(prefix, identifier), prefix=prefix, identifier=identifier)

    rows = tqdm(df.values, total=len(df.index), desc="loading xrefs", unit_scale=True)
    for source_ns, source_id, target_ns, target_id, provenance in rows:
        source_curie = _to_curie(source_ns, source_id)
        target_curie = _to_curie(target_ns, target_id)
        graph.add_edge(source_curie, target_curie, provenance=provenance)

    return graph
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def _to_curie(prefix: str, identifier: str) -> str:
|
|
64
|
-
return f"{prefix}:{identifier}"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _iter_ncbigene(left, right):
    """Iterate over two columns of the NCBI Gene info file.

    The file is ensured locally via :func:`ensure_path`, opened as gzipped
    text, and read line by line after discarding the header line.

    :param left: Index of the tab-separated column to yield as the second element
    :param right: Index of the tab-separated column to yield as the third element
    :yields: Triples of ``ncbigene.PREFIX``, the ``left`` column, and the ``right`` column
    """
    ncbi_path = ensure_path(ncbigene.PREFIX, url=ncbigene.GENE_INFO_URL)
    with gzip.open(ncbi_path, "rt") as file:
        # Throw away the header. The default keeps an empty file from raising
        # StopIteration here, which inside a generator would surface as a
        # RuntimeError (PEP 479) instead of simply yielding nothing.
        next(file, None)
        for line in tqdm(
            file, desc=f"extracting {ncbigene.PREFIX}", unit_scale=True, total=27_000_000
        ):
            columns = line.strip().split("\t")
            yield ncbigene.PREFIX, columns[left], columns[right]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def _iter_metadata(**kwargs):
    """Iterate over version metadata for each resource.

    The version in use is reported through ``tqdm.write`` for every prefix.

    :param kwargs: Keyword arguments forwarded to :func:`iter_helper_helper`
    :yields: Tuples of prefix, version, date, and whether the Bioregistry
        considers the prefix deprecated
    """
    for prefix, metadata in iter_helper_helper(get_metadata, **kwargs):
        version = metadata["version"]
        tqdm.write(f"[{prefix}] using version {version}")
        date = metadata["date"]
        deprecated = bioregistry.is_deprecated(prefix)
        yield prefix, version, date, deprecated
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def _iter_names(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
    """Iterate over all prefix-identifier-name triples we can get.

    Three sources are streamed one after another: the name mappings of the
    resources visited by :func:`iter_helper`, the NCBI Gene info file
    (columns 1 and 2), and the PubChem compound identifier/name file.

    :param leave: should the tqdm be left behind?
    :param kwargs: Keyword arguments forwarded to :func:`iter_helper`
    :yields: Triples of prefix, identifier, and name
    """
    yield from iter_helper(get_id_name_mapping, leave=leave, **kwargs)
    yield from _iter_ncbigene(1, 2)

    pubchem_path = pubchem._ensure_cid_name_path()
    with gzip.open(pubchem_path, mode="rt", encoding="ISO-8859-1") as file:
        progress = tqdm(
            file, desc=f"extracting {pubchem.PREFIX}", unit_scale=True, total=103_000_000
        )
        for record in progress:
            # Split on the first tab only, so that names containing tabs stay intact
            identifier, name = record.strip().split("\t", 1)
            yield pubchem.PREFIX, identifier, name
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def _iter_species(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
    """Iterate over all prefix-identifier-species triples we can get.

    :param leave: should the tqdm be left behind?
    :param kwargs: Keyword arguments forwarded to :func:`iter_helper`
    :yields: Triples of prefix, identifier, and species
    """
    species_triples = iter_helper(get_id_species_mapping, leave=leave, **kwargs)
    yield from species_triples
    # TODO ncbigene
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def _iter_definitions(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
    """Iterate over all prefix-identifier-description triples we can get.

    The definitions of the resources visited by :func:`iter_helper` come first,
    followed by columns 1 and 8 of the NCBI Gene info file.

    :param leave: should the tqdm be left behind?
    :param kwargs: Keyword arguments forwarded to :func:`iter_helper`
    :yields: Triples of prefix, identifier, and description
    """
    resource_definitions = iter_helper(get_id_definition_mapping, leave=leave, **kwargs)
    yield from resource_definitions

    ncbigene_definitions = _iter_ncbigene(1, 8)
    yield from ncbigene_definitions
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def _iter_alts(
    leave: bool = False, strict: bool = True, **kwargs
) -> Iterable[tuple[str, str, str]]:
    """Iterate over all prefix-identifier-alternate identifier triples we can get.

    Each identifier's collection of alternates is flattened so that one triple
    is produced per alternate.

    :param leave: should the tqdm be left behind?
    :param strict: forwarded to :func:`iter_helper`
    :param kwargs: Additional keyword arguments forwarded to :func:`iter_helper`
    :yields: Triples of prefix, identifier, and alternate identifier
    """
    grouped = iter_helper(get_id_to_alts, leave=leave, strict=strict, **kwargs)
    for prefix, identifier, alts in grouped:
        yield from ((prefix, identifier, alt) for alt in alts)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def _iter_synonyms(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
    """Iterate over all prefix-identifier-synonym triples we can get.

    Each identifier's collection of synonyms is flattened so that one triple
    is produced per synonym.

    :param leave: should the tqdm be left behind?
    :param kwargs: Keyword arguments forwarded to :func:`iter_helper`
    :yields: Triples of prefix, identifier, and synonym
    """
    grouped = iter_helper(get_id_synonyms_mapping, leave=leave, **kwargs)
    for prefix, identifier, synonyms in grouped:
        yield from ((prefix, identifier, synonym) for synonym in synonyms)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def _iter_typedefs(**kwargs) -> Iterable[tuple[str, str, str, str]]:
    """Iterate over all typedef rows we can get, each prepended with its prefix.

    Rows containing any falsy element are skipped.

    :param kwargs: Keyword arguments forwarded to :func:`iter_helper_helper`
    :yields: Tuples of the resource prefix followed by the row's values
    """
    for prefix, typedef_df in iter_helper_helper(get_typedef_df, **kwargs):
        for row in typedef_df.values:
            if not all(row):
                continue
            yield cast(tuple[str, str, str, str], (prefix, *row))
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _iter_relations(**kwargs) -> Iterable[tuple[str, str, str, str, str, str]]:
    """Iterate over all relation rows we can get, each prepended with its prefix.

    Rows containing any falsy element are skipped.

    :param kwargs: Keyword arguments forwarded to :func:`iter_helper_helper`
    :yields: Tuples of the resource prefix followed by the row's values
    """
    for prefix, relations_df in iter_helper_helper(get_relations_df, **kwargs):
        for row in relations_df.values:
            if not all(row):
                continue
            yield cast(tuple[str, str, str, str, str, str], (prefix, *row))
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def _iter_properties(**kwargs) -> Iterable[tuple[str, str, str, str]]:
    """Iterate over all property rows we can get, each prepended with its prefix.

    Rows containing any falsy element are skipped.

    :param kwargs: Keyword arguments forwarded to :func:`iter_helper_helper`
    :yields: Tuples of the resource prefix followed by the row's values
    """
    for prefix, properties_df in iter_helper_helper(get_properties_df, **kwargs):
        for row in properties_df.values:
            if not all(row):
                continue
            yield cast(tuple[str, str, str, str], (prefix, *row))
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
def _iter_xrefs(
    *,
    force: bool = False,
    use_tqdm: bool = True,
    skip_below: Optional[str] = None,
    strict: bool = True,
    **kwargs,
) -> Iterable[tuple[str, str, str, str, str]]:
    """Iterate over all cross-reference rows we can get.

    Rows come from two places, in order:

    1. The xref dataframe of each resource visited by :func:`iter_helper_helper`.
       Rows with missing values or any falsy element are dropped; the remaining
       rows are yielded with the resource prefix prepended (as the source prefix)
       and appended (as the provenance).
    2. The dataframes produced by :func:`iter_xref_plugins`. Rows with missing
       values are dropped and the rest are yielded as they appear in the
       dataframe's values, without further filtering.

    :param force: forwarded to :func:`iter_helper_helper`
    :param use_tqdm: forwarded to :func:`iter_helper_helper`
    :param skip_below: forwarded to both :func:`iter_helper_helper` and
        :func:`iter_xref_plugins`
    :param strict: forwarded to :func:`iter_helper_helper`
    :param kwargs: Additional keyword arguments forwarded to :func:`iter_helper_helper`
    :yields: Rows of source prefix, source identifier, target prefix,
        target identifier, and provenance
    """
    it = iter_helper_helper(
        get_xrefs_df,
        use_tqdm=use_tqdm,
        force=force,
        skip_below=skip_below,
        strict=strict,
        **kwargs,
    )
    for prefix, df in it:
        # Drop missing values on a new dataframe rather than in place, since
        # the dataframe handed back by the getter may be shared with other callers.
        df = df.dropna()
        for row in df.values:
            if not all(row):
                continue
            yield cast(tuple[str, str, str, str, str], (prefix, *row, prefix))
    for df in iter_xref_plugins(skip_below=skip_below):
        # Same reasoning as above: never mutate a dataframe owned by the plugin
        df = df.dropna()
        yield from tqdm(df.values, leave=False, total=len(df.index), unit_scale=True)
|