PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -117
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +107 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +210 -160
pyobo/cli/database_utils.py +155 -0
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +209 -191
pyobo/gilda_utils.py +52 -250
pyobo/identifier_utils/__init__.py +33 -0
pyobo/identifier_utils/api.py +305 -0
pyobo/identifier_utils/preprocessing.json +873 -0
pyobo/identifier_utils/preprocessing.py +27 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +48 -40
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1354 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +9 -6
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +8 -13
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +11 -4
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +272 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1484 -657
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +16 -15
pyobo/utils/io.py +51 -41
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +73 -70
pyobo/version.py +3 -3
pyobo-0.12.1.dist-info/METADATA +671 -0
pyobo-0.12.1.dist-info/RECORD +201 -0
pyobo-0.12.1.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo/xrefdb/xrefs_pipeline.py +0 -180
pyobo-0.11.2.dist-info/METADATA +0 -711
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/cli/database.py CHANGED Viewed

@@ -1,14 +1,40 @@
 """CLI for PyOBO Database Generation."""
 import logging
-from typing import Optional
+import warnings
+from collections.abc import Iterable
+from pathlib import Path
+import bioregistry
 import click
 from more_click import verbose_option
 from tqdm.contrib.logging import logging_redirect_tqdm
+from typing_extensions import Unpack
 from zenodo_client import update_zenodo
-from .utils import directory_option, force_option, no_strict_option, zenodo_option
+from .database_utils import (
+    IterHelperHelperDict,
+    _iter_alts,
+    _iter_definitions,
+    _iter_edges,
+    _iter_mappings,
+    _iter_names,
+    _iter_properties,
+    _iter_relations,
+    _iter_species,
+    _iter_synonyms,
+    _iter_typedefs,
+    _iter_xrefs,
+    iter_helper_helper,
+)
+from .utils import (
+    Clickable,
+    directory_option,
+    force_option,
+    force_process_option,
+    strict_option,
+    zenodo_option,
+)
 from ..constants import (
     ALTS_DATA_RECORD,
     DEFINITIONS_RECORD,
@@ -19,39 +45,60 @@ from ..constants import (
     SPECIES_RECORD,
     SYNONYMS_RECORD,
     TYPEDEFS_RECORD,
+    DatabaseKwargs,
 )
-from ..getters import db_output_helper
-from ..xrefdb.xrefs_pipeline import (
-    _iter_alts,
-    _iter_definitions,
-    _iter_metadata,
-    _iter_names,
-    _iter_properties,
-    _iter_relations,
-    _iter_species,
-    _iter_synonyms,
-    _iter_typedefs,
-    _iter_xrefs,
-)
+from ..getters import db_output_helper, get_ontology
 __all__ = [
     "main",
 ]
+logger = logging.getLogger(__name__)
 @click.group(name="database")
 def main():
     """Build the PyOBO Database."""
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
+skip_pyobo_option = click.option(
+    "--skip-pyobo",
+    is_flag=True,
+    help="Skip prefixes whose ontologies are implemented as PyOBO sources",
+)
+skip_below_option = click.option(
+    "--skip-below", help="Skip prefixes lexically sorted below the given one"
+)
+def database_annotate(f: Clickable) -> Clickable:
+    """Add appropriate decorators to database CLI functions."""
+    decorators = [
+        main.command(),
+        zenodo_option,
+        verbose_option,
+        directory_option,
+        force_option,
+        force_process_option,
+        strict_option,
+        skip_pyobo_option,
+        skip_below_option,
+    ]
+    for decorator in decorators:
+        f = decorator(f)
+    return f
+def _update_database_kwargs(kwargs: DatabaseKwargs) -> DatabaseKwargs:
+    updated_kwargs = dict(kwargs)
+    updated_kwargs.update(force=False, force_process=False)
+    # FIXME get typing right on next line
+    return updated_kwargs  # type:ignore
+@database_annotate
 @click.pass_context
-def build(ctx: click.Context, directory: str, zenodo: bool, no_strict: bool, force: bool):
+def build(ctx: click.Context, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Build all databases."""
     # if no_strict and zenodo:
     #    click.secho("Must be strict before uploading", fg="red")
@@ -59,103 +106,97 @@ def build(ctx: click.Context, directory: str, zenodo: bool, no_strict: bool, for
     with logging_redirect_tqdm():
         click.secho("Collecting metadata and building", fg="cyan", bold=True)
         # note that this is the only one that needs a force=force
-        ctx.invoke(metadata, directory=directory, no_strict=no_strict, force=force)
+        ctx.invoke(metadata, **kwargs)
+        # After running once, we don't want to force or re-process.
+        # All the other arguments come along for the ride!
+        updated_kwargs = _update_database_kwargs(kwargs)
         click.secho("Alternate Identifiers", fg="cyan", bold=True)
-        ctx.invoke(alts, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(alts, **updated_kwargs)
         click.secho("Synonyms", fg="cyan", bold=True)
-        ctx.invoke(synonyms, directory=directory, zenodo=zenodo, no_strict=no_strict)
-        click.secho("Xrefs", fg="cyan", bold=True)
-        ctx.invoke(xrefs, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(synonyms, **updated_kwargs)
+        click.secho("Mappings", fg="cyan", bold=True)
+        ctx.invoke(mappings, **updated_kwargs)
         click.secho("Names", fg="cyan", bold=True)
-        ctx.invoke(names, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(names, **updated_kwargs)
         click.secho("Definitions", fg="cyan", bold=True)
-        ctx.invoke(definitions, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(definitions, **updated_kwargs)
         click.secho("Properties", fg="cyan", bold=True)
-        ctx.invoke(properties, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(properties, **updated_kwargs)
         click.secho("Relations", fg="cyan", bold=True)
-        ctx.invoke(relations, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(relations, **updated_kwargs)
+        click.secho("Edges", fg="cyan", bold=True)
+        ctx.invoke(edges, **updated_kwargs)
         click.secho("Typedefs", fg="cyan", bold=True)
-        ctx.invoke(typedefs, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(typedefs, **updated_kwargs)
         click.secho("Species", fg="cyan", bold=True)
-        ctx.invoke(species, directory=directory, zenodo=zenodo, no_strict=no_strict)
+        ctx.invoke(species, **updated_kwargs)
+@database_annotate
+def cache(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
+    """Cache all things."""
+    if zenodo:
+        click.echo("no zenodo for caching")
-skip_below_option = click.option("--skip-below")
-skip_below_exclusive_option = click.option("--skip-below-exclusive", is_flag=True)
+    kwargs["force_process"] = True
+    with logging_redirect_tqdm():
+        for _ in iter_helper_helper(get_ontology, **kwargs):
+            # this pass intentional to consume the iterable
+            pass
-@main.command()
-@verbose_option
-@directory_option
-@force_option
-@no_strict_option
-@skip_below_option
-@click.option("--skip-pyobo")
-def metadata(
-    directory: str, no_strict: bool, force: bool, skip_below: Optional[str], skip_pyobo: bool
-):
+@database_annotate
+def metadata(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-metadata dump."""
+    from ..api import get_metadata
+    def _iter_metadata(
+        **kwargs: Unpack[IterHelperHelperDict],
+    ) -> Iterable[tuple[str, str, str, bool]]:
+        for prefix, data in iter_helper_helper(get_metadata, **kwargs):
+            version = data["version"]
+            logger.debug(f"[{prefix}] using version {version}")
+            yield prefix, version, data["date"], bioregistry.is_deprecated(prefix)
+    it = _iter_metadata(**kwargs)
     db_output_helper(
-        _iter_metadata,
+        it,
         "metadata",
         ("prefix", "version", "date", "deprecated"),
-        strict=not no_strict,
-        force=force,
-        directory=directory,
         use_gzip=False,
-        skip_below=skip_below,
-        skip_pyobo=skip_pyobo,
+        directory=directory,
     )
+    if zenodo:
+        click.secho("No Zenodo record for metadata", fg="red")
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-@skip_below_option
-@skip_below_exclusive_option
-def names(
-    directory: str,
-    zenodo: bool,
-    no_strict: bool,
-    force: bool,
-    skip_below: Optional[str],
-    skip_below_exclusive: bool,
-):
+@database_annotate
+def names(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-identifier-name dump."""
+    it = _iter_names(**kwargs)
     with logging_redirect_tqdm():
         paths = db_output_helper(
-            _iter_names,
+            it,
             "names",
             ("prefix", "identifier", "name"),
-            strict=not no_strict,
-            force=force,
             directory=directory,
-            skip_below=skip_below,
-            skip_below_inclusive=not skip_below_exclusive,
         )
     if zenodo:
         # see https://zenodo.org/record/4020486
         update_zenodo(OOH_NA_NA_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def species(directory: str, zenodo: bool, no_strict: bool, force: bool):
+@database_annotate
+def species(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-identifier-species dump."""
     with logging_redirect_tqdm():
+        it = _iter_species(**kwargs)
         paths = db_output_helper(
-            _iter_species,
+            it,
             "species",
             ("prefix", "identifier", "species"),
-            strict=not no_strict,
-            force=force,
             directory=directory,
         )
     if zenodo:
@@ -163,110 +204,90 @@ def species(directory: str, zenodo: bool, no_strict: bool, force: bool):
         update_zenodo(SPECIES_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def definitions(directory: str, zenodo: bool, no_strict: bool, force: bool):
+def _extend_skip_set(kwargs: DatabaseKwargs, skip_set: set[str]) -> None:
+    ss = kwargs.get("skip_set")
+    if ss is None:
+        kwargs["skip_set"] = skip_set
+    else:
+        ss.update(skip_set)
+@database_annotate
+def definitions(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-identifier-definition dump."""
     with logging_redirect_tqdm():
+        _extend_skip_set(kwargs, {"kegg.pathway", "kegg.genes", "kegg.genome", "umls"})
+        it = _iter_definitions(**kwargs)
         paths = db_output_helper(
-            _iter_definitions,
+            it,
             "definitions",
             ("prefix", "identifier", "definition"),
-            strict=not no_strict,
-            force=force,
             directory=directory,
-            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome", "umls"},
         )
     if zenodo:
         # see https://zenodo.org/record/4637061
         update_zenodo(DEFINITIONS_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def typedefs(directory: str, zenodo: bool, no_strict: bool, force: bool):
+@database_annotate
+def typedefs(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the typedef prefix-identifier-name dump."""
     with logging_redirect_tqdm():
+        _extend_skip_set(kwargs, {"ncbigene", "kegg.pathway", "kegg.genes", "kegg.genome"})
+        it = _iter_typedefs(**kwargs)
         paths = db_output_helper(
-            _iter_typedefs,
+            it,
             "typedefs",
             ("prefix", "typedef_prefix", "identifier", "name"),
-            strict=not no_strict,
-            force=force,
-            directory=directory,
             use_gzip=False,
-            skip_set={"ncbigene", "kegg.pathway", "kegg.genes", "kegg.genome"},
+            directory=directory,
         )
     if zenodo:
         # see https://zenodo.org/record/4644013
         update_zenodo(TYPEDEFS_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def alts(directory: str, zenodo: bool, force: bool, no_strict: bool):
+@database_annotate
+def alts(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-alt-id dump."""
     with logging_redirect_tqdm():
+        _extend_skip_set(kwargs, {"kegg.pathway", "kegg.genes", "kegg.genome", "umls"})
+        it = _iter_alts(**kwargs)
         paths = db_output_helper(
-            _iter_alts,
+            it,
             "alts",
             ("prefix", "identifier", "alt"),
             directory=directory,
-            force=force,
-            strict=not no_strict,
-            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome", "umls"},
         )
     if zenodo:
         # see https://zenodo.org/record/4021476
         update_zenodo(ALTS_DATA_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def synonyms(directory: str, zenodo: bool, force: bool, no_strict: bool):
+@database_annotate
+def synonyms(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-identifier-synonym dump."""
     with logging_redirect_tqdm():
+        _extend_skip_set(kwargs, {"kegg.pathway", "kegg.genes", "kegg.genome"})
+        it = _iter_synonyms(**kwargs)
         paths = db_output_helper(
-            _iter_synonyms,
+            it,
             "synonyms",
             ("prefix", "identifier", "synonym"),
             directory=directory,
-            force=force,
-            strict=not no_strict,
-            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome"},
         )
     if zenodo:
         # see https://zenodo.org/record/4021482
         update_zenodo(SYNONYMS_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def relations(directory: str, zenodo: bool, force: bool, no_strict: bool):
+@database_annotate
+def relations(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the relation dump."""
     with logging_redirect_tqdm():
+        it = _iter_relations(**kwargs)
         paths = db_output_helper(
-            _iter_relations,
+            it,
             "relations",
             (
                 "source_prefix",
@@ -276,62 +297,91 @@ def relations(directory: str, zenodo: bool, force: bool, no_strict: bool):
                 "target_prefix",
                 "target_identifier",
             ),
-            directory=directory,
-            force=force,
-            strict=not no_strict,
             summary_detailed=(0, 2, 3),  # second column corresponds to relation type
+            directory=directory,
         )
     if zenodo:
         # see https://zenodo.org/record/4625167
         update_zenodo(RELATIONS_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
+@database_annotate
+def edges(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
+    """Make the edges dump."""
+    with logging_redirect_tqdm():
+        it = _iter_edges(**kwargs)
+        db_output_helper(
+            it,
+            "edges",
+            (
+                ":START_ID",
+                ":TYPE",
+                ":END_ID",
+                "provenance",
+            ),
+            directory=directory,
+        )
+    if zenodo:
+        raise NotImplementedError
+@database_annotate
+def properties(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the properties dump."""
     with logging_redirect_tqdm():
+        it = _iter_properties(**kwargs)
         paths = db_output_helper(
-            _iter_properties,
+            it,
             "properties",
             ("prefix", "identifier", "property", "value"),
-            directory=directory,
-            force=force,
-            strict=not no_strict,
             summary_detailed=(0, 2),  # second column corresponds to property type
+            directory=directory,
         )
     if zenodo:
         # see https://zenodo.org/record/4625172
         update_zenodo(PROPERTIES_RECORD, paths)
-@main.command()
-@verbose_option
-@directory_option
-@zenodo_option
-@force_option
-@no_strict_option
-def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):
+@database_annotate
+def xrefs(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
     """Make the prefix-identifier-xref dump."""
+    warnings.warn("Use pyobo.database.mappings instead", DeprecationWarning, stacklevel=2)
     with logging_redirect_tqdm():
+        it = _iter_xrefs(**kwargs)
         paths = db_output_helper(
-            _iter_xrefs,
+            it,
             "xrefs",
             ("prefix", "identifier", "xref_prefix", "xref_identifier", "provenance"),
-            directory=directory,
-            force=force,
-            strict=not no_strict,
             summary_detailed=(0, 2),  # second column corresponds to xref prefix
+            directory=directory,
         )
     if zenodo:
         # see https://zenodo.org/record/4021477
         update_zenodo(JAVERT_RECORD, paths)
+@database_annotate
+def mappings(zenodo: bool, directory: Path, **kwargs: Unpack[DatabaseKwargs]) -> None:
+    """Make the SSSOM dump."""
+    columns = [
+        "subject_id",
+        "object_id",
+        "predicate_id",
+        "mapping_justification",
+        "mapping_source",
+    ]
+    with logging_redirect_tqdm():
+        it = _iter_mappings(**kwargs)
+        db_output_helper(
+            it,
+            "mappings",
+            columns,
+            directory=directory,
+        )
+    if zenodo:
+        raise NotImplementedError("need to do initial manual upload of SSSOM build")
 if __name__ == "__main__":
     logging.captureWarnings(True)
     with logging_redirect_tqdm():

pyobo/cli/database_utils.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""Pipeline for extracting all xrefs from OBO documents available."""
+from __future__ import annotations
+import gzip
+import logging
+import warnings
+from collections.abc import Iterable
+from functools import partial
+from typing import cast
+from tqdm.auto import tqdm
+from typing_extensions import Unpack
+from ..api import (
+    get_edges_df,
+    get_id_definition_mapping,
+    get_id_name_mapping,
+    get_id_species_mapping,
+    get_id_synonyms_mapping,
+    get_id_to_alts,
+    get_mappings_df,
+    get_properties_df,
+    get_relations_df,
+    get_typedef_df,
+    get_xrefs_df,
+)
+from ..getters import IterHelperHelperDict, iter_helper, iter_helper_helper
+from ..sources import pubchem
+from ..sources.ncbi import ncbigene
+from ..utils.path import ensure_path
+logger = logging.getLogger(__name__)
+def _iter_ncbigene(left: int, right: int) -> Iterable[tuple[str, str, str]]:
+    ncbi_path = ensure_path(ncbigene.PREFIX, url=ncbigene.GENE_INFO_URL)
+    with gzip.open(ncbi_path, "rt") as file:
+        next(file)  # throw away the header
+        for line in tqdm(
+            file, desc=f"[{ncbigene.PREFIX}] extracting names", unit_scale=True, total=56_700_000
+        ):
+            parts = line.strip().split("\t")
+            yield ncbigene.PREFIX, parts[left], parts[right]
+def _iter_names(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]:
+    """Iterate over all prefix-identifier-name triples we can get.
+    :param leave: should the tqdm be left behind?
+    """
+    yield from iter_helper(get_id_name_mapping, leave=leave, **kwargs)
+    yield from _iter_ncbigene(1, 2)
+    yield from _iter_pubchem_compound()
+def _iter_pubchem_compound():
+    pcc_path = pubchem._ensure_cid_name_path()
+    with gzip.open(pcc_path, mode="rt", encoding="ISO-8859-1") as file:
+        for line in tqdm(
+            file, desc=f"[{pubchem.PREFIX}] extracting names", unit_scale=True, total=119_000_000
+        ):
+            identifier, name = line.strip().split("\t", 1)
+            yield pubchem.PREFIX, identifier, name
+def _iter_species(
+    leave: bool = False, **kwargs: Unpack[IterHelperHelperDict]
+) -> Iterable[tuple[str, str, str]]:
+    """Iterate over all prefix-identifier-species triples we can get."""
+    yield from iter_helper(get_id_species_mapping, leave=leave, **kwargs)
+    # TODO ncbigene
+def _iter_definitions(
+    leave: bool = False, **kwargs: Unpack[IterHelperHelperDict]
+) -> Iterable[tuple[str, str, str]]:
+    """Iterate over all prefix-identifier-descriptions triples we can get."""
+    yield from iter_helper(get_id_definition_mapping, leave=leave, **kwargs)
+    yield from _iter_ncbigene(1, 8)
+def _iter_alts(
+    leave: bool = False, **kwargs: Unpack[IterHelperHelperDict]
+) -> Iterable[tuple[str, str, str]]:
+    for prefix, identifier, alts in iter_helper(get_id_to_alts, leave=leave, **kwargs):
+        for alt in alts:
+            yield prefix, identifier, alt
+def _iter_synonyms(
+    leave: bool = False, **kwargs: Unpack[IterHelperHelperDict]
+) -> Iterable[tuple[str, str, str]]:
+    """Iterate over all prefix-identifier-synonym triples we can get.
+    :param leave: should the tqdm be left behind?
+    """
+    for prefix, identifier, synonyms in iter_helper(get_id_synonyms_mapping, leave=leave, **kwargs):
+        for synonym in synonyms:
+            yield prefix, identifier, synonym
+def _iter_typedefs(**kwargs: Unpack[IterHelperHelperDict]) -> Iterable[tuple[str, str, str, str]]:
+    """Iterate over all prefix-identifier-name triples we can get."""
+    for prefix, df in iter_helper_helper(get_typedef_df, **kwargs):
+        for t in df.values:
+            if all(t):
+                yield cast(tuple[str, str, str, str], (prefix, *t))
+def _iter_relations(
+    **kwargs: Unpack[IterHelperHelperDict],
+) -> Iterable[tuple[str, str, str, str, str, str]]:
+    for prefix, df in iter_helper_helper(get_relations_df, **kwargs):
+        for t in df.values:
+            if all(t):
+                yield cast(tuple[str, str, str, str, str, str], (prefix, *t))
+def _iter_edges(**kwargs: Unpack[IterHelperHelperDict]) -> Iterable[tuple[str, str, str, str]]:
+    for prefix, df in iter_helper_helper(get_edges_df, **kwargs):
+        for row in df.values:
+            yield cast(tuple[str, str, str, str], (*row, prefix))
+def _iter_properties(**kwargs: Unpack[IterHelperHelperDict]) -> Iterable[tuple[str, str, str, str]]:
+    for prefix, df in iter_helper_helper(get_properties_df, **kwargs):
+        for t in df.values:
+            if all(t):
+                yield cast(tuple[str, str, str, str], (prefix, *t))
+def _iter_xrefs(
+    **kwargs: Unpack[IterHelperHelperDict],
+) -> Iterable[tuple[str, str, str, str, str]]:
+    warnings.warn(f"use {_iter_mappings.__name__} instead", DeprecationWarning, stacklevel=2)
+    it = iter_helper_helper(get_xrefs_df, **kwargs)
+    for prefix, df in it:
+        df.dropna(inplace=True)
+        for row in df.values:
+            if any(not element for element in row):
+                continue
+            yield cast(tuple[str, str, str, str, str], (prefix, *row, prefix))
+def _iter_mappings(
+    **kwargs: Unpack[IterHelperHelperDict],
+) -> Iterable[tuple[str, str, str, str, str]]:
+    f = partial(get_mappings_df, names=False, include_mapping_source_column=True)
+    # hack in a name to the partial function object since
+    # it's used for the tqdm description in iter_helper_helper
+    f.__name__ = "get_mappings_df"  # type:ignore
+    it = iter_helper_helper(f, **kwargs)
+    for _prefix, df in it:
+        yield from df.values

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl