PyPI - pyobo - Versions diffs - 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
pyobo/apps/__init__.py +0 -3
pyobo/apps/cli.py +0 -24
pyobo/apps/gilda/__init__.py +0 -3
pyobo/apps/gilda/__main__.py +0 -8
pyobo/apps/gilda/app.py +0 -48
pyobo/apps/gilda/cli.py +0 -36
pyobo/apps/gilda/templates/base.html +0 -33
pyobo/apps/gilda/templates/home.html +0 -11
pyobo/apps/gilda/templates/matches.html +0 -32
pyobo/apps/mapper/__init__.py +0 -3
pyobo/apps/mapper/__main__.py +0 -11
pyobo/apps/mapper/cli.py +0 -37
pyobo/apps/mapper/mapper.py +0 -187
pyobo/apps/mapper/templates/base.html +0 -35
pyobo/apps/mapper/templates/mapper_home.html +0 -64
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.1.dist-info/RECORD +0 -173
pyobo-0.11.1.dist-info/WHEEL +0 -5
pyobo-0.11.1.dist-info/top_level.txt +0 -1

pyobo/getters.py CHANGED Viewed

@@ -1,38 +1,48 @@
 """Utilities for OBO files."""
+from __future__ import annotations
 import datetime
 import gzip
 import json
 import logging
 import pathlib
 import subprocess
+import time
 import typing
 import urllib.error
+import zipfile
 from collections import Counter
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Callable, Iterable, Mapping, Sequence
 from pathlib import Path
-from typing import (
-    Callable,
-    Optional,
-    TypeVar,
-    Union,
-)
+from textwrap import indent
+from typing import TypeVar
 import bioregistry
+import click
+import pystow.utils
 from bioontologies import robot
+from tabulate import tabulate
 from tqdm.auto import tqdm
+from typing_extensions import Unpack
-from .constants import DATABASE_DIRECTORY
-from .identifier_utils import MissingPrefixError, wrap_norm_prefix
+from .constants import (
+    DATABASE_DIRECTORY,
+    GetOntologyKwargs,
+    IterHelperHelperDict,
+    SlimGetOntologyKwargs,
+)
+from .identifier_utils import ParseError, wrap_norm_prefix
 from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
+from .reader import from_obo_path, from_obonet
 from .struct import Obo
 from .utils.io import get_writer
 from .utils.path import ensure_path, prefix_directory_join
 from .version import get_git_hash, get_version
 __all__ = [
-    "get_ontology",
     "NoBuildError",
+    "get_ontology",
 ]
 logger = logging.getLogger(__name__)
@@ -48,7 +58,14 @@ class UnhandledFormatError(NoBuildError):
 #: The following prefixes can not be loaded through ROBOT without
 #: turning off integrity checks
-REQUIRES_NO_ROBOT_CHECK = {"clo", "vo", "orphanet.ordo", "orphanet"}
+REQUIRES_NO_ROBOT_CHECK = {
+    "clo",
+    "vo",
+    "orphanet.ordo",
+    "orphanet",
+    "foodon",
+    "caloha",
+}
 @wrap_norm_prefix
@@ -56,58 +73,85 @@ def get_ontology(
     prefix: str,
     *,
     force: bool = False,
-    rewrite: bool = False,
-    strict: bool = True,
-    version: Optional[str] = None,
+    force_process: bool = False,
+    strict: bool = False,
+    version: str | None = None,
     robot_check: bool = True,
+    upgrade: bool = True,
+    cache: bool = True,
+    use_tqdm: bool = True,
 ) -> Obo:
     """Get the OBO for a given graph.
     :param prefix: The prefix of the ontology to look up
     :param version: The pre-looked-up version of the ontology
     :param force: Download the data again
-    :param rewrite: Should the OBO cache be rewritten? Automatically set to true if ``force`` is true
-    :param strict: Should CURIEs be treated strictly? If true, raises exceptions on invalid/malformed
-    :param robot_check:
-        If set to false, will send the ``--check=false`` command to ROBOT to disregard
-        malformed ontology components. Necessary to load some ontologies like VO.
+    :param force_process: Should the OBO cache be rewritten? Automatically set to true
+        if ``force`` is true
+    :param strict: Should CURIEs be treated strictly? If true, raises exceptions on
+        invalid/malformed CURIEs
+    :param robot_check: If set to false, will send the ``--check=false`` command to
+        ROBOT to disregard malformed ontology components. Necessary to load some
+        ontologies like VO.
+    :param upgrade: If set to true, will automatically upgrade relationships, such as
+        ``obo:chebi#part_of`` to ``BFO:0000051``
+    :param cache: Should cached objects be written? defaults to True
     :returns: An OBO object
     :raises OnlyOWLError: If the OBO foundry only has an OWL document for this resource.
-    Alternate usage if you have a custom url::
+    Alternate usage if you have a custom url
+    .. code-block:: python
-    >>> from pystow.utils import download
-    >>> from pyobo import Obo, from_obo_path
-    >>> url = ...
-    >>> obo_path = ...
-    >>> download(url=url, path=path)
-    >>> obo = from_obo_path(path)
+        from pystow.utils import download
+        from pyobo import Obo, from_obo_path
+        url = ...
+        obo_path = ...
+        download(url=url, path=obo_path)
+        obo = from_obo_path(obo_path)
     """
     if force:
-        rewrite = True
+        force_process = True
     if prefix == "uberon":
         logger.info("UBERON has so much garbage in it that defaulting to non-strict parsing")
         strict = False
-    obonet_json_gz_path = prefix_directory_join(
-        prefix, name=f"{prefix}.obonet.json.gz", ensure_exists=False, version=version
-    )
-    if obonet_json_gz_path.exists() and not force:
-        from .reader import from_obonet
-        from .utils.cache import get_gzipped_graph
-        logger.debug("[%s] using obonet cache at %s", prefix, obonet_json_gz_path)
-        return from_obonet(get_gzipped_graph(obonet_json_gz_path))
+    if not cache:
+        logger.debug("[%s] caching was turned off, so dont look for an obonet file", prefix)
+        obonet_json_gz_path = None
+    else:
+        obonet_json_gz_path = prefix_directory_join(
+            prefix, name=f"{prefix}.obonet.json.gz", ensure_exists=False, version=version
+        )
+        logger.debug(
+            "[%s] caching is turned on, so look for an obonet file at %s",
+            prefix,
+            obonet_json_gz_path,
+        )
+        if obonet_json_gz_path.exists() and not force:
+            from .utils.cache import get_gzipped_graph
+            logger.debug("[%s] using obonet cache at %s", prefix, obonet_json_gz_path)
+            return from_obonet(
+                get_gzipped_graph(obonet_json_gz_path),
+                strict=strict,
+                version=version,
+                upgrade=upgrade,
+                use_tqdm=use_tqdm,
+            )
+        else:
+            logger.debug("[%s] no obonet cache found at %s", prefix, obonet_json_gz_path)
     if has_nomenclature_plugin(prefix):
         obo = run_nomenclature_plugin(prefix, version=version)
-        logger.debug("[%s] caching nomenclature plugin", prefix)
-        obo.write_default(force=rewrite)
+        if cache:
+            logger.debug("[%s] caching nomenclature plugin", prefix)
+            obo.write_default(force=force_process)
         return obo
-    logger.debug("[%s] no obonet cache found at %s", prefix, obonet_json_gz_path)
     ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
     if path is None:
         raise NoBuildError(prefix)
@@ -122,25 +166,23 @@ def get_ontology(
     else:
         raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
-    from .reader import from_obo_path
-    obo = from_obo_path(path, prefix=prefix, strict=strict)
-    if version is not None:
-        if obo.data_version is None:
-            logger.warning("[%s] did not have a version, overriding with %s", obo.ontology, version)
-            obo.data_version = version
-        elif obo.data_version != version:
-            logger.warning(
-                "[%s] had version %s, overriding with %s", obo.ontology, obo.data_version, version
-            )
-            obo.data_version = version
-    obo.write_default(force=rewrite)
+    obo = from_obo_path(
+        path,
+        prefix=prefix,
+        strict=strict,
+        version=version,
+        upgrade=upgrade,
+        use_tqdm=use_tqdm,
+        _cache_path=obonet_json_gz_path,
+    )
+    if cache:
+        obo.write_default(force=force_process)
     return obo
 def _ensure_ontology_path(
-    prefix: str, force, version
-) -> Union[tuple[str, Path], tuple[None, None]]:
+    prefix: str, force: bool, version: str | None
+) -> tuple[str, Path] | tuple[None, None]:
     for ontology_format, url in [
         ("obo", bioregistry.get_obo_download(prefix)),
         ("owl", bioregistry.get_owl_download(prefix)),
@@ -148,8 +190,8 @@ def _ensure_ontology_path(
     ]:
         if url is not None:
             try:
-                path = Path(ensure_path(prefix, url=url, force=force, version=version))
-            except urllib.error.HTTPError:
+                path = ensure_path(prefix, url=url, force=force, version=version)
+            except (urllib.error.HTTPError, pystow.utils.DownloadError):
                 continue
             else:
                 return ontology_format, path
@@ -215,33 +257,42 @@ CANT_PARSE = {
     "xl",
 }
 SKIP = {
-    "ncbigene",  # too big, refs acquired from other dbs
-    "pubchem.compound",  # to big, can't deal with this now
-    "gaz",  # Gazetteer is irrelevant for biology
-    "ma",  # yanked
-    "bila",  # yanked
-    # FIXME below
-    "emapa",  # recently changed with EMAP... not sure what the difference is anymore
-    "kegg.genes",
-    "kegg.genome",
-    "kegg.pathway",
-    # URL is wrong
-    "ensemblglossary",
-    # Too much junk
-    "biolink",
+    "ncbigene": "too big, refs acquired from other dbs",
+    "pubchem.compound": "top big, can't deal with this now",
+    "gaz": "Gazetteer is irrelevant for biology",
+    "ma": "yanked",
+    "bila": "yanked",
+    # Can't download
+    "afpo": "unable to download",
+    "atol": "unable to download",
+    "eol": "unable to download, same source as atol",
+    "hog": "unable to download",
+    "vhog": "unable to download",
+    "gorel": "unable to download",
+    "dinto": "unable to download",
+    "gainesville.core": "unable to download",
+    "ato": "can't process",
+    "emapa": "recently changed with EMAP... not sure what the difference is anymore",
+    "kegg.genes": "needs fix",  # FIXME
+    "kegg.genome": "needs fix",  # FIXME
+    "kegg.pathway": "needs fix",  # FIXME
+    "ensemblglossary": "uri is wrong",
+    "epio": "content from fraunhofer is unreliable",
+    "epso": "content from fraunhofer is unreliable",
+    "gwascentral.phenotype": "website is down? or API changed?",  # FIXME
+    "gwascentral.study": "website is down? or API changed?",  # FIXME
 }
 X = TypeVar("X")
 def iter_helper(
-    f: Callable[[str], Mapping[str, X]],
+    f: Callable[[str, Unpack[GetOntologyKwargs]], Mapping[str, X]],
     leave: bool = False,
-    strict: bool = True,
-    **kwargs,
+    **kwargs: Unpack[IterHelperHelperDict],
 ) -> Iterable[tuple[str, str, X]]:
     """Yield all mappings extracted from each database given."""
-    for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs):
+    for prefix, mapping in iter_helper_helper(f, **kwargs):
         it = tqdm(
             mapping.items(),
             desc=f"iterating {prefix}",
@@ -250,22 +301,24 @@ def iter_helper(
             disable=None,
         )
         for key, value in it:
-            value = value.strip('"').replace("\n", " ").replace("\t", " ").replace("  ", " ")
+            if isinstance(value, str):
+                value = value.strip('"').replace("\n", " ").replace("\t", " ").replace("  ", " ")
+            # TODO deal with when this is not a string?
             if value:
                 yield prefix, key, value
 def _prefixes(
-    skip_below: Optional[str] = None,
+    skip_below: str | None = None,
     skip_below_inclusive: bool = True,
     skip_pyobo: bool = False,
-    skip_set: Optional[set[str]] = None,
+    skip_set: set[str] | None = None,
 ) -> Iterable[str]:
     for prefix, resource in sorted(bioregistry.read_registry().items()):
         if resource.no_own_terms:
             continue
         if prefix in SKIP:
-            tqdm.write(f"skipping {prefix} because in default skip set")
+            tqdm.write(f"skipping {prefix} because {SKIP[prefix]}")
             continue
         if skip_set and prefix in skip_set:
             tqdm.write(f"skipping {prefix} because in skip set")
@@ -287,37 +340,39 @@ def _prefixes(
 def iter_helper_helper(
-    f: Callable[[str], X],
+    f: Callable[[str, Unpack[GetOntologyKwargs]], X],
     use_tqdm: bool = True,
-    skip_below: Optional[str] = None,
-    skip_below_inclusive: bool = True,
+    skip_below: str | None = None,
     skip_pyobo: bool = False,
-    skip_set: Optional[set[str]] = None,
-    strict: bool = True,
-    **kwargs,
+    skip_set: set[str] | None = None,
+    **kwargs: Unpack[SlimGetOntologyKwargs],
 ) -> Iterable[tuple[str, X]]:
     """Yield all mappings extracted from each database given.
-    :param f: A function that takes a prefix and gives back something that will be used by an outer function.
+    :param f: A function that takes a prefix and gives back something that will be used
+        by an outer function.
     :param use_tqdm: If true, use the tqdm progress bar
-    :param skip_below: If true, skip sources whose names are less than this (used for iterative curation
+    :param skip_below: If given, skip sources whose names are less than this (used for
+        iterative curation)
     :param skip_pyobo: If true, skip sources implemented in PyOBO
     :param skip_set: A pre-defined blacklist to skip
-    :param strict: If true, will raise exceptions and crash the program instead of logging them.
+    :param strict: If true, will raise exceptions and crash the program instead of
+        logging them.
     :param kwargs: Keyword arguments passed to ``f``.
-    :yields: A prefix and the result of the callable ``f``
     :raises TypeError: If a type error is raised, it gets re-raised
     :raises urllib.error.HTTPError: If the resource could not be downloaded
     :raises urllib.error.URLError: If another problem was encountered during download
     :raises ValueError: If the data was not in the format that was expected (e.g., OWL)
+    :yields: A prefix and the result of the callable ``f``
     """
+    strict = kwargs.get("strict", True)
     prefixes = list(
         _prefixes(
             skip_set=skip_set,
             skip_below=skip_below,
             skip_pyobo=skip_pyobo,
-            skip_below_inclusive=skip_below_inclusive,
         )
     )
     prefix_it = tqdm(
@@ -325,24 +380,35 @@ def iter_helper_helper(
     )
     for prefix in prefix_it:
         prefix_it.set_postfix(prefix=prefix)
+        tqdm.write(
+            click.style(f"\n{prefix} - {bioregistry.get_name(prefix)}", fg="green", bold=True)
+        )
         try:
             yv = f(prefix, **kwargs)  # type:ignore
+        except (UnhandledFormatError, NoBuildError) as e:
+            # make sure this comes before the other runtimeerror catch
+            logger.warning("[%s] %s", prefix, e)
         except urllib.error.HTTPError as e:
             logger.warning("[%s] HTTP %s: unable to download %s", prefix, e.getcode(), e.geturl())
             if strict and not bioregistry.is_deprecated(prefix):
                 raise
-        except urllib.error.URLError:
-            logger.warning("[%s] unable to download", prefix)
+        except urllib.error.URLError as e:
+            logger.warning("[%s] unable to download - %s", prefix, e.reason)
             if strict and not bioregistry.is_deprecated(prefix):
                 raise
-        except MissingPrefixError as e:
-            logger.warning("[%s] missing prefix: %s", prefix, e)
+        except ParseError as e:
+            if not e.node:
+                logger.warning("[%s] %s", prefix, e)
+            else:
+                logger.warning(str(e))
             if strict and not bioregistry.is_deprecated(prefix):
                 raise e
+        except RuntimeError as e:
+            if "DrugBank" not in str(e):
+                raise
+            logger.warning("[drugbank] invalid credentials")
         except subprocess.CalledProcessError:
             logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix)
-        except UnhandledFormatError as e:
-            logger.warning("[%s] %s", prefix, e)
         except ValueError as e:
             if _is_xml(e):
                 # this means that it tried doing parsing on an xml page
@@ -355,6 +421,9 @@ def iter_helper_helper(
                 logger.exception(
                     "[%s] got exception %s while parsing", prefix, e.__class__.__name__
                 )
+        except zipfile.BadZipFile as e:
+            # This can happen if there's an error on UMLS
+            logger.exception("[%s] got exception %s while parsing", prefix, e.__class__.__name__)
         except TypeError as e:
             logger.exception("[%s] got exception %s while parsing", prefix, e.__class__.__name__)
             if strict:
@@ -369,7 +438,7 @@ def _is_xml(e) -> bool:
     )
-def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
+def _prep_dir(directory: None | str | pathlib.Path) -> pathlib.Path:
     if directory is None:
         rv = DATABASE_DIRECTORY
     elif isinstance(directory, str):
@@ -383,26 +452,28 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path:
 def db_output_helper(
-    f: Callable[..., Iterable[tuple[str, ...]]],
+    it: Iterable[tuple[str, ...]],
     db_name: str,
     columns: Sequence[str],
     *,
-    directory: Union[None, str, pathlib.Path] = None,
-    strict: bool = True,
+    directory: None | str | pathlib.Path = None,
+    strict: bool = False,
     use_gzip: bool = True,
-    summary_detailed: Optional[Sequence[int]] = None,
-    **kwargs,
+    summary_detailed: Sequence[int] | None = None,
 ) -> list[pathlib.Path]:
     """Help output database builds.
-    :param f: A function that takes a prefix and gives back something that will be used by an outer function.
+    :param it: An iterable of rows (tuples of strings) that get written to the output
+        files.
     :param db_name: name of the output resource (e.g., "alts", "names")
     :param columns: The names of the columns
-    :param directory: The directory to output everything, or defaults to :data:`pyobo.constants.DATABASE_DIRECTORY`.
+    :param directory: The directory to output everything, or defaults to
+        :data:`pyobo.constants.DATABASE_DIRECTORY`.
     :param strict: Passed to ``f`` by keyword
-    :param kwargs: Passed to ``f`` by splat
     :returns: A sequence of paths that got created.
     """
+    start = time.time()
     directory = _prep_dir(directory)
     c: typing.Counter[str] = Counter()
@@ -415,10 +486,17 @@ def db_output_helper(
     db_sample_path = directory.joinpath(f"{db_name}_sample.tsv")
     db_summary_path = directory.joinpath(f"{db_name}_summary.tsv")
     db_summary_detailed_path = directory.joinpath(f"{db_name}_summary_detailed.tsv")
+    db_metadata_path = directory.joinpath(f"{db_name}_metadata.json")
+    rv: list[tuple[str, pathlib.Path]] = [
+        ("Metadata", db_metadata_path),
+        ("Data", db_path),
+        ("Sample", db_sample_path),
+        ("Summary", db_summary_path),
+    ]
     logger.info("writing %s to %s", db_name, db_path)
     logger.info("writing %s sample to %s", db_name, db_sample_path)
-    it = f(strict=strict, **kwargs)
+    sample_rows = []
     with gzip.open(db_path, mode="wt") if use_gzip else open(db_path, "w") as gzipped_file:
         writer = get_writer(gzipped_file)
@@ -430,12 +508,13 @@ def db_output_helper(
             writer.writerow(columns)
             sample_writer.writerow(columns)
-            for row, _ in zip(it, range(10)):
+            for row, _ in zip(it, range(10), strict=False):
                 c[row[0]] += 1
                 if summary_detailed is not None:
                     c_detailed[tuple(row[i] for i in summary_detailed)] += 1
                 writer.writerow(row)
                 sample_writer.writerow(row)
+                sample_rows.append(row)
         # continue just in the gzipped one
         for row in it:
@@ -444,7 +523,6 @@ def db_output_helper(
                 c_detailed[tuple(row[i] for i in summary_detailed)] += 1
             writer.writerow(row)
-    logger.info(f"writing {db_name} summary to {db_summary_path}")
     with open(db_summary_path, "w") as file:
         writer = get_writer(file)
         writer.writerows(c.most_common())
@@ -454,8 +532,8 @@ def db_output_helper(
         with open(db_summary_detailed_path, "w") as file:
             writer = get_writer(file)
             writer.writerows((*keys, v) for keys, v in c_detailed.most_common())
+        rv.append(("Summary (Detailed)", db_summary_detailed_path))
-    db_metadata_path = directory.joinpath(f"{db_name}_metadata.json")
     with open(db_metadata_path, "w") as file:
         json.dump(
             {
@@ -468,12 +546,12 @@ def db_output_helper(
             indent=2,
         )
-    rv: list[pathlib.Path] = [
-        db_metadata_path,
-        db_path,
-        db_sample_path,
-        db_summary_path,
-    ]
-    if summary_detailed:
-        rv.append(db_summary_detailed_path)
-    return rv
+    elapsed = time.time() - start
+    click.secho(f"\nWrote the following files in {elapsed:.1f} seconds\n", fg="green")
+    click.secho(indent(tabulate(rv), " "), fg="green")
+    click.secho("\nSample rows:\n", fg="green")
+    click.secho(indent(tabulate(sample_rows, headers=columns), " "), fg="green")
+    click.echo()
+    return [path for _, path in rv]

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl