PyPI - pyobo - Versions diffs - 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
pyobo/apps/__init__.py +0 -3
pyobo/apps/cli.py +0 -24
pyobo/apps/gilda/__init__.py +0 -3
pyobo/apps/gilda/__main__.py +0 -8
pyobo/apps/gilda/app.py +0 -48
pyobo/apps/gilda/cli.py +0 -36
pyobo/apps/gilda/templates/base.html +0 -33
pyobo/apps/gilda/templates/home.html +0 -11
pyobo/apps/gilda/templates/matches.html +0 -32
pyobo/apps/mapper/__init__.py +0 -3
pyobo/apps/mapper/__main__.py +0 -11
pyobo/apps/mapper/cli.py +0 -37
pyobo/apps/mapper/mapper.py +0 -187
pyobo/apps/mapper/templates/base.html +0 -35
pyobo/apps/mapper/templates/mapper_home.html +0 -64
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.1.dist-info/RECORD +0 -173
pyobo-0.11.1.dist-info/WHEEL +0 -5
pyobo-0.11.1.dist-info/top_level.txt +0 -1

pyobo/apps/mapper/mapper.py DELETED Viewed

@@ -1,187 +0,0 @@
-# -*- coding: utf-8 -*-
-"""PyOBO's Mapping Service.
-Run with ``python -m pyobo.apps.mapper``.
-"""
-import logging
-from functools import lru_cache
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
-import bioregistry
-import pandas as pd
-from flasgger import Swagger
-from flask import (
-    Blueprint,
-    Flask,
-    abort,
-    current_app,
-    jsonify,
-    render_template,
-    url_for,
-)
-from flask_bootstrap import VERSION_BOOTSTRAP, Bootstrap
-from werkzeug.local import LocalProxy
-from pyobo import Canonicalizer
-from pyobo.constants import PROVENANCE, SOURCE_PREFIX, TARGET_PREFIX
-from pyobo.identifier_utils import normalize_curie
-from pyobo.resource_utils import ensure_inspector_javert_df
-__all__ = [
-    "get_app",
-]
-logger = logging.getLogger(__name__)
-summary_df = LocalProxy(lambda: current_app.config["summary"])
-canonicalizer: Canonicalizer = LocalProxy(lambda: current_app.config["canonicalizer"])
-@lru_cache()
-def _single_source_shortest_path(curie: str) -> Optional[Mapping[str, List[Mapping[str, str]]]]:
-    return canonicalizer.single_source_shortest_path(curie=curie)
-@lru_cache()
-def _all_shortest_paths(source_curie: str, target_curie: str) -> List[List[Mapping[str, str]]]:
-    return canonicalizer.all_shortest_paths(source_curie=source_curie, target_curie=target_curie)
-#: The blueprint that gets added to the app
-search_blueprint = Blueprint("search", __name__)
-@search_blueprint.route("/")
-def home():
-    """Show the home page."""
-    return render_template("mapper_home.html")
-@search_blueprint.route("/mappings/<curie>")
-def single_source_mappings(curie: str):
-    """Return all length xrefs from the given identifier."""
-    if curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(curie=curie),
-            message="could not find curie",
-        )
-    return jsonify(_single_source_shortest_path(curie))
-@search_blueprint.route("/mappings/<source_curie>/<target_curie>")
-def all_mappings(source_curie: str, target_curie: str):
-    """Return all shortest paths of xrefs between the two identifiers."""
-    if source_curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(source_curie=source_curie, target_curie=target_curie),
-            message="could not find source curie",
-        )
-    if target_curie not in canonicalizer.graph:
-        return jsonify(
-            success=False,
-            query=dict(source_curie=source_curie, target_curie=target_curie),
-            message="could not find target curie",
-        )
-    return jsonify(_all_shortest_paths(source_curie, target_curie))
-@search_blueprint.route("/mappings/summarize")
-def summarize():
-    """Summarize the mappings."""
-    return summary_df.to_html(index=False)
-@search_blueprint.route("/mappings/summarize_by/<prefix>")
-def summarize_one(prefix: str):
-    """Summarize the mappings."""
-    norm_prefix = bioregistry.normalize_prefix(prefix)
-    if norm_prefix is None:
-        return abort(500, f"invalid prefix: {prefix}")
-    in_df = summary_df.loc[summary_df[TARGET_PREFIX] == norm_prefix, [SOURCE_PREFIX, "count"]]
-    out_df = summary_df.loc[summary_df[SOURCE_PREFIX] == norm_prefix, [TARGET_PREFIX, "count"]]
-    return f"""
-    <h1>Incoming Mappings to {norm_prefix}</h1>
-    {in_df.to_html(index=False)}
-    <h1>Outgoing Mappings from {norm_prefix}</h1>
-    {out_df.to_html(index=False)}
-    """
-@search_blueprint.route("/canonicalize/<curie>")
-def canonicalize(curie: str):
-    """Return the best CURIE."""
-    # TODO maybe normalize the curie first?
-    norm_prefix, norm_identifier = normalize_curie(curie)
-    if norm_prefix is None or norm_identifier is None:
-        return jsonify(
-            query=curie,
-            normalizable=False,
-        )
-    norm_curie = f"{norm_prefix}:{norm_identifier}"
-    rv: Dict[str, Any] = dict(query=curie)
-    if norm_curie != curie:
-        rv["norm_curie"] = norm_curie
-    if norm_curie not in canonicalizer.graph:
-        rv["found"] = False
-    else:
-        result_curie = canonicalizer.canonicalize(norm_curie)
-        rv.update(
-            found=True,
-            result=result_curie,
-            mappings=url_for(
-                f".{all_mappings.__name__}",
-                source_curie=norm_curie,
-                target_curie=result_curie,
-            ),
-        )
-    return jsonify(rv)
-def get_app(paths: Union[None, str, Iterable[str]] = None) -> Flask:
-    """Build the Flask app."""
-    app = Flask(__name__)
-    Swagger(app)
-    logger.info("using bootstrap_flask %s", VERSION_BOOTSTRAP)
-    Bootstrap(app)
-    if paths is None:
-        df = ensure_inspector_javert_df()
-    elif isinstance(paths, str):
-        df = pd.read_csv(paths, sep="\t", dtype=str)
-    else:
-        df = pd.concat(pd.read_csv(path, sep="\t", dtype=str) for path in paths)
-    app.config["summary"] = summarize_xref_df(df)
-    app.config["summary_provenances"] = summarize_xref_provenances_df(df)
-    # TODO allow for specification of priorities in the canonicalizer
-    app.config["canonicalizer"] = Canonicalizer.from_df(df)
-    app.register_blueprint(search_blueprint)
-    return app
-def summarize_xref_df(df: pd.DataFrame) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX])
-def summarize_xref_provenances_df(df: pd.DataFrame) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    return _summarize(df, [SOURCE_PREFIX, TARGET_PREFIX, PROVENANCE])
-def _summarize(df: pd.DataFrame, columns) -> pd.DataFrame:
-    """Get all meta-mappings."""
-    rv = df[columns].groupby(columns).size().reset_index()
-    rv.columns = [*columns, "count"]
-    rv.sort_values("count", inplace=True, ascending=False)
-    return rv

pyobo/apps/mapper/templates/base.html DELETED Viewed

@@ -1,35 +0,0 @@
-<!doctype html>
-<html lang="en">
-<head>
-    {% block head %}
-        <!-- Required meta tags -->
-        <meta charset="utf-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
-        {% block styles %}
-            <!-- Bootstrap CSS -->
-            {{ bootstrap.load_css() }}
-        {% endblock %}
-        <script src="https://kit.fontawesome.com/4c86883252.js" crossorigin="anonymous"></script>
-        <title>{% block title %}{% endblock %}</title>
-    {% endblock %}
-</head>
-<body>
-<div class="container" style="margin-top: 25px; margin-bottom: 25px">
-    <div class="row justify-content-center">
-        <div class="col-md-8 col-lg-8">
-            <div class="card">
-                <!-- Your page content -->
-                {% block content %}{% endblock %}
-            </div>
-        </div>
-    </div>
-</div>
-{% block scripts %}
-    <!-- Optional JavaScript -->
-    {{ bootstrap.load_js() }}
-{% endblock %}
-</body>
-</html>

pyobo/apps/mapper/templates/mapper_home.html DELETED Viewed

@@ -1,64 +0,0 @@
-{% extends "base.html" %}
-{% block content %}
-    <h5 class="card-header">Inspector Javert's Mapper</h5>
-    <div class="card-body">
-        <p class="card-text">
-            This service resolves <a href="https://en.wikipedia.org/wiki/CURIE">CURIEs</a>
-            to the best CURIE that's mapped to it.
-        </p>
-        <p>
-            A summary of all of the xrefs can be found <a href="{{ url_for('.summarize') }}">here</a>.
-            You can also look at a summary for a specific prefix like <a
-                href="{{ url_for('.summarize_one', prefix='umls') }}"><code>UMLS</code></a>.
-        </p>
-        <ul>
-            <li>
-                Use the <code>/mappings</code> endpoint to look up equivalent entities,
-                for example, <a
-                    href="{{ url_for('.single_source_mappings', curie='hgnc:6893') }}"><code>hgnc:6893</code></a>.
-            </li>
-            <li>
-                Use the <code>/mappings</code> endpoint to look up all mappings between two entities,
-                for example, <a
-                    href="{{ url_for('.all_mappings', source_curie='hgnc:6893', target_curie='ensembl:ENSG00000186868') }}"><code>hgnc:6893</code>
-                and <code>ensembl:ENSG00000186868</code></a>.
-            </li>
-        </ul>
-    </div>
-    <table class="table">
-        <thead>
-        <tr>
-            <th scope="col">CURIE</th>
-            <th scope="col">Description</th>
-        </tr>
-        </thead>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='hgnc:6893') }}">hgnc:6893</a></td>
-            <td>✅ maps correct identifier to higher priority namespace (ncbigene)</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='ncbigene:4137') }}">ncbigene:4137</a></td>
-            <td>✅ already priority namespace</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='DOID:00000') }}">DOID:00000</a></td>
-            <td>❌ invalid identifier</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='NNN:00000') }}">NNN:00000</a></td>
-            <td>❌ invalid prefix</td>
-        </tr>
-        <tr>
-            <td><a href="{{ url_for('.canonicalize', curie='wikidata:Q42') }}">wikidata:Q42</a></td>
-            <td>❌ unmapped prefix</td>
-        </tr>
-    </table>
-    <div class="card-footer text-center">
-        Developed with ❤️ in 🇩🇪 by <a href="https://github.com/cthoyt">@cthoyt</a>.
-        (<span class="text-muted"><a href="https://github.com/pyobo/pyobo">source code</a></span> |
-        <span class="text-muted"><a
-                href="https://cthoyt.com/2020/04/19/inspector-javerts-xref-database.html">blog post</a></span>)
-    </div>
-{% endblock %}

pyobo/aws.py DELETED Viewed

@@ -1,162 +0,0 @@
-"""Interface for caching data on AWS S3."""
-import logging
-import os
-from typing import Optional
-import boto3
-import humanize
-from tabulate import tabulate
-from pyobo import (
-    get_id_name_mapping,
-    get_id_synonyms_mapping,
-    get_id_to_alts,
-    get_properties_df,
-    get_relations_df,
-    get_xrefs_df,
-)
-from pyobo.api.utils import get_version
-from pyobo.constants import RAW_DIRECTORY
-from pyobo.registries import iter_cached_obo
-from pyobo.utils.path import prefix_cache_join
-__all__ = [
-    "download_artifacts",
-    "upload_artifacts",
-    "upload_artifacts_for_prefix",
-    "list_artifacts",
-]
-logger = logging.getLogger(__name__)
-def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None:
-    """Download compiled parts from AWS.
-    :param bucket: The name of the S3 bucket to download
-    :param suffix: If specified, only download files with this suffix. Might
-     be useful to specify ``suffix='names.tsv`` if you just want to run the
-     name resolution service.
-    """
-    s3_client = boto3.client("s3")
-    all_objects = s3_client.list_objects(Bucket=bucket)
-    for entry in all_objects["Contents"]:
-        key = entry["Key"]
-        if suffix and not key.endswith(suffix):
-            pass
-        path = os.path.join(RAW_DIRECTORY, key)
-        os.makedirs(os.path.dirname(path), exist_ok=True)
-        if os.path.exists(path):
-            continue  # no need to download again
-        logging.warning("downloading %s to %s", key, path)
-        s3_client.download_file(bucket, key, path)
-def upload_artifacts(
-    bucket: str,
-    whitelist: Optional[set[str]] = None,
-    blacklist: Optional[set[str]] = None,
-    s3_client=None,
-) -> None:
-    """Upload all artifacts to AWS."""
-    if s3_client is None:
-        s3_client = boto3.client("s3")
-    all_objects = s3_client.list_objects(Bucket=bucket)
-    uploaded_prefixes = {entry["Key"].split("/")[0] for entry in all_objects["Contents"]}
-    for prefix, _ in sorted(iter_cached_obo()):
-        if prefix in uploaded_prefixes:
-            continue
-        if whitelist and prefix not in whitelist:
-            continue
-        if blacklist and prefix in blacklist:
-            continue
-        upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)
-def upload_artifacts_for_prefix(
-    *, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
-):
-    """Upload compiled parts for the given prefix to AWS."""
-    if s3_client is None:
-        s3_client = boto3.client("s3")
-    if version is None:
-        version = get_version(prefix)
-    logger.info("[%s] getting id->name mapping", prefix)
-    get_id_name_mapping(prefix)
-    id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
-    if not id_name_path.exists():
-        raise FileNotFoundError
-    id_name_key = os.path.join(prefix, "cache", "names.tsv")
-    logger.info("[%s] uploading id->name mapping", prefix)
-    upload_file(path=id_name_path, bucket=bucket, key=id_name_key, s3_client=s3_client)
-    logger.info("[%s] getting id->synonyms mapping", prefix)
-    get_id_synonyms_mapping(prefix)
-    id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
-    if not id_synonyms_path.exists():
-        raise FileNotFoundError
-    id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
-    logger.info("[%s] uploading id->synonyms mapping", prefix)
-    upload_file(path=id_synonyms_path, bucket=bucket, key=id_synonyms_key, s3_client=s3_client)
-    logger.info("[%s] getting xrefs", prefix)
-    get_xrefs_df(prefix)
-    xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
-    if not xrefs_path.exists():
-        raise FileNotFoundError
-    xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
-    logger.info("[%s] uploading xrefs", prefix)
-    upload_file(path=xrefs_path, bucket=bucket, key=xrefs_key, s3_client=s3_client)
-    logger.info("[%s] getting relations", prefix)
-    get_relations_df(prefix)
-    relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
-    if not relations_path.exists():
-        raise FileNotFoundError
-    relations_key = os.path.join(prefix, "cache", "relations.tsv")
-    logger.info("[%s] uploading relations", prefix)
-    upload_file(path=relations_path, bucket=bucket, key=relations_key, s3_client=s3_client)
-    logger.info("[%s] getting properties", prefix)
-    get_properties_df(prefix)
-    properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
-    if not properties_path.exists():
-        raise FileNotFoundError
-    properties_key = os.path.join(prefix, "cache", "properties.tsv")
-    logger.info("[%s] uploading properties", prefix)
-    upload_file(path=properties_path, bucket=bucket, key=properties_key, s3_client=s3_client)
-    logger.info("[%s] getting alternative identifiers", prefix)
-    get_id_to_alts(prefix)
-    alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
-    if not alts_path.exists():
-        raise FileNotFoundError
-    alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
-    logger.info("[%s] uploading alternative identifiers", prefix)
-    upload_file(path=alts_path, bucket=bucket, key=alts_key)
-def upload_file(*, path, bucket, key, s3_client=None):
-    """Upload a file to an S3 bucket.
-    :param path: The local file path
-    :param bucket: The name of the S3 bucket
-    :param key: The relative file path to put on the S3 bucket
-    """
-    if s3_client is None:
-        s3_client = boto3.client("s3")
-    s3_client.upload_file(path, bucket, key)
-def list_artifacts(bucket: str) -> str:
-    """List the files in a given bucket."""
-    s3_client = boto3.client("s3")
-    all_objects = s3_client.list_objects(Bucket=bucket)
-    rows = [
-        (entry["Key"], humanize.naturalsize(entry["Size"])) for entry in all_objects["Contents"]
-    ]
-    return tabulate(rows, headers=["File", "Size"])

pyobo/cli/aws.py DELETED Viewed

@@ -1,47 +0,0 @@
-"""CLI for PyOBO's interface to S3."""
-import click
-from more_click import verbose_option
-from ..aws import download_artifacts, list_artifacts, upload_artifacts
-__all__ = [
-    "main",
-]
-bucket_argument = click.argument("bucket")
-@click.group(name="aws")
-def main():
-    """S3 utilities."""
-@main.command()
-@bucket_argument
-@verbose_option
-def download(bucket):
-    """Download all artifacts from the S3 bucket."""
-    download_artifacts(bucket)
-@main.command()
-@bucket_argument
-@verbose_option
-@click.option("-w", "--whitelist", multiple=True)
-@click.option("-b", "--blacklist", multiple=True)
-def upload(bucket, whitelist, blacklist):
-    """Download all artifacts from the S3 bucket."""
-    upload_artifacts(bucket, whitelist=whitelist, blacklist=blacklist)
-@main.command()
-@bucket_argument
-@verbose_option
-def ls(bucket):
-    """List all artifacts on the S3 bucket."""
-    click.echo(list_artifacts(bucket))
-if __name__ == "__main__":
-    main()

pyobo/identifier_utils.py DELETED Viewed

@@ -1,142 +0,0 @@
-"""Utilities for handling prefixes."""
-from __future__ import annotations
-import logging
-from functools import wraps
-import bioregistry
-from curies import Reference, ReferenceTuple
-from .registries import (
-    curie_has_blacklisted_prefix,
-    curie_has_blacklisted_suffix,
-    curie_is_blacklisted,
-    remap_full,
-    remap_prefix,
-)
-__all__ = [
-    "normalize_curie",
-    "wrap_norm_prefix",
-    "standardize_ec",
-]
-logger = logging.getLogger(__name__)
-class MissingPrefixError(ValueError):
-    """Raised on a missing prefix."""
-    reference: Reference | None
-    def __init__(
-        self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None
-    ):
-        """Initialize the error."""
-        self.prefix = prefix
-        self.curie = curie
-        self.xref = xref
-        self.ontology = ontology
-        self.reference = None
-    def __str__(self) -> str:
-        s = ""
-        if self.ontology:
-            s += f"[{self.ontology}] "
-        s += f"unhandled prefix {self.prefix} found in curie {self.curie}"
-        if self.xref:
-            s += f"/xref {self.xref}"
-        if self.reference is not None:
-            s += f" from {self.reference.curie}"
-        return s
-def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None:
-    """Normalize a namespace and return, if possible."""
-    norm_prefix = bioregistry.normalize_prefix(prefix)
-    if norm_prefix is not None:
-        return norm_prefix
-    elif strict:
-        raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref)
-    else:
-        return None
-BAD_CURIES = set()
-def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]:
-    """Parse a string that looks like a CURIE.
-    :param curie: A compact uniform resource identifier (CURIE)
-    :param strict: Should an exception be thrown if the CURIE can not be parsed w.r.t. the Bioregistry?
-    :return: A parse tuple or a tuple of None, None if not able to parse and not strict
-    - Normalizes the namespace
-    - Checks against a blacklist for the entire curie, for the namespace, and for suffixes.
-    """
-    if curie_is_blacklisted(curie):
-        return None, None
-    if curie_has_blacklisted_prefix(curie):
-        return None, None
-    if curie_has_blacklisted_suffix(curie):
-        return None, None
-    # Remap the curie with the full list
-    curie = remap_full(curie)
-    # Remap node's prefix (if necessary)
-    curie = remap_prefix(curie)
-    try:
-        head_ns, identifier = curie.split(":", 1)
-    except ValueError:  # skip nodes that don't look like normal CURIEs
-        if curie not in BAD_CURIES:
-            BAD_CURIES.add(curie)
-            logger.debug(f"could not split CURIE on colon: {curie}")
-        return None, None
-    # remove redundant prefix
-    if identifier.casefold().startswith(f"{head_ns.casefold()}:"):
-        identifier = identifier[len(head_ns) + 1 :]
-    norm_node_prefix = _normalize_prefix(head_ns, curie=curie, strict=strict)
-    if not norm_node_prefix:
-        return None, None
-    return norm_node_prefix, identifier
-def wrap_norm_prefix(f):
-    """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
-    @wraps(f)
-    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
-        if isinstance(prefix, str):
-            norm_prefix = bioregistry.normalize_prefix(prefix)
-            if norm_prefix is None:
-                raise ValueError(f"Invalid prefix: {prefix}")
-            prefix = norm_prefix
-        elif isinstance(prefix, Reference):
-            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
-            if norm_prefix is None:
-                raise ValueError(f"Invalid prefix: {prefix.prefix}")
-            prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
-        elif isinstance(prefix, ReferenceTuple):
-            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
-            if norm_prefix is None:
-                raise ValueError(f"Invalid prefix: {prefix.prefix}")
-            prefix = ReferenceTuple(norm_prefix, prefix.identifier)
-        else:
-            raise TypeError
-        return f(prefix, *args, **kwargs)
-    return _wrapped
-def standardize_ec(ec: str) -> str:
-    """Standardize an EC code identifier by removing all trailing dashes and dots."""
-    ec = ec.strip().replace(" ", "")
-    for _ in range(4):
-        ec = ec.rstrip("-").rstrip(".")
-    return ec

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl