PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (228)

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -117
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +107 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +210 -160
pyobo/cli/database_utils.py +155 -0
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +209 -191
pyobo/gilda_utils.py +52 -250
pyobo/identifier_utils/__init__.py +33 -0
pyobo/identifier_utils/api.py +305 -0
pyobo/identifier_utils/preprocessing.json +873 -0
pyobo/identifier_utils/preprocessing.py +27 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +48 -40
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1354 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +9 -6
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +8 -13
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +11 -4
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +272 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1484 -657
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +16 -15
pyobo/utils/io.py +51 -41
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +73 -70
pyobo/version.py +3 -3
pyobo-0.12.1.dist-info/METADATA +671 -0
pyobo-0.12.1.dist-info/RECORD +201 -0
pyobo-0.12.1.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo/xrefdb/xrefs_pipeline.py +0 -180
pyobo-0.11.2.dist-info/METADATA +0 -711
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/gilda_utils.py CHANGED Viewed

@@ -2,271 +2,73 @@
 from __future__ import annotations
-import logging
-from collections.abc import Iterable
-from subprocess import CalledProcessError
+import warnings
+from collections.abc import Iterable, Sequence
+from typing import TYPE_CHECKING, Any, cast
-import bioregistry
-import gilda.api
-import gilda.term
-from gilda.grounder import Grounder
-from gilda.process import normalize
-from gilda.term import filter_out_duplicates
-from tqdm.auto import tqdm
+import ssslm
+from ssslm import literal_mappings_to_gilda
+from typing_extensions import Unpack
-from pyobo import (
-    get_descendants,
-    get_id_name_mapping,
-    get_id_species_mapping,
-    get_id_synonyms_mapping,
-    get_ids,
-    get_obsolete,
+from pyobo.api import (
+    get_literal_mappings,
+    get_literal_mappings_subset,
 )
-from pyobo.getters import NoBuildError
-from pyobo.utils.io import multidict
+from pyobo.constants import GetOntologyKwargs
+from pyobo.struct.reference import Reference
+if TYPE_CHECKING:
+    import gilda
 __all__ = [
-    "iter_gilda_prediction_tuples",
-    "get_grounder",
+    "get_gilda_term_subset",
     "get_gilda_terms",
+    "get_grounder",
 ]
-logger = logging.getLogger(__name__)
-def iter_gilda_prediction_tuples(
-    prefix: str,
-    relation: str = "skos:exactMatch",
-    *,
-    grounder: Grounder | None = None,
-    identifiers_are_names: bool = False,
-    strict: bool = False,
-) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]:
-    """Iterate over prediction tuples for a given prefix."""
-    if grounder is None:
-        grounder = gilda.api.grounder
-    id_name_mapping = get_id_name_mapping(prefix, strict=strict)
-    it = tqdm(
-        id_name_mapping.items(), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="name"
-    )
-    for identifier, name in it:
-        for scored_match in grounder.ground(name):
-            target_prefix = scored_match.term.db.lower()
-            yield (
-                prefix,
-                normalize_identifier(prefix, identifier),
-                name,
-                relation,
-                target_prefix,
-                normalize_identifier(target_prefix, scored_match.term.id),
-                scored_match.term.entry_name,
-                "semapv:LexicalMatching",
-                round(scored_match.score, 3),
-            )
-    if identifiers_are_names:
-        it = tqdm(get_ids(prefix), desc=f"[{prefix}] gilda tuples", unit_scale=True, unit="id")
-        for identifier in it:
-            for scored_match in grounder.ground(identifier):
-                target_prefix = scored_match.term.db.lower()
-                yield (
-                    prefix,
-                    normalize_identifier(prefix, identifier),
-                    identifier,
-                    relation,
-                    target_prefix,
-                    normalize_identifier(target_prefix, scored_match.term.id),
-                    scored_match.term.entry_name,
-                    "semapv:LexicalMatching",
-                    scored_match.score,
-                )
+def get_grounder(*args: Any, **kwargs: Any) -> gilda.Grounder:
+    """Get a grounder."""
+    warnings.warn("use pyobo.ner.get_grounder", DeprecationWarning, stacklevel=2)
+    import pyobo.ner
-def normalize_identifier(prefix: str, identifier: str) -> str:
-    """Normalize the identifier."""
-    resource = bioregistry.get_resource(prefix)
-    if resource is None:
-        raise KeyError
-    return resource.miriam_standardize_identifier(identifier) or identifier
+    grounder = cast(ssslm.ner.GildaGrounder, pyobo.get_grounder(*args, **kwargs))
+    return grounder._grounder
-def get_grounder(
-    prefixes: str | Iterable[str],
-    *,
-    unnamed: Iterable[str] | None = None,
-    grounder_cls: type[Grounder] | None = None,
-    versions: None | str | Iterable[str | None] | dict[str, str] = None,
-    strict: bool = True,
-    skip_obsolete: bool = False,
-    progress: bool = True,
-) -> Grounder:
-    """Get a Gilda grounder for the given prefix(es)."""
-    unnamed = set() if unnamed is None else set(unnamed)
-    if isinstance(prefixes, str):
-        prefixes = [prefixes]
-    else:
-        prefixes = list(prefixes)
-    if versions is None:
-        versions = [None] * len(prefixes)
-    elif isinstance(versions, str):
-        versions = [versions]
-    elif isinstance(versions, dict):
-        versions = [versions.get(prefix) for prefix in prefixes]
-    else:
-        versions = list(versions)
-    if len(prefixes) != len(versions):
-        raise ValueError
-    terms: list[gilda.term.Term] = []
-    for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
-        try:
-            p_terms = list(
-                get_gilda_terms(
-                    prefix,
-                    identifiers_are_names=prefix in unnamed,
-                    version=version,
-                    strict=strict,
-                    skip_obsolete=skip_obsolete,
-                    progress=progress,
-                )
-            )
-        except (NoBuildError, CalledProcessError):
-            continue
-        else:
-            terms.extend(p_terms)
-    terms = filter_out_duplicates(terms)
-    terms_dict = multidict((term.norm_text, term) for term in terms)
-    if grounder_cls is None:
-        return Grounder(terms_dict)
-    else:
-        return grounder_cls(terms_dict)
-def _fast_term(
-    *,
-    text: str,
-    prefix: str,
-    identifier: str,
-    name: str,
-    status: str,
-    organism: str | None = None,
-) -> gilda.term.Term | None:
-    try:
-        term = gilda.term.Term(
-            norm_text=normalize(text),
-            text=text,
-            db=prefix,
-            id=identifier,
-            entry_name=name,
-            status=status,
-            source=prefix,
-            organism=organism,
-        )
-    except ValueError:
-        return None
-    return term
+def get_gilda_terms(prefix: str, *, skip_obsolete: bool = False, **kwargs) -> Iterable[gilda.Term]:
+    """Get gilda terms."""
+    warnings.warn(
+        "use pyobo.get_literal_mappings() directly and convert to gilda yourself",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    yield from literal_mappings_to_gilda(
+        get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
+    )
-def get_gilda_terms(
-    prefix: str,
+def get_gilda_term_subset(
+    source: str,
+    ancestors: str | Sequence[str],
     *,
-    identifiers_are_names: bool = False,
-    version: str | None = None,
-    strict: bool = True,
     skip_obsolete: bool = False,
-    progress: bool = True,
-) -> Iterable[gilda.term.Term]:
-    """Get gilda terms for the given namespace."""
-    id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
-    id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
-    obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
-    it = tqdm(
-        id_to_name.items(),
-        desc=f"[{prefix}] mapping",
-        unit_scale=True,
-        unit="name",
-        disable=not progress,
+    **kwargs: Unpack[GetOntologyKwargs],
+) -> Iterable[gilda.Term]:
+    """Get a subset of terms."""
+    warnings.warn(
+        "use pyobo.get_literal_mappings_subset() directly and convert to gilda yourself",
+        DeprecationWarning,
+        stacklevel=2,
     )
-    for identifier, name in it:
-        if identifier in obsoletes:
-            continue
-        term = _fast_term(
-            text=name,
-            prefix=prefix,
-            identifier=identifier,
-            name=name,
-            status="name",
-            organism=id_to_species.get(identifier),
-        )
-        if term is not None:
-            yield term
-    id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
-    if id_to_synonyms:
-        it = tqdm(
-            id_to_synonyms.items(),
-            desc=f"[{prefix}] mapping",
-            unit_scale=True,
-            unit="synonym",
-            disable=not progress,
-        )
-        for identifier, synonyms in it:
-            if identifier in obsoletes:
-                continue
-            name = id_to_name[identifier]
-            for synonym in synonyms:
-                if not synonym:
-                    continue
-                term = _fast_term(
-                    text=synonym,
-                    prefix=prefix,
-                    identifier=identifier,
-                    name=name,
-                    status="synonym",
-                    organism=id_to_species.get(identifier),
-                )
-                if term is not None:
-                    yield term
-    if identifiers_are_names:
-        it = tqdm(
-            get_ids(prefix),
-            desc=f"[{prefix}] mapping",
-            unit_scale=True,
-            unit="id",
-            disable=not progress,
+    if isinstance(ancestors, str):
+        ancestors = [ancestors]
+    yield from literal_mappings_to_gilda(
+        get_literal_mappings_subset(
+            source,
+            ancestors=[Reference.from_curie(a) for a in ancestors],
+            skip_obsolete=skip_obsolete,
+            **kwargs,
         )
-        for identifier in it:
-            if identifier in obsoletes:
-                continue
-            term = _fast_term(
-                text=identifier,
-                prefix=prefix,
-                identifier=identifier,
-                name=identifier,
-                status="name",
-                organism=id_to_species.get(identifier),
-            )
-            if term is not None:
-                yield term
-def get_gilda_term_subset(
-    source: str, ancestors: str | list[str], **kwargs
-) -> Iterable[gilda.term.Term]:
-    """Get a subset of terms."""
-    subset = {
-        descendant
-        for parent_curie in _ensure_list(ancestors)
-        for descendant in get_descendants(*parent_curie.split(":")) or []
-    }
-    for term in get_gilda_terms(source, **kwargs):
-        if bioregistry.curie_to_str(term.db, term.id) in subset:
-            yield term
-def _ensure_list(s: str | list[str]) -> list[str]:
-    if isinstance(s, str):
-        return [s]
-    return s
+    )

pyobo/identifier_utils/__init__.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Extract registry information."""
+from .api import (
+    DefaultCoercionError,
+    EmptyStringError,
+    NotCURIEError,
+    ParseError,
+    ParseValidationError,
+    UnparsableIRIError,
+    UnregisteredPrefixError,
+    _is_valid_identifier,
+    _parse_str_or_curie_or_uri_helper,
+    standardize_ec,
+    wrap_norm_prefix,
+)
+from .preprocessing import get_rules
+from .relations import ground_relation
+__all__ = [
+    "DefaultCoercionError",
+    "EmptyStringError",
+    "NotCURIEError",
+    "ParseError",
+    "ParseValidationError",
+    "UnparsableIRIError",
+    "UnregisteredPrefixError",
+    "_is_valid_identifier",
+    "_parse_str_or_curie_or_uri_helper",
+    "get_rules",
+    "ground_relation",
+    "standardize_ec",
+    "wrap_norm_prefix",
+]

pyobo/identifier_utils/api.py ADDED Viewed

@@ -0,0 +1,305 @@
+"""Utilities for handling prefixes."""
+from __future__ import annotations
+import logging
+from functools import lru_cache, wraps
+from typing import Annotated, ClassVar
+import bioregistry
+import click
+from bioregistry import NormalizedNamableReference as Reference
+from bioregistry.constants import FailureReturnType
+from curies import ReferenceTuple
+from curies.preprocessing import BlocklistError, PreprocessingConverter
+from pydantic import ValidationError
+from typing_extensions import Doc
+from .preprocessing import get_rules
+from .relations import ground_relation
+__all__ = [
+    "DefaultCoercionError",
+    "EmptyStringError",
+    "NotCURIEError",
+    "ParseError",
+    "ParseValidationError",
+    "UnparsableIRIError",
+    "UnregisteredPrefixError",
+    "_parse_str_or_curie_or_uri_helper",
+    "standardize_ec",
+    "wrap_norm_prefix",
+]
+logger = logging.getLogger(__name__)
+Line = Annotated[str | None, Doc("""The OBO line where the parsing happened""")]
+class ParseError(BaseException):
+    """Raised on a missing prefix."""
+    message: ClassVar[str]
+    def __init__(
+        self,
+        curie: str,
+        *,
+        context: str | None,
+        ontology_prefix: str | None = None,
+        node: Reference | None = None,
+        predicate: Reference | None = None,
+        line: Line = None,
+    ) -> None:
+        """Initialize the error."""
+        self.curie = curie
+        self.context = context
+        self.ontology_prefix = ontology_prefix
+        self.node = node
+        self.predicate = predicate
+        self.line = line
+    def __str__(self) -> str:
+        s = ""
+        if self.node:
+            if self.predicate:
+                s += f"[{self.node.curie} - {self.predicate.curie}] "
+            else:
+                s += f"[{self.node.curie}] "
+        elif self.ontology_prefix:
+            s += f"[{self.ontology_prefix}] "
+        s += f"{self.message} {click.style(self.curie, fg='cyan')}"
+        if self.context:
+            s += f" in {self.context}"
+        if self.line and self.line != self.curie:
+            s += f" in {click.style(self.line, fg='yellow')}"
+        return s
+class ParseValidationError(ParseError):
+    """Raised on a validation error."""
+    message = "failed Pydantic validation"
+    def __init__(self, *args, exc: ValidationError, **kwargs) -> None:
+        """Initialize the error."""
+        super().__init__(*args, **kwargs)
+        self.exc = exc
+class UnregisteredPrefixError(ParseError):
+    """Raised on a missing prefix."""
+    message = "unregistered prefix in"
+class UnparsableIRIError(ParseError):
+    """Raised on a an unparsable IRI."""
+    message = "couldn't parse IRI"
+class EmptyStringError(ParseError):
+    """Raised on a an empty string."""
+    message = "is empty"
+class NotCURIEError(ParseError):
+    """Raised on a text that can't be parsed as a CURIE."""
+    message = "not a CURIE"
+class DefaultCoercionError(ParseError):
+    """Raised on a text that can't be coerced into a default reference."""
+    message = "can't be coerced into a default reference"
+def _is_uri(s: str) -> bool:
+    return s.startswith("http:") or s.startswith("https:")
+def _preclean_uri(s: str) -> str:
+    s = s.strip().removeprefix(r"url\:").removeprefix(r"uri\:")
+    s = s.strip().removeprefix(r"URL\:").removeprefix(r"URI\:")
+    s = s.strip().removeprefix("url:").removeprefix("uri:")
+    s = s.removeprefix("URL:").removeprefix("URI:")
+    s = s.removeprefix("WWW:").removeprefix("www:").lstrip()
+    s = s.replace("http\\:", "http:")
+    s = s.replace("https\\:", "https:")
+    s = s.rstrip("/")
+    return s
+@lru_cache(1)
+def _get_converter() -> PreprocessingConverter:
+    return PreprocessingConverter(
+        converter=bioregistry.manager.converter,
+        rules=get_rules(),
+        preclean=_preclean_uri,
+    )
+def _parse_str_or_curie_or_uri_helper(
+    str_or_curie_or_uri: str,
+    *,
+    ontology_prefix: str | None = None,
+    node: Reference | None = None,
+    predicate: Reference | None = None,
+    upgrade: bool = True,
+    line: str | None = None,
+    name: str | None = None,
+    context: str | None = None,
+) -> Reference | ParseError | BlocklistError:
+    """Parse a string that looks like a CURIE.
+    :param str_or_curie_or_uri: A compact uniform resource identifier (CURIE)
+    :param ontology_prefix: The ontology in which the CURIE appears
+    :returns: A parse tuple or a tuple of None, None if not able to parse and not strict
+    - Normalizes the namespace
+    - Checks against a blacklist for the entire curie, for the namespace, and for
+      suffixes.
+    """
+    str_or_curie_or_uri = _preclean_uri(str_or_curie_or_uri)
+    if not str_or_curie_or_uri:
+        return EmptyStringError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+    rules = get_rules()
+    if upgrade:
+        # Remap the curie with the full list
+        if r1 := rules.remap_full(
+            str_or_curie_or_uri, reference_cls=Reference, context=ontology_prefix
+        ):
+            return r1
+        # Remap node's prefix (if necessary)
+        str_or_curie_or_uri = rules.remap_prefix(str_or_curie_or_uri, context=ontology_prefix)
+        if r2 := ground_relation(str_or_curie_or_uri):
+            return r2
+    if rules.str_is_blocked(str_or_curie_or_uri, context=ontology_prefix):
+        return BlocklistError()
+    if _is_uri(str_or_curie_or_uri):
+        rt = bioregistry.parse_iri(
+            str_or_curie_or_uri, on_failure_return_type=FailureReturnType.single
+        )
+        if rt is None:
+            return UnparsableIRIError(
+                str_or_curie_or_uri,
+                ontology_prefix=ontology_prefix,
+                node=node,
+                predicate=predicate,
+                line=line,
+                context=context,
+            )
+        try:
+            rv = Reference.model_validate(
+                {"prefix": rt.prefix, "identifier": rt.identifier, "name": name}
+            )
+        except ValidationError as exc:
+            return ParseValidationError(
+                str_or_curie_or_uri,
+                ontology_prefix=ontology_prefix,
+                node=node,
+                predicate=predicate,
+                line=line,
+                context=context,
+                exc=exc,
+            )
+        else:
+            return rv
+    prefix, delimiter, identifier = str_or_curie_or_uri.partition(":")
+    if not delimiter:
+        return NotCURIEError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+    norm_node_prefix = bioregistry.normalize_prefix(prefix)
+    if not norm_node_prefix:
+        return UnregisteredPrefixError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            context=context,
+        )
+    identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
+    try:
+        rv = Reference.model_validate(
+            {"prefix": norm_node_prefix, "identifier": identifier, "name": name}
+        )
+    except ValidationError as exc:
+        return ParseValidationError(
+            str_or_curie_or_uri,
+            ontology_prefix=ontology_prefix,
+            node=node,
+            predicate=predicate,
+            line=line,
+            exc=exc,
+            context=context,
+        )
+    else:
+        return rv
+def wrap_norm_prefix(f):
+    """Decorate a function that take in a prefix to auto-normalize, or return None if it can't be normalized."""
+    @wraps(f)
+    def _wrapped(prefix: str | Reference | ReferenceTuple, *args, **kwargs):
+        if isinstance(prefix, str):
+            norm_prefix = bioregistry.normalize_prefix(prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix}")
+            prefix = norm_prefix
+        elif isinstance(prefix, Reference):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = Reference(prefix=norm_prefix, identifier=prefix.identifier)
+        elif isinstance(prefix, ReferenceTuple):
+            norm_prefix = bioregistry.normalize_prefix(prefix.prefix)
+            if norm_prefix is None:
+                raise ValueError(f"Invalid prefix: {prefix.prefix}")
+            prefix = ReferenceTuple(norm_prefix, prefix.identifier)
+        else:
+            raise TypeError
+        return f(prefix, *args, **kwargs)
+    return _wrapped
+def standardize_ec(ec: str) -> str:
+    """Standardize an EC code identifier by removing all trailing dashes and dots."""
+    ec = ec.strip().replace(" ", "")
+    for _ in range(4):
+        ec = ec.rstrip("-").rstrip(".")
+    return ec
+def _is_valid_identifier(curie_or_uri: str) -> bool:
+    # TODO this needs more careful implementation
+    return bool(curie_or_uri.strip()) and " " not in curie_or_uri

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl