PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/sources/__init__.py CHANGED Viewed

@@ -3,10 +3,12 @@
 from class_resolver import ClassResolver
 from .antibodyregistry import AntibodyRegistryGetter
+from .bigg import BiGGCompartmentGetter, BiGGMetaboliteGetter, BiGGModelGetter, BiGGReactionGetter
 from .ccle import CCLEGetter
 from .cgnc import CGNCGetter
-from .chembl import ChEMBLCompoundGetter
+from .chembl import ChEMBLCompoundGetter, ChEMBLTargetGetter
 from .civic_gene import CIVICGeneGetter
+from .clinicaltrials import ClinicalTrialsGetter
 from .complexportal import ComplexPortalGetter
 from .conso import CONSOGetter
 from .cpt import CPTGetter
@@ -14,34 +16,38 @@ from .credit import CreditGetter
 from .cvx import CVXGetter
 from .depmap import DepMapGetter
 from .dictybase_gene import DictybaseGetter
-from .drugbank import DrugBankGetter
-from .drugbank_salt import DrugBankSaltGetter
+from .drugbank import DrugBankGetter, DrugBankSaltGetter
 from .drugcentral import DrugCentralGetter
 from .expasy import ExpasyGetter
 from .famplex import FamPlexGetter
 from .flybase import FlyBaseGetter
-from .geonames import GeonamesGetter
-from .gwascentral_phenotype import GWASCentralPhenotypeGetter
-from .gwascentral_study import GWASCentralStudyGetter
-from .hgnc import HGNCGetter
-from .hgncgenefamily import HGNCGroupGetter
-from .icd10 import ICD10Getter
-from .icd11 import ICD11Getter
+from .gard import GARDGetter
+from .geonames import GeonamesFeatureGetter, GeonamesGetter
+from .gtdb import GTDBGetter
+from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
+from .hgnc import HGNCGetter, HGNCGroupGetter
+from .icd import ICD10Getter, ICD11Getter
 from .interpro import InterProGetter
 from .itis import ITISGetter
 from .kegg import KEGGGeneGetter, KEGGGenomeGetter, KEGGPathwayGetter
 from .mesh import MeSHGetter
 from .mgi import MGIGetter
-from .mirbase import MiRBaseGetter
-from .mirbase_family import MiRBaseFamilyGetter
-from .mirbase_mature import MiRBaseMatureGetter
+from .mirbase import MiRBaseFamilyGetter, MiRBaseGetter, MiRBaseMatureGetter
 from .msigdb import MSigDBGetter
-from .ncbigene import NCBIGeneGetter
+from .ncbi import NCBIGCGetter, NCBIGeneGetter
+from .nih_reporter import NIHReporterGetter
+from .nlm import NLMCatalogGetter, NLMPublisherGetter
 from .npass import NPASSGetter
 from .omim_ps import OMIMPSGetter
 from .pathbank import PathBankGetter
-from .pfam import PfamGetter
-from .pfam_clan import PfamClanGetter
+from .pfam import PfamClanGetter, PfamGetter
+from .pharmgkb import (
+    PharmGKBChemicalGetter,
+    PharmGKBDiseaseGetter,
+    PharmGKBGeneGetter,
+    PharmGKBPathwayGetter,
+    PharmGKBVariantGetter,
+)
 from .pid import PIDGetter
 from .pombase import PomBaseGetter
 from .pubchem import PubChemCompoundGetter
@@ -51,15 +57,21 @@ from .rhea import RheaGetter
 from .ror import RORGetter
 from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
 from .sgd import SGDGetter
+from .signor import SignorGetter
 from .slm import SLMGetter
-from .umls import UMLSGetter
+from .umls import UMLSGetter, UMLSSTyGetter
+from .unimod import UnimodGetter
 from .uniprot import UniProtGetter, UniProtPtmGetter
 from .wikipathways import WikiPathwaysGetter
 from .zfin import ZFINGetter
-from ..struct import Obo
+from ..struct.struct import AdHocOntologyBase, Obo
 __all__ = [
     "AntibodyRegistryGetter",
+    "BiGGCompartmentGetter",
+    "BiGGMetaboliteGetter",
+    "BiGGModelGetter",
+    "BiGGReactionGetter",
     "CCLEGetter",
     "CGNCGetter",
     "CIVICGeneGetter",
@@ -67,6 +79,8 @@ __all__ = [
     "CPTGetter",
     "CVXGetter",
     "ChEMBLCompoundGetter",
+    "ChEMBLTargetGetter",
+    "ClinicalTrialsGetter",
     "ComplexPortalGetter",
     "CreditGetter",
     "DepMapGetter",
@@ -77,8 +91,11 @@ __all__ = [
     "ExpasyGetter",
     "FamPlexGetter",
     "FlyBaseGetter",
+    "GARDGetter",
+    "GTDBGetter",
     "GWASCentralPhenotypeGetter",
     "GWASCentralStudyGetter",
+    "GeonamesFeatureGetter",
     "GeonamesGetter",
     "HGNCGetter",
     "HGNCGroupGetter",
@@ -95,13 +112,22 @@ __all__ = [
     "MiRBaseFamilyGetter",
     "MiRBaseGetter",
     "MiRBaseMatureGetter",
+    "NCBIGCGetter",
     "NCBIGeneGetter",
+    "NIHReporterGetter",
+    "NLMCatalogGetter",
+    "NLMPublisherGetter",
     "NPASSGetter",
     "OMIMPSGetter",
     "PIDGetter",
     "PathBankGetter",
     "PfamClanGetter",
     "PfamGetter",
+    "PharmGKBChemicalGetter",
+    "PharmGKBDiseaseGetter",
+    "PharmGKBGeneGetter",
+    "PharmGKBPathwayGetter",
+    "PharmGKBVariantGetter",
     "PomBaseGetter",
     "PubChemCompoundGetter",
     "RGDGetter",
@@ -114,24 +140,21 @@ __all__ = [
     "SFAMGetter",
     "SGDGetter",
     "SLMGetter",
+    "SignorGetter",
     "UMLSGetter",
+    "UMLSSTyGetter",
     "UniProtGetter",
     "UniProtPtmGetter",
+    "UnimodGetter",
     "WikiPathwaysGetter",
     "ZFINGetter",
     "ontology_resolver",
 ]
-def _assert_sorted():
-    _sorted = sorted(__all__)
-    if _sorted != __all__:
-        raise ValueError(f"unsorted. should be:\n{_sorted}")
-_assert_sorted()
-del _assert_sorted
-ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(base=Obo, suffix="Getter")
+ontology_resolver: ClassResolver[Obo] = ClassResolver.from_subclasses(
+    base=Obo,
+    suffix="Getter",
+    skip={AdHocOntologyBase},
+)
 for getter in list(ontology_resolver):
     ontology_resolver.synonyms[getter.ontology] = getter

pyobo/sources/agrovoc.py CHANGED Viewed

File without changes

pyobo/sources/antibodyregistry.py CHANGED Viewed

@@ -1,15 +1,18 @@
-"""Converter for the Antibody Registry."""
+"""Converter for the Antibody Registry.
+TODO use API https://www.antibodyregistry.org/api/antibodies?page=1&size=100
+"""
 import logging
 from collections.abc import Iterable, Mapping
-from typing import Optional
 import pandas as pd
 from bioregistry.utils import removeprefix
 from tqdm.auto import tqdm
-from pyobo import Obo, Term
+from pyobo import Obo, Reference, Term
 from pyobo.api.utils import get_version
+from pyobo.struct.typedef import has_citation
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -23,7 +26,7 @@ URL = "http://antibodyregistry.org/php/fileHandler.php"
 CHUNKSIZE = 20_000
-def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
+def get_chunks(*, force: bool = False, version: str | None = None) -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
     if version is None:
         version = get_version(PREFIX)
@@ -44,19 +47,15 @@ class AntibodyRegistryGetter(Obo):
     """An ontology representation of the Antibody Registry."""
     ontology = bioversions_key = PREFIX
+    typedefs = [has_citation]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return iter_terms(force=force, version=self._version_or_raise)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get the Antibody Registry as OBO."""
-    return AntibodyRegistryGetter(force=force)
 # TODO there are tonnnnsss of mappings to be curated
-MAPPING: Mapping[str, Optional[str]] = {
+MAPPING: Mapping[str, str | None] = {
     "AMERICAN DIAGNOSTICA": None,  # No website
     "Biolegend": "biolegend",
     "Enzo Life Sciences": "enzo",
@@ -74,7 +73,7 @@ SKIP = {
 }
-def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
+def iter_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
     """Iterate over antibodies."""
     chunks = get_chunks(force=force, version=version)
     needs_curating = set()
@@ -98,7 +97,7 @@ def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterabl
                     pubmed_id = pubmed_id.strip()
                     if not pubmed_id:
                         continue
-                    term.append_provenance(("pubmed", pubmed_id))
+                    term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
             yield term

pyobo/sources/bigg/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Converter for resources in BiGG."""
+from .bigg_compartment import BiGGCompartmentGetter
+from .bigg_metabolite import BiGGMetaboliteGetter
+from .bigg_model import BiGGModelGetter
+from .bigg_reaction import BiGGReactionGetter
+__all__ = [
+    "BiGGCompartmentGetter",
+    "BiGGMetaboliteGetter",
+    "BiGGModelGetter",
+    "BiGGReactionGetter",
+]

pyobo/sources/bigg/bigg_compartment.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Get compartments from BiGG."""
+from collections.abc import Iterable
+from bioversions.utils import get_soup
+from pyobo import Obo, Reference, Term
+__all__ = [
+    "BiGGCompartmentGetter",
+    "get_compartments",
+]
+DATA_URL = "http://bigg.ucsd.edu/compartments/"
+PREFIX = "bigg.compartment"
+# Maps BiGG compartment identifiers to GO references. A value of ``None`` marks
+# a compartment for which no GO reference is given here.
+GO_MAPPING: dict[str, Reference | None] = {
+    "c": Reference(prefix="go", identifier="0005829", name="cytosol"),
+    "e": Reference(prefix="go", identifier="0005615", name="extracellular space"),
+    "p": Reference(prefix="go", identifier="0042597", name="periplasmic space"),
+    "m": Reference(prefix="go", identifier="0005739", name="mitochondrion"),
+    "r": Reference(prefix="go", identifier="0005783", name="endoplasmic reticulum"),
+    "v": Reference(prefix="go", identifier="0005773", name="vacuole"),
+    "n": Reference(prefix="go", identifier="0005634", name="nucleus"),
+    "g": Reference(prefix="go", identifier="0005794", name="Golgi apparatus"),
+    "u": Reference(prefix="go", identifier="0009579", name="thylakoid"),
+    "l": Reference(prefix="go", identifier="0005764", name="lysosome"),
+    "h": Reference(prefix="go", identifier="0009507", name="chloroplast"),
+    "f": Reference(prefix="go", identifier="0005929", name="cilium"),
+    "s": Reference(prefix="go", identifier="1990413", name="eyespot apparatus"),
+    "um": Reference(prefix="go", identifier="0042651", name="thylakoid membrane"),
+    "y": Reference(prefix="go", identifier="0070069", name="cytochrome complex"),
+    # note that glyoxysome is a child class of peroxisome in GO
+    "x": Reference(prefix="go", identifier="0005777", name="peroxisome"),
+    "mm": Reference(prefix="go", identifier="0005743", name="mitochondrial inner membrane"),
+    "im": Reference(prefix="go", identifier="0005758", name="mitochondrial intermembrane space"),
+    "cx": None,  # missing for carboxysome
+    "cm": None,  # missing for cytosolic membrane
+    "i": None,  # missing for inner mitochondrial compartment
+    "w": None,  # missing for wildtype staph aureus
+}
+class BiGGCompartmentGetter(Obo):
+    """An ontology representation of BiGG compartments."""
+    # the prefix under which the compartment terms are created
+    ontology = PREFIX
+    # NOTE(review): presumably the key used to look up the shared BiGG version - confirm against Obo
+    bioversions_key = "bigg"
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology.
+        :param force: Forwarded to :func:`iterate_terms`
+        :returns: The terms from :func:`iterate_terms` for the version in ``self._version_or_raise``
+        """
+        return iterate_terms(force=force, version=self._version_or_raise)
+def get_compartments(*, force: bool = False, version: str | None = None) -> dict[str, str]:
+    """Get a dictionary of BiGG compartments."""
+    rv = {}
+    soup = get_soup(DATA_URL)
+    table = soup.find(**{"class": "myTable"})  # type:ignore[arg-type]
+    if table is None:
+        raise ValueError
+    for row in table.find_all("tr"):  # type:ignore[attr-defined]
+        cells = list(row.find_all("td"))
+        if not cells:
+            continue
+        identifier_cell, name_cell = cells
+        rv[identifier_cell.text] = name_cell.text
+    return rv
+def iterate_terms(*, force: bool = False, version: str | None = None) -> Iterable[Term]:
+    """Iterate over BiGG compartments.
+    :param force: Forwarded to :func:`get_compartments`
+    :param version: Forwarded to :func:`get_compartments`
+    :yields: One term per compartment, with an exact match to GO when ``GO_MAPPING``
+        has a reference for its identifier
+    """
+    compartments = get_compartments(force=force, version=version)
+    for identifier, name in compartments.items():
+        term = Term.from_triple(PREFIX, identifier, name)
+        # identifiers that are absent from GO_MAPPING (or mapped to None) get no exact match
+        if go_component_ref := GO_MAPPING.get(identifier):
+            term.append_exact_match(go_component_ref)
+        yield term
+if __name__ == "__main__":
+    BiGGCompartmentGetter.cli()

pyobo/sources/bigg/bigg_metabolite.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""Converter for metabolites in BiGG."""
+import logging
+import re
+from collections.abc import Iterable
+import bioregistry
+import pandas as pd
+from pydantic import ValidationError
+from tqdm import tqdm
+from pyobo.sources.bigg.bigg_compartment import GO_MAPPING
+from pyobo.struct import Obo, Reference, Term
+from pyobo.struct.typedef import located_in, participates_in
+from pyobo.utils.path import ensure_df
+__all__ = [
+    "BiGGMetaboliteGetter",
+]
+logger = logging.getLogger(__name__)
+PREFIX = "bigg.metabolite"
+URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt"
+PATTERN = re.compile("^[a-z_A-Z0-9]+$")
+MOLECULE = Term.from_triple("cob", "0000013", "molecule")
+class BiGGMetaboliteGetter(Obo):
+    """An ontology representation of BiGG Metabolites."""
+    ontology = PREFIX
+    # NOTE(review): presumably the key used to look up the shared BiGG version - confirm against Obo
+    bioversions_key = "bigg"
+    # the relations that iterate_terms() and its helpers put on the terms
+    typedefs = [participates_in, located_in]
+    # iterate_terms() makes every universal metabolite a child of MOLECULE
+    root_terms = [MOLECULE.reference]
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology.
+        :param force: Forwarded to :func:`iterate_terms`
+        :returns: The terms from :func:`iterate_terms` for the version in ``self._version_or_raise``
+        """
+        return iterate_terms(force=force, version=self._version_or_raise)
+# Maps the database name that precedes the first colon in a BiGG database link
+# to the prefix that the link's IRI is expected to parse to. Links whose parsed
+# prefix disagrees with this table are reported and skipped by _parse_dblinks().
+KEY_TO_PREFIX = {
+    "CHEBI": "chebi",
+    "Human Metabolome Database": "hmdb",
+    "LipidMaps": "lipidmaps",
+    "BioCyc": "biocyc",
+    "KEGG Compound": "kegg.compound",
+    "MetaNetX (MNX) Chemical": "metanetx.chemical",
+    "InChI Key": "inchikey",
+    "SEED Compound": "seed.compound",
+    "Reactome Compound": "reactome",
+    "KEGG Drug": "kegg.drug",
+    "KEGG Glycan": "kegg.glycan",
+    "MetaNetX (MNX) Equation": "metanetx.reaction",
+    "RHEA": "rhea",
+    "EC Number": "ec",
+    "SEED Reaction": "seed.reaction",
+    "Reactome Reaction": "reactome",
+    "KEGG Reaction": "kegg.reaction",
+}
+# Prefixes whose references are added as exact matches instead of plain xrefs
+EXACTS = {"inchikey"}
+def _split(x) -> list[str]:
+    """Split a semicolon-delimited value into stripped parts.
+    :param x: A string to split, or a missing value (as judged by ``pd.notna``)
+    :returns: The stripped parts, or an empty list when the value is missing
+    """
+    if pd.notna(x):
+        return [y.strip() for y in x.split(";")]
+    return []
+def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
+    """Iterate terms for BiGG Metabolite."""
+    bigg_df = ensure_df(
+        prefix=PREFIX,
+        url=URL,
+        force=force,
+        version=version,
+    )
+    for v in KEY_TO_PREFIX.values():
+        nmp = bioregistry.normalize_prefix(v)
+        if v != nmp:
+            raise ValueError(f"Normalize {v} to {nmp}")
+    universal_references: set[Reference] = set()
+    compartment_references: set[Reference] = set()
+    yield MOLECULE
+    # TODO there are duplicates on universal ID - this might be
+    # because the compartment ID is unique
+    for (
+        bigg_compartmental_id,
+        universal_bigg_id,
+        name,
+        model_list,
+        database_links,
+        old_bigg_ids,
+    ) in tqdm(
+        bigg_df.values,
+        unit_scale=True,
+        unit="metabolite",
+        desc=f"[{PREFIX}] processing",
+    ):
+        if not PATTERN.match(bigg_compartmental_id):
+            tqdm.write(f"[{PREFIX}] invalid BIGG ID: {bigg_compartmental_id}")
+            continue
+        universal_name = name.strip() if pd.notna(name) else None
+        _, _, compartment_letter = bigg_compartmental_id.rpartition("_")
+        compartment_reference = GO_MAPPING[compartment_letter] or Reference(
+            prefix="bigg.compartment", identifier=compartment_letter
+        )
+        compartment_references.add(compartment_reference)
+        compartment_name = (
+            f"{universal_name} (in {compartment_reference.name})" if universal_name else None
+        )
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX,
+                identifier=bigg_compartmental_id,
+                name=compartment_name,
+            ),
+        )
+        term.append_relationship(located_in, compartment_reference)
+        if PATTERN.match(universal_bigg_id):
+            universal_reference = Reference(
+                prefix=PREFIX, identifier=universal_bigg_id, name=universal_name
+            )
+            term.append_parent(universal_reference)
+            universal_references.add(universal_reference)
+        else:
+            tqdm.write(f"[{PREFIX}] invalid universal BIGG ID: {bigg_compartmental_id}")
+        for old_bigg_id in _split(old_bigg_ids):
+            if old_bigg_id in {bigg_compartmental_id, universal_bigg_id}:
+                continue
+            if not PATTERN.match(old_bigg_id):
+                if not old_bigg_id.endswith("]"):
+                    # if it ends with ']' then it's a compartment identifier
+                    logger.debug(f"[{PREFIX}:{universal_bigg_id}] invalid alt ID: {old_bigg_id}")
+                continue
+            term.append_alt(Reference(prefix=PREFIX, identifier=old_bigg_id))
+        _parse_model_links(term, model_list)
+        _parse_dblinks(term, database_links)
+        yield term
+    for universal_reference in universal_references:
+        yield Term(reference=universal_reference).append_parent(MOLECULE)
+    for compartment in compartment_references:
+        yield Term(reference=compartment)
+def _parse_model_links(term: Term, model_list: str) -> None:
+    """Annotate the term with each BiGG model in a semicolon-delimited list.
+    Model identifiers that fail validation as ``bigg.model`` references are
+    reported with ``tqdm.write`` and skipped.
+    :param term: The term to annotate in place
+    :param model_list: The semicolon-delimited model identifiers (split by ``_split``)
+    """
+    for model_id in _split(model_list):
+        try:
+            reference = Reference(prefix="bigg.model", identifier=model_id)
+        except ValidationError:
+            tqdm.write(f"[{term.curie}] invalid model reference: {model_id}")
+        else:
+            term.annotate_object(participates_in, reference)
+def _parse_dblinks(term: Term, database_links: str, property_map=None) -> None:
+    """Add the references in a semicolon-delimited list of database links to the term.
+    Each link is split at its first colon into a database name and an IRI. Links are
+    reported with ``tqdm.write`` and skipped when the IRI can't be parsed, when the
+    parsed prefix disagrees with ``KEY_TO_PREFIX``, when the reference fails validation,
+    or when the reference was already seen in this call.
+    :param term: The term to modify in place
+    :param database_links: The semicolon-delimited links (split by ``_split``)
+    :param property_map: An optional mapping from prefix to the property used to
+        annotate the term with references of that prefix. References with other prefixes
+        become exact matches if the prefix is in ``EXACTS`` and plain xrefs otherwise.
+    """
+    # NOTE(review): the messages below that use PREFIX name this module's prefix
+    # whichever module calls the function - confirm that's acceptable
+    if not property_map:
+        property_map = {}
+    # there are duplicate xrefs, keep track
+    seen = set()
+    for dblink in _split(database_links):
+        key, _, identifier_url = dblink.strip().partition(":")
+        identifier_url = identifier_url.strip()
+        if not identifier_url:
+            continue
+        # these three KEGG namespaces are split by hand instead of going through
+        # bioregistry.parse_iri()
+        # NOTE(review): presumably parse_iri() doesn't give the specific prefix - confirm
+        if identifier_url.startswith("http://identifiers.org/kegg.glycan/"):
+            prefix = "kegg.glycan"
+            identifier = identifier_url.removeprefix("http://identifiers.org/kegg.glycan/")
+        elif identifier_url.startswith("http://identifiers.org/kegg.drug/"):
+            prefix = "kegg.drug"
+            identifier = identifier_url.removeprefix("http://identifiers.org/kegg.drug/")
+        elif identifier_url.startswith("http://identifiers.org/kegg.reaction/"):
+            prefix = "kegg.reaction"
+            identifier = identifier_url.removeprefix("http://identifiers.org/kegg.reaction/")
+        else:
+            prefix_, identifier_ = bioregistry.parse_iri(identifier_url)
+            if not prefix_ or not identifier_:
+                tqdm.write(f"[{PREFIX}] failed to parse xref IRI: {identifier_url}")
+                continue
+            prefix, identifier = prefix_, identifier_
+        # a generic kegg prefix is treated as kegg.compound
+        if prefix == "kegg":
+            prefix = "kegg.compound"
+        # skip links whose parsed prefix doesn't match the one expected for their database name
+        if prefix != KEY_TO_PREFIX.get(key):
+            tqdm.write(f"[{PREFIX}] mismatch between {prefix=} and {key=} - {identifier_url}")
+            continue
+        # keep only the part of a rhea identifier before the first '#'
+        if prefix == "rhea" and "#" in identifier:
+            identifier = identifier.split("#")[0]
+        try:
+            reference = Reference(prefix=prefix, identifier=identifier)
+        except ValidationError:
+            tqdm.write(f"[{term.curie}] could not validate xref - {prefix}:{identifier}")
+            continue
+        # don't add self-reference
+        if reference.pair == term.pair:
+            continue
+        if reference in seen:
+            tqdm.write(f"[{term.curie}] got duplicate xref {reference}")
+            continue
+        seen.add(reference)
+        if prefix in property_map:
+            term.annotate_object(property_map[prefix], reference)
+        elif prefix in EXACTS:
+            term.append_exact_match(reference)
+        else:
+            term.append_xref(reference)
+if __name__ == "__main__":
+    BiGGMetaboliteGetter.cli()

pyobo/sources/bigg/bigg_model.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""Converter for models in BiGG."""
+import json
+import logging
+from collections.abc import Iterable
+from pyobo.resources.ncbitaxon import get_ncbitaxon_id
+from pyobo.struct import Obo, Term
+from pyobo.utils.path import ensure_path
+__all__ = [
+    "BiGGModelGetter",
+]
+logger = logging.getLogger(__name__)
+URL = "http://bigg.ucsd.edu/api/v2/models"
+PREFIX = "bigg.model"
+class BiGGModelGetter(Obo):
+    """An ontology representation of BiGG Models."""
+    ontology = PREFIX
+    bioversions_key = "bigg"
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iterate_terms(version=self._version_or_raise)
+def iterate_terms(version: str) -> Iterable[Term]:
+    """Iterate over BiGG Models."""
+    path = ensure_path(PREFIX, url=URL, version=version)
+    records = json.loads(path.read_text())["results"]
+    for record in records:
+        ncbitaxon_id = get_ncbitaxon_id(record["organism"])
+        term = Term.from_triple(PREFIX, record["bigg_id"])
+        if ncbitaxon_id:
+            term.set_species(ncbitaxon_id)
+        else:
+            logger.info("[%s] could not ground organism name: %s", term.curie, record["organism"])
+        yield term
+if __name__ == "__main__":
+    BiGGModelGetter.cli()

pyobo/sources/bigg/bigg_reaction.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""Converter for BiGG."""
+from collections.abc import Iterable
+import pandas as pd
+from pydantic import ValidationError
+from tqdm import tqdm
+from pyobo.sources.bigg.bigg_metabolite import _parse_dblinks, _parse_model_links, _split
+from pyobo.struct import Obo, Reference, Term
+from pyobo.struct.typedef import enabled_by, participates_in
+from pyobo.utils.path import ensure_df
+__all__ = [
+    "BiGGReactionGetter",
+]
+PREFIX = "bigg.reaction"
+URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt"
+PROPERTY_MAP = {"ec": enabled_by}
+class BiGGReactionGetter(Obo):
+    """An ontology representation of BiGG Reactions."""
+    ontology = PREFIX
+    # NOTE(review): presumably the key used to look up the shared BiGG version - confirm against Obo
+    bioversions_key = "bigg"
+    # participates_in is applied by _parse_model_links(); enabled_by is the
+    # property that PROPERTY_MAP assigns to the "ec" prefix
+    typedefs = [participates_in, enabled_by]
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology.
+        :param force: Forwarded to :func:`iterate_terms`
+        :returns: The terms from :func:`iterate_terms` for the version in ``self._version_or_raise``
+        """
+        return iterate_terms(force=force, version=self._version_or_raise)
+def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
+    """Iterate terms for BiGG Reaction."""
+    bigg_reaction_df = ensure_df(
+        prefix=PREFIX,
+        url=URL,
+        force=force,
+        version=version,
+    )
+    for bigg_id, name, reaction_string, model_list, database_links, old_bigg_ids in tqdm(
+        bigg_reaction_df.values, unit_scale=True, unit="reaction", desc=f"[{PREFIX}] processing"
+    ):
+        if "(" in bigg_id:
+            tqdm.write(f"[{PREFIX}] identifier has open paren. can't encode in OWL: {bigg_id}")
+            continue
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX, identifier=bigg_id, name=name if pd.notna(name) else None
+            ),
+            definition=reaction_string,
+        )
+        for old_bigg_id in _split(old_bigg_ids):
+            if old_bigg_id == bigg_id:
+                continue
+            if "(" in old_bigg_id:
+                continue
+            try:
+                alt_reference = Reference(prefix=PREFIX, identifier=old_bigg_id)
+            except ValidationError:
+                tqdm.write(f"[{term.curie}] had problematic alt reference: {old_bigg_id}")
+            else:
+                term.append_alt(alt_reference)
+        _parse_model_links(term, model_list)
+        # TODO make sure exact match goes to the bidirectional rhea reaction but not others
+        _parse_dblinks(term, database_links)
+        yield term
+if __name__ == "__main__":
+    BiGGReactionGetter.cli()

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl