pymetadata 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pymetadata might be problematic. Click here for more details.

Files changed (42) hide show
  1. pymetadata/__init__.py +14 -0
  2. pymetadata/cache.py +52 -0
  3. pymetadata/chebi.py +92 -0
  4. pymetadata/console.py +18 -0
  5. pymetadata/core/__init__.py +1 -0
  6. pymetadata/core/annotation.py +396 -0
  7. pymetadata/core/creator.py +46 -0
  8. pymetadata/core/synonym.py +12 -0
  9. pymetadata/core/xref.py +66 -0
  10. pymetadata/examples/__init__.py +1 -0
  11. pymetadata/examples/cache_path_example.py +15 -0
  12. pymetadata/examples/omex_example.py +46 -0
  13. pymetadata/examples/results/test_from_files.omex +0 -0
  14. pymetadata/examples/results/test_from_omex.omex +0 -0
  15. pymetadata/examples/results/testomex/README.md +3 -0
  16. pymetadata/examples/results/testomex/manifest.xml +9 -0
  17. pymetadata/examples/results/testomex/models/omex_comp.xml +174 -0
  18. pymetadata/examples/results/testomex/models/omex_comp_flat.xml +215 -0
  19. pymetadata/examples/results/testomex/models/omex_minimal.xml +99 -0
  20. pymetadata/examples/test.omex +0 -0
  21. pymetadata/identifiers/__init__.py +1 -0
  22. pymetadata/identifiers/miriam.py +43 -0
  23. pymetadata/identifiers/registry.py +397 -0
  24. pymetadata/log.py +29 -0
  25. pymetadata/metadata/__init__.py +6 -0
  26. pymetadata/metadata/eco.py +15918 -0
  27. pymetadata/metadata/kisao.py +2731 -0
  28. pymetadata/metadata/sbo.py +3754 -0
  29. pymetadata/omex.py +771 -0
  30. pymetadata/omex_v2.py +30 -0
  31. pymetadata/ontologies/__init__.py +1 -0
  32. pymetadata/ontologies/ols.py +214 -0
  33. pymetadata/ontologies/ontology.py +312 -0
  34. pymetadata/py.typed +0 -0
  35. pymetadata/resources/chebi_webservice_wsdl.xml +509 -0
  36. pymetadata/resources/ontologies/README.md +4 -0
  37. pymetadata/resources/templates/ontology_enum.pytemplate +61 -0
  38. pymetadata/unichem.py +190 -0
  39. pymetadata-0.5.0.dist-info/METADATA +154 -0
  40. pymetadata-0.5.0.dist-info/RECORD +42 -0
  41. pymetadata-0.5.0.dist-info/WHEEL +4 -0
  42. pymetadata-0.5.0.dist-info/licenses/LICENSE +7 -0
pymetadata/omex_v2.py ADDED
@@ -0,0 +1,30 @@
1
+ """COMBINE archive version 2.
2
+
3
+ See https://docs.google.com/document/d/1-UDgY5lQ6tv4mZILZzol-PvCoAYW8yr2Ydn1OxcHMjM/edit#
4
+ """
5
+
6
+ from typing import List, Optional
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
class Creator(BaseModel):
    """Creator version 2.

    A person contributing to the COMBINE archive.
    """

    # full name of the creator
    name: str
    # FIX: explicit `= None` defaults; under pydantic v2 a plain
    # `Optional[str]` annotation is a *required* field (pydantic v1
    # implicitly defaulted it to None).
    affiliation: Optional[str] = None
    orcid: Optional[str] = None
17
+
18
+
19
class Manifest(BaseModel):
    """Manifest version 2.

    Top-level metadata of a COMBINE archive version 2.
    """

    title: str
    description: str
    access_right: str
    access_conditions: str
    license: str
    creators: List[Creator]
    # FIX: explicit `= None` defaults; under pydantic v2 a plain
    # `Optional[str]` annotation is a *required* field (pydantic v1
    # implicitly defaulted it to None).
    version: Optional[str] = None
    doi: Optional[str] = None
    keywords: List[str]
@@ -0,0 +1 @@
1
+ """Ontologies."""
@@ -0,0 +1,214 @@
1
+ """Lookup of ontology information from the ontology lookup service (OLS).
2
+
3
+ This uses the EMBL-EBI Ontology Lookup Service
4
+ https://www.ebi.ac.uk/ols4
5
+
6
+ """
7
+
8
+ import urllib.parse
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ import requests
14
+
15
+ import pymetadata
16
+ from pymetadata import log
17
+ from pymetadata.cache import read_json_cache, write_json_cache
18
+ from pymetadata.identifiers.registry import Registry
19
+
20
+
21
# module-wide Identifiers.org registry, used to check OLS providers
registry = Registry()
# module-level logger
logger = log.get_logger(__name__)
23
+
24
+
25
@dataclass
class OLSOntology:
    """An ontology available on the Ontology Lookup Service.

    If no `iri_pattern` is provided, the default OBO PURL pattern
    `http://purl.obolibrary.org/obo/<NAME>_{$Id}` is used.
    """

    name: str
    iri_pattern: Optional[str] = None

    def __post_init__(self) -> None:
        """Fill in the default OBO IRI pattern when none was given."""
        if self.iri_pattern is None:
            prefix = self.name.upper()
            self.iri_pattern = f"http://purl.obolibrary.org/obo/{prefix}_" + "{$Id}"
38
+
39
+
40
# Ontologies supported for OLS term lookup. Entries without an explicit
# `iri_pattern` fall back to the default OBO PURL pattern set in
# OLSOntology.__post_init__.
ONTOLOGIES = [
    # ontologies which are used in most projects
    OLSOntology(name="sbo", iri_pattern="http://biomodels.net/SBO/SBO_{$Id}"),
    OLSOntology(
        name="ncbitaxon", iri_pattern="http://purl.obolibrary.org/obo/NCBITaxon_{$Id}"
    ),
    OLSOntology(name="bto"),
    OLSOntology(name="chebi"),
    OLSOntology(name="cmo"),
    OLSOntology(name="chmo"),
    OLSOntology(name="doid"),
    OLSOntology(name="efo", iri_pattern="http://www.ebi.ac.uk/efo/EFO_{$Id}"),
    OLSOntology(name="fix"),
    OLSOntology(name="fma"),
    OLSOntology(name="foodon"),
    OLSOntology(name="go"),
    OLSOntology(name="hp"),
    OLSOntology(name="nbo"),
    OLSOntology(name="obi"),
    OLSOntology(name="mondo"),
    OLSOntology(name="ncit"),
    OLSOntology(name="mp"),
    OLSOntology(name="oba"),
    OLSOntology(name="opmi"),
    OLSOntology(name="omit"),
    OLSOntology(
        name="sio", iri_pattern="http://semanticscience.org/resource/SIO_{$Id}"
    ),
    OLSOntology(name="vto"),
]
70
+
71
+
72
class OLSQuery:
    """Handling OLS queries.

    Resolves ontology terms via the EMBL-EBI Ontology Lookup Service
    (OLS4). Successful JSON responses are optionally cached on disk
    under `<cache_path>/ols`.
    """

    url_term_query = "https://www.ebi.ac.uk/ols4/api/ontologies/{}/terms/{}"

    def __init__(
        self,
        ontologies: List[OLSOntology],
        cache_path: Optional[Path] = None,
        cache: Optional[bool] = None,
    ):
        """Initialize OLSQuery.

        :param ontologies: ontologies available for IRI resolution
        :param cache_path: base cache directory; defaults to
            `pymetadata.CACHE_PATH`
        :param cache: enable response caching; defaults to
            `pymetadata.CACHE_USE`
        """
        self.ontologies: Dict[str, OLSOntology] = {
            ontology.name: ontology for ontology in ontologies
        }
        # FIX: compare against None; `if not cache:` also overrode an
        # explicitly passed `cache=False` with the global default.
        if cache_path is None:
            cache_path = pymetadata.CACHE_PATH
        if cache is None:
            cache = pymetadata.CACHE_USE

        self.cache_path = cache_path / "ols"
        self.cache = cache

        if cache and not self.cache_path.exists():
            self.cache_path.mkdir(parents=True)

    def get_iri(self, ontology: str, term: str) -> str:
        """Get IRI for a term in the given ontology.

        Unregistered ontologies fall back to the default OBO PURL
        pattern with a warning.

        :param ontology: lower case ontology key, e.g. 'chebi'
        :param term: term id, optionally prefixed, e.g. 'CHEBI:1234'
        :return: resolved IRI
        :raises ValueError: if the registered ontology has no iri pattern
        """
        ols_ontology: Optional[OLSOntology] = self.ontologies.get(ontology, None)
        # remove prefix if existing, e.g. 'CHEBI:1234' -> '1234'
        if term.startswith(ontology.upper()):
            term = term.replace(f"{ontology.upper()}:", "")

        if ols_ontology is None:
            logger.warning(
                f"Ontology '{ontology}' is not registered, using default iri."
            )
            iri = f"http://purl.obolibrary.org/obo/{ontology.upper()}_{term}"
        else:
            if not ols_ontology.iri_pattern:
                # FIX: closed the unbalanced backtick in the message
                raise ValueError(f"No iri pattern for `{ols_ontology}`")
            iri = ols_ontology.iri_pattern.replace("{$Id}", term)

        return iri

    def query_ols(self, ontology: Optional[str], term: Optional[str]) -> Dict:
        """Query the ontology lookup service for a term.

        :param ontology: collection/ontology key; may be None/empty
        :param term: term id; may be None/empty
        :return: the OLS term document extended with 'errors' and
            'warnings' lists; on failure a dict with only those lists
        """
        if not ontology:
            return {"errors": [], "warnings": ["No collection."]}
        if not term:
            return {"errors": [], "warnings": [f"No term: '{ontology}'"]}

        # check via the Identifiers.org registry that the ontology has
        # an OLS provider at all
        namespace = registry.ns_dict.get(ontology)
        ols_pattern = None
        if namespace and namespace.resources:
            for ns_resource in namespace.resources:
                if ns_resource.providerCode == "ols":
                    ols_pattern = ns_resource.urlPattern
                    break

        if not ols_pattern:
            return {
                "errors": [],
                "warnings": [f"'{ontology}' is not on OLS."],
            }

        # OLS names the taxonomy collection 'ncbitaxon'
        if ontology == "taxonomy":
            ontology = "ncbitaxon"

        iri = self.get_iri(ontology=ontology, term=term)

        # double urlencode iri for OLS
        urliri = urllib.parse.quote(iri, safe="")
        urliri = urllib.parse.quote(urliri, safe="")
        cache_path = self.cache_path / f"{urliri}.json"
        data: Dict[str, Any] = {}
        if self.cache:
            try:
                data = read_json_cache(cache_path=cache_path)
            except IOError:
                # cache does not exist yet
                pass

        if not data:
            url = self.url_term_query.format(ontology, urliri)
            logger.info(f"Query: {url}")
            response = requests.get(url)

            if response.status_code != 200:
                # FIX: return early so transient HTTP failures are not
                # persisted in the cache (previously the error dict was
                # written to the cache file).
                return {
                    "errors": [f"{response.status_code} response for: '{url}'"],
                    "warnings": [],
                }

            data = response.json()
            if not data or "error" in data:
                error_msg = (
                    f"Error in OLS query <{ontology}|{term}> at {url}: {data}"
                )
                logger.error(error_msg)
                return {
                    "errors": [error_msg],
                    "warnings": [],
                }

            data["errors"] = []
            data["warnings"] = []
            # only successful responses are cached
            if self.cache:
                write_json_cache(data=data, cache_path=cache_path)  # type: ignore

        return data

    def process_response(self, term: Dict) -> Dict[str, Any]:
        """Process the response dictionary into a flat annotation dict.

        :param term: OLS term document as returned by `query_ols`
        :return: dict with 'errors', 'warnings', 'label', 'description',
            'synonyms' and 'xrefs' keys
        """
        data = {
            "errors": term["errors"],
            "warnings": term["warnings"],
        }

        label = term.get("label", None)
        description = term.get("description", None)
        # fallback description from the annotation definition
        if description is None:
            annotation = term.get("annotation")
            if annotation:
                definition = annotation.get("definition")
                if definition:
                    description = definition[0]

        # OLS sometimes returns the description as a list of strings
        if description and isinstance(description, list):
            description = description[0]
        synonyms = term.get("obo_synonym", [])
        xrefs = term.get("obo_xref", [])

        return {
            **data,
            "label": label,
            "description": description,
            "synonyms": synonyms,
            "xrefs": xrefs,
        }
@@ -0,0 +1,312 @@
1
+ """Ontology support.
2
+
3
+ This file allows to download the ontologies for local use.
4
+ Special ontologies are provided as enums.
5
+
6
+ Uses the OWL links provided on OLS4 to download the ontologies.
7
+ """
8
+
9
+ import gzip
10
+ import importlib
11
+ import re
12
+ import shutil
13
+ import tempfile
14
+ import warnings
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Union
20
+
21
+ import pronto
22
+ import requests
23
+ from jinja2 import Template
24
+ from pronto.ontology import Ontology as ProntoOntology
25
+ from pronto.relationship import Relationship as ProntoRelationship
26
+ from pronto.term import Term as ProntoTerm
27
+
28
+ from pymetadata import ENUM_DIR, RESOURCES_DIR, log
29
+
30
+ logger = log.get_logger(__name__)
31
+
32
+
33
class OntologyFormat(str, Enum):
    """Formats for ontologies.

    The value doubles as the file extension of stored ontology files.
    """

    OBO = "obo"  # OBO flat file format
    OWL = "owl"  # Web Ontology Language
38
+
39
+
40
@dataclass
class OntologyFile:
    """Definition file for ontology.

    Describes where an ontology can be downloaded from and where its
    gzipped copy is stored in the package resources.
    """

    id: str  # upper case ontology id, e.g. 'CHEBI'
    name: str  # human readable ontology name
    format: OntologyFormat
    source: str  # download URL
    bioportal: bool  # available on BioPortal
    ols: bool  # available on OLS

    @property
    def path(self) -> Path:
        """Path of the gzipped ontology file in the package resources."""
        return RESOURCES_DIR / "ontologies" / f"{self.id.lower()}.{self.format}.gz"

    @property
    def filename(self) -> str:
        """Filename of ontology file.

        :return: ontology filename
        :rtype: str
        """
        # FIX: removed stray debug `print` of the path
        return str(self.path)
66
+
67
+
68
# Ontologies bundled with the package; downloaded/updated via
# `update_ontology_files` and stored gzipped under the resources dir.
_ontology_files: List[OntologyFile] = [
    OntologyFile(
        "BTO",
        name="The BRENDA Tissue Ontology (BTO)",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/bto.owl",
        bioportal=False,
        ols=True,
    ),
    OntologyFile(
        "CHEBI",
        name="Chemical Entities of Biological Interest Ontology",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/chebi.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "FMA",
        name="Foundational Model of Anatomy",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/fma.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "ECO",
        name="Evidence & Conclusion Ontology (ECO)",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/eco.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "GO",
        name="Gene Ontology",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/go/extensions/go-plus.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "KISAO",
        name="Kinetic Simulation Algorithm Ontology",
        format=OntologyFormat.OWL,
        # source="https://raw.githubusercontent.com/SED-ML/KiSAO/deploy/kisao.owl",
        source="https://raw.githubusercontent.com/SED-ML/KiSAO/dev/kisao.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "SBO",
        name="Systems Biology Ontology",
        format=OntologyFormat.OWL,
        source="https://raw.githubusercontent.com/EBI-BioModels/SBO/master/SBO_OWL.owl",
        bioportal=True,
        ols=True,
    ),
    OntologyFile(
        "NCIT",
        name="National Cancer Institute Thesaurus",
        format=OntologyFormat.OWL,
        source="http://purl.obolibrary.org/obo/ncit.owl",
        bioportal=True,
        ols=True,
    ),
    # OntologyFile(
    #     "NCBITAXON",
    #     name="NCBI organismal classification",
    #     format=OntologyFormat.OWL,
    #     source=OLS_BASE_URL + "ncbitaxon",
    #     bioportal=False,
    #     ols=True,
    # ),
]
143
+
144
+
145
# lookup of ontology files by upper case ontology id, e.g. 'CHEBI'
ontology_files: Dict[str, OntologyFile] = {
    ontology.id: ontology for ontology in _ontology_files
}
148
+
149
+
150
def update_ontology_file(ofile: OntologyFile) -> None:
    """Download the latest version of a single ontology.

    The OWL file is streamed from `ofile.source` into a temporary
    directory; only a gzip-compressed copy is stored under the package
    resources.

    :param ofile: the ontology file definition to update
    """
    oid = ofile.id
    logger.info(f"Update ontology: `{oid}`")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # stream the download into a temporary location
        owl_path = Path(tmp_dir) / f"{oid.lower()}.owl"
        with requests.get(ofile.source, stream=True) as response:
            response.raise_for_status()
            with open(owl_path, "wb") as f_owl:
                for chunk in response.iter_content(chunk_size=8192):
                    f_owl.write(chunk)

        # only the gzip compressed version is kept in the resources
        gzip_path = RESOURCES_DIR / "ontologies" / f"{oid.lower()}.owl.gz"
        with open(owl_path, "rb") as f_in:
            with gzip.open(gzip_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
172
+
173
+
174
def update_ontology_files() -> None:
    """Download latest ontology files in parallel.

    FIX: the futures returned by `pool.submit` were previously
    discarded, so any download exception was silently swallowed.
    Results are now awaited and failures are logged per ontology.
    """
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {
            pool.submit(update_ontology_file, ofile): oid
            for oid, ofile in ontology_files.items()
        }
        for future, oid in futures.items():
            try:
                future.result()
            except Exception:
                logger.exception(f"Update failed for ontology `{oid}`")
179
+
180
+
181
class Ontology:
    """Ontology.

    Wraps a `pronto.Ontology` parsed from the bundled gzipped OWL file.

    NOTE(review): the parsed ontology is stored on the CLASS attribute
    `_ontology`, so constructing a second `Ontology` instance replaces
    the ontology seen by all existing instances — confirm this sharing
    is intentional.
    """

    # class-level storage, shared by all instances (see NOTE above)
    _ontology: Optional[ProntoOntology] = None

    def __init__(self, ontology_id: str):
        """Construct ontology.

        :param ontology_id: key into `ontology_files`, e.g. 'SBO'
        """
        ontology_file = ontology_files[ontology_id]
        logger.info(f"Read ontology: `{ontology_id}`")

        # read ontology with pronto; pronto emits noisy syntax and
        # not-implemented warnings for some OWL files, suppressed here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", pronto.utils.warnings.SyntaxWarning)
            warnings.simplefilter("ignore", pronto.utils.warnings.NotImplementedWarning)
            self.__class__._ontology = pronto.Ontology(ontology_file.filename)

    def get_pronto_ontology(self) -> Optional[ProntoOntology]:
        """Get a pronto object for the ontology.

        :return: `pronto.Ontology`: pronto object for the ontology,
            or None if no ontology has been read yet
        :rtype: [type]
        """
        return self._ontology
204
+
205
+
206
def create_ontology_enum(ontology_id: str, pattern: str) -> None:
    """Create enum of the ontology.

    Reads the ontology, collects all named terms, and renders them into
    a python module (one Enum) using the `ontology_enum.pytemplate`
    jinja2 template.

    :param ontology_id: key into `ontology_files`, e.g. 'SBO'
    :param pattern: regex pattern for valid term ids in the enum
    """
    logger.info(f"Create enum: `{ontology_id}`")

    def name_to_variable(name: str) -> Optional[str]:
        """Clean string to python variable name."""
        if name is None:
            return None
        return re.sub(r"\W|^(?=\d)", "_", name).upper()

    # load ontology
    ontology: Ontology = Ontology(ontology_id=ontology_id)
    if not ontology._ontology:
        raise ValueError(f"No Pronto Ontology for `{ontology_id}`")

    terms: Dict[str, Dict] = {}
    names = set()
    pronto_term: Union[ProntoTerm, ProntoRelationship]

    for key in ontology._ontology:
        pronto_term = ontology._ontology[key]

        pronto_name: Union[str, None, Any] = pronto_term.name
        if not isinstance(pronto_name, str):
            logger.warning(f"Pronto name is none: `{pronto_term}`")
            continue

        var_name: Optional[str] = name_to_variable(pronto_name)
        if var_name in names:
            logger.error(f"Duplicate name in ontology: `{var_name}`")
            continue
        names.add(var_name)

        term_id = pronto_term.id
        # fix the ids
        if ontology_id == "KISAO":
            term_id = term_id.replace("http://www.biomodels.net/kisao/KISAO#", "")
        if ontology_id == "SBO":
            term_id = term_id.replace("http://biomodels.net/SBO/", "")
        if ":" in term_id:
            term_id = term_id.replace(":", "_")

        terms[term_id] = {
            "id": term_id,
            "var_name": var_name,
            "name": pronto_name.replace('"', "'"),
            "definition": pronto_term.definition,
        }

    # terms sorted by id for a stable, diff-friendly module
    terms_sorted = {key: terms[key] for key in sorted(terms)}

    # render the module from the template
    template_path = RESOURCES_DIR / "templates" / "ontology_enum.pytemplate"
    with open(template_path, "r") as f_template:
        template = Template(
            f_template.read(),
            trim_blocks=True,
            lstrip_blocks=True,
        )

    module_str = template.render(
        ontology_id=ontology_id,
        terms=terms_sorted,
        pattern=pattern,
    )
    path_module = ENUM_DIR / f"{ontology_id.lower()}.py"
    print(path_module)
    with open(path_module, "w") as f_py:
        f_py.write(module_str)
282
+
283
+
284
def try_ontology_import(ontology_id: str) -> None:
    """Try import of created module.

    Raises ImportError if the generated `pymetadata.metadata.<id>`
    module cannot be imported.
    """
    module_name = f"pymetadata.metadata.{ontology_id.lower()}"
    importlib.import_module(module_name)
288
+
289
+
290
if __name__ == "__main__":
    # download latest versions of all registered ontologies
    update_ontology_files()

    # test loading of OWL files
    # ofile: OntologyFile
    # for oid, ofile in ontology_files.items():
    #     console.rule(style="white")
    #     ontology = Ontology(ontology_id=oid)
    #     console.print(ontology)
    # ontology = Ontology(ontology_id="CHEBI")

    # convert to python module (enum per ontology)
    create_ontology_enum("SBO", r"^SBO_\d{7}$")
    create_ontology_enum("KISAO", r"^KISAO_\d{7}$")
    create_ontology_enum("ECO", r"^ECO_\d{7}$")

    # smoke-test that the generated modules import cleanly
    try_ontology_import("SBO")
    try_ontology_import("KISAO")
    try_ontology_import("ECO")

    # for ontology_id in ontology_files:
    #     create_ontology_enum(ontology_id)
pymetadata/py.typed ADDED
File without changes