pymetadata 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pymetadata might be problematic. Click here for more details.
- pymetadata/__init__.py +14 -0
- pymetadata/cache.py +52 -0
- pymetadata/chebi.py +92 -0
- pymetadata/console.py +18 -0
- pymetadata/core/__init__.py +1 -0
- pymetadata/core/annotation.py +396 -0
- pymetadata/core/creator.py +46 -0
- pymetadata/core/synonym.py +12 -0
- pymetadata/core/xref.py +66 -0
- pymetadata/examples/__init__.py +1 -0
- pymetadata/examples/cache_path_example.py +15 -0
- pymetadata/examples/omex_example.py +46 -0
- pymetadata/examples/results/test_from_files.omex +0 -0
- pymetadata/examples/results/test_from_omex.omex +0 -0
- pymetadata/examples/results/testomex/README.md +3 -0
- pymetadata/examples/results/testomex/manifest.xml +9 -0
- pymetadata/examples/results/testomex/models/omex_comp.xml +174 -0
- pymetadata/examples/results/testomex/models/omex_comp_flat.xml +215 -0
- pymetadata/examples/results/testomex/models/omex_minimal.xml +99 -0
- pymetadata/examples/test.omex +0 -0
- pymetadata/identifiers/__init__.py +1 -0
- pymetadata/identifiers/miriam.py +43 -0
- pymetadata/identifiers/registry.py +397 -0
- pymetadata/log.py +29 -0
- pymetadata/metadata/__init__.py +6 -0
- pymetadata/metadata/eco.py +15918 -0
- pymetadata/metadata/kisao.py +2731 -0
- pymetadata/metadata/sbo.py +3754 -0
- pymetadata/omex.py +771 -0
- pymetadata/omex_v2.py +30 -0
- pymetadata/ontologies/__init__.py +1 -0
- pymetadata/ontologies/ols.py +214 -0
- pymetadata/ontologies/ontology.py +312 -0
- pymetadata/py.typed +0 -0
- pymetadata/resources/chebi_webservice_wsdl.xml +509 -0
- pymetadata/resources/ontologies/README.md +4 -0
- pymetadata/resources/templates/ontology_enum.pytemplate +61 -0
- pymetadata/unichem.py +190 -0
- pymetadata-0.5.0.dist-info/METADATA +154 -0
- pymetadata-0.5.0.dist-info/RECORD +42 -0
- pymetadata-0.5.0.dist-info/WHEEL +4 -0
- pymetadata-0.5.0.dist-info/licenses/LICENSE +7 -0
pymetadata/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""pymetadata - Python utilities for metadata."""

from pathlib import Path

__author__ = "Matthias Koenig"
__version__ = "0.5.0"


# Package name for logging/reporting purposes.
program_name: str = "pymetadata"
# Bundled data files shipped with the package (e.g. the ChEBI WSDL used by
# pymetadata.chebi).
RESOURCES_DIR: Path = Path(__file__).parent / "resources"
# Directory with the generated ontology enum modules (eco.py, kisao.py, sbo.py).
ENUM_DIR: Path = Path(__file__).parent / "metadata"

# Global defaults for webservice caching; query helpers (e.g. ChebiQuery.query)
# fall back to these when no explicit `cache`/`cache_path` argument is given.
CACHE_USE: bool = False
CACHE_PATH: Path = Path.home() / ".cache" / "pymetadata"
|
pymetadata/cache.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Caching of information."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from json.encoder import JSONEncoder
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Optional, Type
|
|
7
|
+
|
|
8
|
+
from pymetadata import log
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
logger = log.get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DataclassJSONEncoder(JSONEncoder):
    """JSON encoder serializing objects via their ``__dict__``.

    Intended for simple dataclass instances (used e.g. when writing
    cross-reference data to the JSON cache); any object exposing a
    ``__dict__`` is serialized the same way.
    """

    def default(self, o: Any) -> Any:
        """Return a JSON-serializable representation of `o`.

        Objects without a ``__dict__`` (slotted classes, builtins) are
        delegated to the base implementation, which raises a descriptive
        ``TypeError`` instead of the previous bare ``AttributeError``.
        """
        try:
            return o.__dict__
        except AttributeError:
            return super().default(o)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def read_json_cache(cache_path: Path) -> Dict:
    """Read JSON cache file.

    :param cache_path: path of the JSON cache file
    :return: dictionary with the cached content
    :raises IOError: if the cache file does not exist
    """
    # Guard clause: fail fast with a descriptive error for missing files.
    # Callers (e.g. the ChEBI query) catch IOError and treat it as a
    # cache miss.
    if not cache_path.exists():
        raise IOError(f"Cache path does not exist: '{cache_path}'")

    logger.debug(f"Read cache: {cache_path}")
    with open(cache_path) as fp:
        return json.load(fp)  # type: ignore
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def write_json_cache(
    data: Dict, cache_path: Path, json_encoder: Optional[Type[JSONEncoder]] = None
) -> None:
    """Write JSON cache file.

    Parent directories are created if they do not exist.

    :param data: data to serialize
    :param cache_path: path for the cache file
    :param json_encoder: optional JSON encoder class for objects that are
        not natively JSON serializable
    :return: None
    """
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    logger.info(f"Write cache: {cache_path}")
    with open(cache_path, "w") as fp:
        # json.dump documents cls=None as "use the default encoder", so a
        # single call covers both the encoder and no-encoder cases.
        json.dump(data, fp=fp, indent=2, cls=json_encoder)
|
pymetadata/chebi.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Module for working with chebi."""

from pathlib import Path
from pprint import pprint
from typing import Any, Dict, Optional

from zeep import Client


import pymetadata
from pymetadata import log
from pymetadata.cache import DataclassJSONEncoder, read_json_cache, write_json_cache

logger = log.get_logger(__name__)

# SOAP client constructed from the WSDL bundled in the package resources,
# so no network access is needed to build the client itself.
# FIXME: copy the file to the cache dir
client = Client(str(pymetadata.RESOURCES_DIR / "chebi_webservice_wsdl.xml"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ChebiQuery:
    """Class to query information from ChEBI.

    An overview over available methods:
    python -mzeep https://www.ebi.ac.uk/webservices/chebi/2.0/webservice?wsdl
    """

    @staticmethod
    def query(
        chebi: str, cache: Optional[bool] = None, cache_path: Optional[Path] = None
    ) -> Dict:
        """Query additional ChEBI information.

        :param chebi: ChEBI term id, e.g. ``CHEBI:2668``; falsy values
            return an empty dict.
        :param cache: read cached data if available; defaults to
            ``pymetadata.CACHE_USE`` when not given.
        :param cache_path: base directory for the JSON cache; defaults to
            ``pymetadata.CACHE_PATH`` when not given.
        :return: dictionary with ChEBI information, empty dict on failure.
        """

        if not chebi:
            return dict()
        # fall back to package-level cache settings
        if cache is None:
            cache = pymetadata.CACHE_USE
        if cache_path is None:
            cache_path = pymetadata.CACHE_PATH

        # caching
        chebi_base_path = Path(cache_path) / "chebi"
        if not chebi_base_path.exists():
            chebi_base_path.mkdir(parents=True)

        # ':' is percent-encoded so the term id is a safe file name
        chebi_path = chebi_base_path / f"{chebi.replace(':', '%3A')}.json"
        data: Dict[str, Any] = {}
        if cache:
            # a missing cache file raises IOError and is treated as a miss
            try:
                data = read_json_cache(cache_path=chebi_path)
            except IOError:
                pass

        # fetch and cache data
        if not data:
            try:
                # SOAP call against the ChEBI webservice
                result = client.service.getCompleteEntity(chebi)
                # print(result)
            except Exception:
                # best-effort: network/webservice failures yield empty data
                logger.error(f"CHEBI information could not be retrieved for: {chebi}")
                return dict()

            # parse formula
            formula = None
            formulae = result["Formulae"]
            if formulae:
                # first formula entry is used
                formula = formulae[0]["data"]

            data = {
                "chebi": chebi,
                "name": result["chebiAsciiName"],
                "definition": result["definition"],
                "formula": formula,
                "charge": result["charge"],
                "mass": result["mass"],
                "inchikey": result["inchiKey"],
            }

            logger.info(f"Write chebi: {chebi_path}")
            write_json_cache(
                data=data, cache_path=chebi_path, json_encoder=DataclassJSONEncoder
            )

        return data
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
    # Demo: query a few example ChEBI terms, first bypassing the cache,
    # then again with the cache enabled (second query writes/reads cache).
    chebis = ["CHEBI:2668", "CHEBI:138366", "CHEBI:9637", "CHEBI:155897"]
    for chebi in chebis:
        print(chebi)
        d = ChebiQuery.query(chebi=chebi, cache=False)
        pprint(d)
        d = ChebiQuery.query(chebi=chebi, cache=True)
|
pymetadata/console.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Rich console for logging."""

from rich import pretty
from rich.console import Console
from rich.theme import Theme


# Enable rich pretty-printing of Python objects in the interpreter.
pretty.install()
# Semantic styles usable as console.print(..., style="success") etc.
custom_theme = Theme(
    {
        "success": "green",
        "info": "blue",
        "warning": "orange3",
        "error": "red",
    }
)

# Shared console instance; record=True enables rich's output recording,
# which allows later export of everything printed.
console = Console(record=True, theme=custom_theme)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core data structures."""
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""Annotation.

Core data structure to store annotations.
"""

import re
# Explicit submodule import: `urllib.parse.quote` is used below. A bare
# `import urllib` only worked because `requests` happens to import the
# submodule as a side effect.
import urllib.parse
from pprint import pprint
from typing import Any, Dict, Final, List, Optional, Tuple, Union

import requests

from pymetadata import log
from pymetadata.core.xref import CrossReference, is_url
from pymetadata.identifiers.miriam import BQB, BQM
from pymetadata.identifiers.registry import REGISTRY
from pymetadata.ontologies.ols import ONTOLOGIES, OLSQuery


# Shared helper for resolving terms against the Ontology Lookup Service.
OLS_QUERY = OLSQuery(ontologies=ONTOLOGIES)

IDENTIFIERS_ORG_PREFIX: Final = "http://identifiers.org"
# old style: http(s)://identifiers.org/<collection>/<term>
IDENTIFIERS_ORG_PATTERN1: Final = re.compile(r"^https?://identifiers.org/(.+?)/(.+)")
# new short style: http(s)://identifiers.org/<PREFIX:term>
IDENTIFIERS_ORG_PATTERN2: Final = re.compile(r"^https?://identifiers.org/(.+)")
# deprecated MIRIAM urns: urn:miriam:<collection>:<term>
MIRIAM_URN_PATTERN: Final = re.compile(r"^urn:miriam:(.+)")

logger = log.get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class RDFAnnotation:
    """RDFAnnotation class.

    Basic storage of annotation information. This consists of the relation
    and the resource.
    The annotations can be attached to other objects thereby forming
    triples which can be converted to RDF.

    Resource can be either:
    - `http(s)://identifiers.org/collection/term`, i.e., a identifiers.org URI
    - `collection/term`, i.e., the combination of collection and term
    - `http(s)://arbitrary.url`, an arbitrary URL
    - urn:miriam:uniprot:P03023
    """

    # legacy MIRIAM collection names mapped to their current names
    replaced_collections: Dict[str, str] = {
        "obo.go": "go",
        "biomodels.sbo": "sbo",
    }

    def __init__(self, qualifier: Union[BQB, BQM], resource: str):
        """Initialize RDFAnnotation.

        :param qualifier: MIRIAM biomodels qualifier (BQB or BQM)
        :param resource: resource string in one of the supported notations
        :raises ValueError: if qualifier or resource is missing/invalid
        """
        self.qualifier: Union[BQB, BQM] = qualifier
        self.collection: Optional[str] = None
        self.term: Optional[str] = None
        self.resource: str = resource

        if not qualifier:
            raise ValueError(
                f"MIRIAM qualifiers are required for rdf annotation, but no "
                f"qualifier for resource '{resource}' was provided."
            )
        if not resource:
            # FIX: corrected typo "emtpy" -> "empty" in the error message
            raise ValueError(
                f"resource is required for annotation, but resource is empty "
                f"'{qualifier} {resource}'."
            )
        if not isinstance(resource, str):
            raise ValueError(
                f"resource must be string, but found '{resource} {type(resource)}'."
            )

        # handle urls
        if resource.startswith("http"):
            match1 = IDENTIFIERS_ORG_PATTERN1.match(resource)
            if match1:
                # handle identifiers.org pattern
                self.collection, self.term = match1.group(1), match1.group(2)

            if not self.collection:
                # tests new short pattern (PREFIX:term)
                match2 = IDENTIFIERS_ORG_PATTERN2.match(resource)
                if match2:
                    tokens = match2.group(1).split(":")
                    if len(tokens) == 2:
                        self.collection = tokens[0].lower()
                        self.term = match2.group(1)
                    else:
                        # FIX: added missing space ("newshort" -> "new short")
                        logger.warning(
                            f"Identifiers.org URL does not conform to new "
                            f"short pattern: {resource}"
                        )

            if not self.collection:
                # other urls are directly stored as resources without collection
                self.collection = None
                self.term = resource
                logger.debug(
                    f"{resource} does not conform to "
                    f"http(s)://identifiers.org/collection/id or http(s)://identifiers.org/id",
                )
        elif resource.startswith("urn:miriam:"):
            # deprecated MIRIAM urn notation, e.g. urn:miriam:uniprot:P03023
            match3 = MIRIAM_URN_PATTERN.match(resource)
            if match3:
                tokens = match3.group(1).split(":")
                self.collection = tokens[0]
                # terms may contain percent-encoded colons
                self.term = ":".join(tokens[1:]).replace("%3A", ":")

                logger.warning(
                    f"Deprecated urn pattern `{resource}` updated: "
                    f"{self.resource_normalized}"
                )

        else:
            # handle short notation: `collection/term` or `PREFIX:term`
            tokens = resource.split("/")
            if len(tokens) == 2:
                self.collection = tokens[0]
                self.term = "/".join(tokens[1:])
            elif len(tokens) == 1 and ":" in tokens[0]:
                self.collection = tokens[0].split(":")[0].lower()
                self.term = tokens[0]

            if len(tokens) < 2 and not self.collection:
                # FIX: removed stray closing parenthesis from the message
                logger.error(
                    f"Resource `{resource}` could not be split in collection and term. "
                    f"A given resource must be of the form "
                    f"`collection/term` or an url starting with "
                    f"`http(s)://`."
                )
                self.collection = None
                self.term = resource

        # clean legacy collections
        if self.collection in self.replaced_collections:
            self.collection = self.replaced_collections[self.collection]

        self.validate()

    @staticmethod
    def from_tuple(t: Tuple[Union[BQB, BQM], str]) -> "RDFAnnotation":
        """Construct from (qualifier, resource) tuple."""
        qualifier, resource = t[0], t[1]
        return RDFAnnotation(qualifier=qualifier, resource=resource)

    @property
    def resource_normalized(self) -> Optional[str]:
        """Normalize resource for given annotation.

        Returns the identifiers.org URI for collection/term annotations,
        the raw term for plain URLs, or None if no term is set.
        """
        if not self.term:
            return None

        if self.collection is not None:
            # terms already embedding the collection prefix (e.g. CHEBI:...)
            # use the short identifiers.org notation
            if self.term.startswith(f"{self.collection.upper()}:"):
                return f"{IDENTIFIERS_ORG_PREFIX}/{self.term}"
            else:
                return f"{IDENTIFIERS_ORG_PREFIX}/{self.collection}/{self.term}"
        else:
            return self.term

    def __repr__(self) -> str:
        """Get representation string."""
        return f"RDFAnnotation({self.qualifier}|{self.collection}|{self.term})"

    def to_dict(self) -> Dict:
        """Convert to dict."""
        return {
            "qualifier": self.qualifier.value,  # FIXME use enums!
            "collection": self.collection,
            "term": self.term,
        }

    @staticmethod
    def check_term(collection: str, term: str) -> bool:
        """Check that term follows id pattern for collection.

        Uses the Identifiers collection information.

        :return: True if the term matches the collection pattern.
        """
        namespace = REGISTRY.ns_dict.get(collection, None)
        if not namespace:
            logger.error(
                f"MIRIAM collection `{collection}` does not exist for term `{term}`"
            )
            return False

        p = re.compile(namespace.pattern)
        m = p.match(term)
        if not m:
            logger.error(
                f"Term `{term}` did not match pattern "
                f"`{namespace.pattern}` for collection `{collection}`."
            )
            return False

        return True

    @staticmethod
    def check_qualifier(qualifier: Union[BQB, BQM]) -> None:
        """Check that the qualifier is an allowed qualifier.

        :param qualifier: qualifier to check
        :raises ValueError: if the qualifier is not a BQB or BQM member
        :return: None
        """
        if not isinstance(qualifier, (BQB, BQM)):
            supported_qualifiers = [e.value for e in BQB] + [e.value for e in BQM]

            raise ValueError(
                f"qualifier `{qualifier}` is not in supported qualifiers: "
                f"`{supported_qualifiers}`"
            )

    def validate(self) -> None:
        """Validate annotation (qualifier type and term pattern)."""
        if self.qualifier:
            self.check_qualifier(self.qualifier)
        if self.collection and self.term:
            self.check_term(collection=self.collection, term=self.term)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class RDFAnnotationData(RDFAnnotation):
    """Annotation with resolved information.

    queries for the resource should happen here;
    this resolves additional information.
    """

    def __init__(self, annotation: RDFAnnotation):
        """Initialize RDFAnnotationData.

        NOTE: the parent __init__ is intentionally not called; the already
        parsed fields are copied from the given annotation instead.
        """
        self.resource = annotation.resource
        self.qualifier = annotation.qualifier
        self.collection = annotation.collection
        self.term: Optional[str] = annotation.term
        self.url: Optional[str] = None
        self.description: Optional[str] = None
        self.label: Optional[str] = None
        self.synonyms: List = []
        self.xrefs: List = []
        self.warnings: List = []
        self.errors: List = []

        if self.collection:
            # register MIRIAM xrefs
            namespace = REGISTRY.ns_dict.get(self.collection, None)
            if not namespace:
                raise ValueError(
                    f"Namespace does not exist in dict for: `{self.collection}`"
                )

            namespace_embedded = namespace.namespaceEmbeddedInLui

            if not namespace.resources:
                namespace.resources = []

            for ns_resource in namespace.resources:
                # create url
                url = ns_resource.urlPattern

                if not self.term:
                    continue

                term = self.term

                # remove prefix for namespaces with the prefix embedded in the LUI
                if namespace_embedded and namespace.prefix:
                    term = term[len(namespace.prefix) + 1 :]

                # urlencode term
                term = urllib.parse.quote(term)

                # create url by substituting the term placeholders
                url = url.replace("{$Id}", term)
                url = url.replace("{$id}", term)
                if namespace.prefix:
                    # FIX: `.upper` was referenced without calling it, which
                    # embedded a bound-method repr in the replaced string;
                    # `.upper()` is the intended uppercase prefix.
                    url = url.replace(
                        f"{namespace.prefix.upper()}:",
                        urllib.parse.quote(f"{namespace.prefix.upper()}:"),
                    )

                if not self.url:
                    # set url to first resource url
                    self.url = url

                # print(url)
                _xref = CrossReference(
                    name=ns_resource.name, accession=self.term, url=url
                )
                valid = _xref.validate() and is_url(self.url)  # type: ignore
                if valid:
                    self.xrefs.append(_xref)

        # query OLS information
        self.query_ols()

    def __repr__(self) -> str:
        """Get representation string."""
        return f"RDFAnnotationData({self.collection}|{self.term}|{self.label}|{self.description}|{self.synonyms}|{self.xrefs})"

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dict."""

        return {
            "resource": self.resource,
            "resource_normalized": self.resource_normalized,
            # "qualifier": self.qualifier.value,
            "collection": self.collection,
            "term": self.term,
            "label": self.label,
            "description": self.description,
            "url": self.url,
            "synonyms": self.synonyms,
            "xrefs": self.xrefs,
            "errors": self.errors,
            "warnings": self.warnings,
        }

    def query_ols(self) -> Dict:
        """Query ontology lookup service.

        Fills label, description, synonyms, xrefs, warnings and errors
        from the OLS response.
        """
        try:
            d = OLS_QUERY.query_ols(ontology=self.collection, term=self.term)
        except requests.HTTPError as err:
            # network failure is recorded as an error, not raised
            logger.error(err)
            d = {
                "errors": [err],
                "warnings": [],
            }

        info = OLS_QUERY.process_response(d)

        # only fill fields which are not already set
        if self.label is None:
            self.label = info["label"]

        if self.description is None:
            self.description = info["description"]

        self.synonyms = info["synonyms"]
        self.xrefs = info["xrefs"]
        self.warnings.extend(info["warnings"])
        self.errors.extend(info["errors"])

        return info
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
if __name__ == "__main__":
    # Demo: parse and resolve annotations given in all supported notations
    # (short PREFIX:term, collection/term, identifiers.org URLs, deprecated
    # MIRIAM urns, and arbitrary URLs).
    for annotation in [
        # FIXME: support this
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF,
            resource="NCIT:C75913",
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF,
            resource="taxonomy/562",
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF,
            resource="http://identifiers.org/taxonomy/9606",
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF,
            resource="http://identifiers.org/biomodels.sbo/SBO:0000247",
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:obo.go:GO%3A0005623"
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:chebi:CHEBI%3A33699"
        ),
        RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="chebi/CHEBI:456215"),
        RDFAnnotation(
            qualifier=BQB.IS, resource="https://en.wikipedia.org/wiki/Cytosol"
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="urn:miriam:uniprot:P03023"
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF,
            resource="http://identifiers.org/go/GO:0005829",
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/go/GO:0005829"
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
        ),
        RDFAnnotation(
            qualifier=BQB.IS_VERSION_OF, resource="http://identifiers.org/GO:0005829"
        ),
        RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="bto/BTO:0000089"),
        RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="BTO:0000089"),
        RDFAnnotation(qualifier=BQB.IS_VERSION_OF, resource="chebi/CHEBI:000012"),
    ]:
        print("-" * 80)
        data = RDFAnnotationData(annotation)
        print(data)
        pprint(data.to_dict())
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Creator information."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Creator:
    """Creator in ModelHistory and other COMBINE formats."""

    def __init__(
        self,
        familyName: str,
        givenName: str,
        email: str,
        organization: str,
        site: Optional[str] = None,
        orcid: Optional[str] = None,
    ):
        """Initialize Creator."""
        self.familyName = familyName
        self.givenName = givenName
        self.email = email
        self.organization = organization
        self.site = site
        self.orcid = orcid

    def _fields(self) -> tuple:
        # All identifying attributes as a single tuple for comparison.
        return (
            self.familyName,
            self.givenName,
            self.email,
            self.organization,
            self.site,
            self.orcid,
        )

    def __str__(self) -> str:
        """Get string representation."""
        return (
            f"{self.familyName} {self.givenName} "
            f"({self.email}, {self.organization}, {self.site}, {self.orcid})"
        )

    def __hash__(self) -> int:
        """Get hash (derived from the string representation)."""
        return hash(str(self))

    def __eq__(self, other: object) -> bool:
        """Check for equality on all attributes."""
        if isinstance(other, Creator):
            return self._fields() == other._fields()
        return NotImplemented
|