PyPI - climate-ref-core - Versions diffs - 0.5.0__py3-none-any.whl - Mend

climate-ref-core 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

climate_ref_core/__init__.py +7 -0
climate_ref_core/constraints.py +363 -0
climate_ref_core/dataset_registry.py +158 -0
climate_ref_core/datasets.py +157 -0
climate_ref_core/diagnostics.py +549 -0
climate_ref_core/env.py +35 -0
climate_ref_core/exceptions.py +48 -0
climate_ref_core/executor.py +96 -0
climate_ref_core/logging.py +146 -0
climate_ref_core/providers.py +418 -0
climate_ref_core/py.typed +0 -0
climate_ref_core/pycmec/README.md +1 -0
climate_ref_core/pycmec/__init__.py +3 -0
climate_ref_core/pycmec/controlled_vocabulary.py +175 -0
climate_ref_core/pycmec/cv_cmip7_aft.yaml +44 -0
climate_ref_core/pycmec/metric.py +437 -0
climate_ref_core/pycmec/output.py +207 -0
climate_ref_core-0.5.0.dist-info/METADATA +63 -0
climate_ref_core-0.5.0.dist-info/RECORD +22 -0
climate_ref_core-0.5.0.dist-info/WHEEL +4 -0
climate_ref_core-0.5.0.dist-info/licenses/LICENCE +201 -0
climate_ref_core-0.5.0.dist-info/licenses/NOTICE +3 -0

climate_ref_core/executor.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""
+Executor interface for running diagnostics
+"""
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+from climate_ref_core.diagnostics import Diagnostic, ExecutionDefinition
+from climate_ref_core.providers import DiagnosticProvider
+if TYPE_CHECKING:
+    from climate_ref.models import Execution
+EXECUTION_LOG_FILENAME = "out.log"
+"""
+Filename for the execution log.
+This file is written via [climate_ref_core.logging.redirect_logs][].
+"""
@runtime_checkable
class Executor(Protocol):
    """
    An executor is responsible for running a diagnostic asynchronously

    The diagnostic may be run locally in the same process or in a separate process or container.

    Notes
    -----
    This is an extremely basic interface and will be expanded in the future, as we figure out
    our requirements.
    """

    name: str

    def __init__(self, **kwargs: Any) -> None: ...

    def run(
        self,
        provider: DiagnosticProvider,
        diagnostic: Diagnostic,
        definition: ExecutionDefinition,
        execution: "Execution | None" = None,
    ) -> None:
        """
        Execute a diagnostic with a given definition

        No results are returned from this method,
        as the execution may be performed asynchronously so results may not be immediately available.

        /// admonition | Note
        In future, we may return a `Future` object that can be used to retrieve the result,
        but that requires some additional work to implement.
        ///

        Parameters
        ----------
        provider
            Provider of the diagnostic
        diagnostic
            Diagnostic to run
        definition
            Definition of the information needed to execute a diagnostic

            This definition describes which datasets are required to run the diagnostic and where
            the output should be stored.
        execution
            The execution object to update with the results of the execution.

            This is a database object that contains the results of the execution.
            If provided, it will be updated with the results of the execution.
            This may happen asynchronously, so the results may not be immediately available.

        Returns
        -------
        :
            Nothing. Results are communicated via `execution` (if provided), not the return value.
        """
        ...

    def join(self, timeout: float) -> None:
        """
        Wait for all executions to finish

        If the timeout is reached before all executions have finished, an exception is raised.

        Parameters
        ----------
        timeout
            Maximum time to wait for all executions to finish in seconds

        Raises
        ------
        TimeoutError
            If the timeout is reached
        """
        ...

climate_ref_core/logging.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""
+Logging utilities
+The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework
+"""
+import contextlib
+import inspect
+import logging
+import sys
+from collections.abc import Generator
+from typing import Any
+import pooch
+from loguru import logger
+from rich.pretty import pretty_repr
+from climate_ref_core.diagnostics import ExecutionDefinition
+from climate_ref_core.executor import EXECUTION_LOG_FILENAME
class _InterceptHandler(logging.Handler):
    """
    Standard-library logging handler that forwards every record to Loguru
    """

    def emit(self, record: logging.LogRecord) -> None:
        """
        Re-emit a standard-library log record through the Loguru logger

        Parameters
        ----------
        record
            The record produced by the standard-library logging machinery
        """
        # Use the Loguru level with the same name when there is one,
        # otherwise fall back to the record's numeric level.
        level: str | int
        try:
            level = logger.level(record.levelname).name
        except ValueError:  # pragma: no cover
            level = record.levelno

        # Count the frames between here and the code that issued the log call,
        # skipping this frame and every frame that lives inside the `logging` module.
        # This must stay inline: moving it into a helper would add a frame and change `depth`.
        depth = 0
        frame = inspect.currentframe()
        while frame is not None and (depth == 0 or frame.f_code.co_filename == logging.__file__):
            frame = frame.f_back
            depth += 1

        forwarder = logger.opt(depth=depth, exception=record.exc_info)
        forwarder.log(level, record.getMessage())
def capture_logging() -> None:
    """
    Route standard-library logging through Loguru

    The root logger is reconfigured with `force=True`,
    so any handlers previously attached to it are removed.
    """
    # Pooch adds a handler to its own logger which circumvents the REF logger
    pooch_logger = pooch.get_logger()
    pooch_logger.handlers.clear()
    pooch_logger.addHandler(_InterceptHandler())

    logging.basicConfig(handlers=[_InterceptHandler()], level=0, force=True)

    # Disable some overly verbose logs
    for noisy_module in (
        "matplotlib.colorbar",
        "matplotlib.ticker",
        "matplotlib.font_manager",
        "pyproj.transformer",
        "pint.facets.plain.registry",
    ):
        logger.disable(noisy_module)
def add_log_handler(**kwargs: Any) -> None:
    """
    Register the default log sink and start capturing standard-library logging

    Parameters
    ----------
    **kwargs
        Keyword arguments forwarded to `logger.add`.
        If no `sink` is given, `sys.stderr` is used.

    Raises
    ------
    AssertionError
        If a default log handler has already been registered
    """
    if hasattr(logger, "default_handler_id"):
        raise AssertionError("The default log handler has already been created")

    if "sink" not in kwargs:
        kwargs["sink"] = sys.stderr

    handler_id = logger.add(**kwargs)

    # Loguru offers no way to inspect or modify an existing handler,
    # so the handler id and the kwargs used to create it are remembered
    # as custom attributes on the logger itself.
    logger.default_handler_id = handler_id  # type: ignore[attr-defined]
    logger.default_handler_kwargs = kwargs  # type: ignore[attr-defined]

    capture_logging()
def remove_log_handler() -> None:
    """
    Detach the default log handler from the logger

    Only `logger.default_handler_id` is cleared;
    the kwargs the handler was created with stay in `logger.default_handler_kwargs`
    so that the handler can be re-added later.

    Raises
    ------
    AssertionError
        If no default log handler is currently registered
    """
    if not hasattr(logger, "default_handler_id"):
        raise AssertionError("No default log handler to remove.")

    logger.remove(logger.default_handler_id)
    del logger.default_handler_id
@contextlib.contextmanager
def redirect_logs(definition: ExecutionDefinition, log_level: str) -> Generator[None, None, None]:
    """
    Temporarily redirect log output to a file.

    The log file is written to `definition.output_directory / EXECUTION_LOG_FILENAME`.
    This also writes some common log messages at the start and end of the block,
    and logs the traceback of any exception raised inside the block before re-raising it.

    Parameters
    ----------
    definition
        Diagnostic definition to capture logging for
    log_level
        Log level as a string e.g. INFO, WARNING, DEBUG.

        This log level will dictate what logs will be sent to disk.
        If the app log handler was configured on entry, it is re-added on exit
        using the kwargs it was originally created with.
    """
    app_logger_configured = hasattr(logger, "default_handler_id")

    # Cleanup callbacks run in reverse order of registration, whether the block
    # succeeds, raises, or setup itself fails part-way through.
    with contextlib.ExitStack() as cleanup:
        # Remove existing default log handler
        # This swallows the logs from the app logger
        # If the app logger hasn't been configured yet, we don't need to remove it,
        # as logs will also be written to the console as loguru adds a stderr handler by default
        if app_logger_configured:
            remove_log_handler()
            # We only re-add the app handler if it was configured before
            cleanup.callback(
                lambda: add_log_handler(**logger.default_handler_kwargs)  # type: ignore[attr-defined]
            )

        # Add a new log handler for the execution log
        output_file = definition.output_directory / EXECUTION_LOG_FILENAME
        file_handler_id = logger.add(output_file, level=log_level, colorize=False)
        cleanup.callback(logger.remove, file_handler_id)

        capture_logging()

        logger.info(f"Running definition {pretty_repr(definition)}")
        try:
            yield
        except BaseException:
            # Deliberately broad: the failure is only recorded in the execution log
            # and then re-raised unchanged.
            logger.exception("Execution failed")
            raise
        finally:
            logger.info(f"Diagnostic execution complete. Results available in {definition.output_fragment()}")
+__all__ = ["add_log_handler", "capture_logging", "logger", "redirect_logs"]

climate_ref_core/providers.py ADDED Viewed

@@ -0,0 +1,418 @@
+"""
+Interface for declaring a diagnostic provider.
+This defines how diagnostic packages interoperate with the REF framework.
+Each diagnostic package may contain multiple diagnostics.
+Each diagnostic package must implement the `DiagnosticProvider` interface.
+"""
+from __future__ import annotations
+import datetime
+import hashlib
+import importlib.resources
+import os
+import stat
+import subprocess
+from abc import abstractmethod
+from collections.abc import Iterable
+from contextlib import AbstractContextManager
+from pathlib import Path
+from typing import TYPE_CHECKING
+import requests
+from loguru import logger
+from climate_ref_core.diagnostics import Diagnostic
+from climate_ref_core.exceptions import InvalidDiagnosticException, InvalidProviderException
+if TYPE_CHECKING:
+    from climate_ref.config import Config
def _slugify(value: str) -> str:
    """
    Turn a human-readable name into a slug.

    The value is lower-cased and every space character is replaced by a hyphen.
    No other characters are altered.

    Parameters
    ----------
    value : str
        String to slugify.

    Returns
    -------
    str
        Slugified string.
    """
    lowered = value.lower()
    return "-".join(lowered.split(" "))
class DiagnosticProvider:
    """
    The interface for registering and running diagnostics.

    Each package that provides diagnostics must implement this interface.

    Diagnostics are stored under their lower-cased slug,
    so registration and lookup are case-insensitive.
    """

    def __init__(self, name: str, version: str, slug: str | None = None) -> None:
        self.name = name
        # Fall back to a slug derived from the name if none (or an empty one) is given
        self.slug = slug or _slugify(name)
        self.version = version

        # Maps lower-cased diagnostic slug -> diagnostic
        self._diagnostics: dict[str, Diagnostic] = {}

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name!r}, version={self.version!r})"

    def configure(self, config: Config) -> None:
        """
        Configure the provider.

        The base implementation does nothing; subclasses may override it.

        Parameters
        ----------
        config :
            A configuration.
        """

    def diagnostics(self) -> list[Diagnostic]:
        """
        Get the available diagnostics for the provider.

        Returns
        -------
        :
            A new list containing the currently registered diagnostics.
        """
        return list(self._diagnostics.values())

    def __len__(self) -> int:
        return len(self._diagnostics)

    def register(self, diagnostic: Diagnostic) -> None:
        """
        Register a diagnostic with the provider.

        The diagnostic's `provider` attribute is set to this provider.
        Registering a diagnostic whose slug (ignoring case) is already registered
        replaces the existing entry.

        Parameters
        ----------
        diagnostic :
            The diagnostic to register.

        Raises
        ------
        InvalidDiagnosticException
            If `diagnostic` is not an instance of `Diagnostic`.
        """
        if not isinstance(diagnostic, Diagnostic):
            raise InvalidDiagnosticException(
                diagnostic, "Diagnostics must be an instance of the 'Diagnostic' class"
            )
        diagnostic.provider = self
        self._diagnostics[diagnostic.slug.lower()] = diagnostic

    def get(self, slug: str) -> Diagnostic:
        """
        Get a diagnostic by slug.

        Parameters
        ----------
        slug :
            Slug of the diagnostic (case-insensitive).

        Raises
        ------
        KeyError
            If no diagnostic with the given slug is registered.

        Returns
        -------
        Diagnostic
            The requested diagnostic.
        """
        return self._diagnostics[slug.lower()]
def import_provider(fqn: str) -> DiagnosticProvider:
    """
    Import a provider by name

    Parameters
    ----------
    fqn
        Full package and attribute name of the provider to import

        For example: `climate_ref_example.provider` will use the `provider` attribute from the
        `climate_ref_example` package.

        If only a package name is provided, the default attribute name is `provider`.

    Raises
    ------
    InvalidProviderException
        If the provider cannot be imported

        If the provider isn't a valid `DiagnosticProvider`.

    Returns
    -------
    :
        DiagnosticProvider instance
    """
    if "." in fqn:
        module, name = fqn.rsplit(".", 1)
    else:
        module, name = fqn, "provider"

    try:
        imp = importlib.import_module(module)
        provider = getattr(imp, name)
    except ModuleNotFoundError as err:
        # `err.name` is the module that could not be found. If it is neither the requested
        # module nor one of its parent packages, the provider module exists but one of its
        # own imports is missing, so say that instead of blaming the provider module.
        missing = err.name
        if missing and missing != module and not module.startswith(f"{missing}."):
            detail = f"Module '{module}' could not be imported: module '{missing}' not found"
        else:
            detail = f"Module '{module}' not found"
        logger.error(detail)
        raise InvalidProviderException(fqn, detail) from err
    except AttributeError as err:
        logger.error(f"Provider '{fqn}' not found")
        raise InvalidProviderException(fqn, f"Provider '{name}' not found in {module}") from err

    if not isinstance(provider, DiagnosticProvider):
        raise InvalidProviderException(fqn, f"Expected DiagnosticProvider, got {type(provider)}")
    return provider
class CommandLineDiagnosticProvider(DiagnosticProvider):
    """
    A provider for diagnostics that can be run from the command line.

    Subclasses are expected to implement `run`.
    """

    # NOTE(review): `abstractmethod` is only enforced for classes whose metaclass derives from
    # `ABCMeta`; no such metaclass is visible on this class hierarchy, so this presumably acts
    # as a marker only — confirm before relying on instantiation failing.
    @abstractmethod
    def run(self, cmd: Iterable[str]) -> None:
        """
        Run a command.

        Parameters
        ----------
        cmd
            The command to run, as an iterable of arguments.
        """
MICROMAMBA_EXE_URL = (
    "https://github.com/mamba-org/micromamba-releases/releases/latest/download/micromamba-{platform}-{arch}"
)
"""The URL to download the micromamba executable from."""

MICROMAMBA_MAX_AGE = datetime.timedelta(days=7)
"""Do not update if the micromamba executable is younger than this age."""


def _get_micromamba_url() -> str:
    """
    Build a platform specific URL from which to download micromamba.

    Based on the script at: https://micro.mamba.pm/install.sh

    Returns
    -------
    :
        `MICROMAMBA_EXE_URL` with the detected platform and architecture filled in.

    Raises
    ------
    ValueError
        If the detected platform/architecture combination is not a supported one.
    """
    # Imported locally under an alias because `platform` is used as a local name below
    import platform as platform_info

    if hasattr(os, "uname"):
        uname = os.uname()
        sysname, machine = uname.sysname, uname.machine
    else:
        # `os.uname` is not available on Windows; `platform.uname` works everywhere
        fallback = platform_info.uname()
        sysname, machine = fallback.system, fallback.machine

    if sysname == "Linux":
        platform = "linux"
    elif sysname == "Darwin":
        platform = "osx"
    elif sysname == "Windows" or "NT" in sysname:
        # "Windows" is what `platform.uname` reports; names containing "NT" are what
        # the upstream install script matches for Windows.
        platform = "win"
    else:
        platform = sysname

    # Every machine type other than these is mapped to the generic "64" build
    arch = machine if machine in {"aarch64", "ppc64le", "arm64"} else "64"

    supported = {
        "linux-aarch64",
        "linux-ppc64le",
        "linux-64",
        "osx-arm64",
        "osx-64",
        "win-64",
    }
    if f"{platform}-{arch}" not in supported:
        msg = "Failed to detect your platform. Please set MICROMAMBA_EXE_URL to a valid location."
        raise ValueError(msg)

    return MICROMAMBA_EXE_URL.format(platform=platform, arch=arch)
class CondaDiagnosticProvider(CommandLineDiagnosticProvider):
    """
    A provider for diagnostics that can be run from the command line in a conda environment.

    The environment is created with micromamba, which is downloaded on demand into `prefix`.
    """

    def __init__(
        self,
        name: str,
        version: str,
        slug: str | None = None,
        repo: str | None = None,
        tag_or_commit: str | None = None,
    ) -> None:
        super().__init__(name, version, slug)
        self._conda_exe: Path | None = None
        self._prefix: Path | None = None
        # Only set when both a repository and a tag/commit are given
        self.url = f"git+{repo}@{tag_or_commit}" if repo and tag_or_commit else None

    @property
    def prefix(self) -> Path:
        """Path where conda environments are stored."""
        if not isinstance(self._prefix, Path):
            msg = (
                "No prefix for conda environments configured. Please use the "
                "configure method to configure the provider or assign a value "
                "to prefix directly."
            )
            raise ValueError(msg)
        return self._prefix

    @prefix.setter
    def prefix(self, path: Path) -> None:
        self._prefix = path

    def configure(self, config: Config) -> None:
        """Configure the provider."""
        self.prefix = config.paths.software / "conda"

    def _install_conda(self, update: bool) -> Path:
        """Install micromamba into the configured prefix.

        Parameters
        ----------
        update:
            Update the micromamba executable if it is older than `MICROMAMBA_MAX_AGE`.

        Returns
        -------
            The path to the executable.
        """
        conda_exe = self.prefix / "micromamba"

        if conda_exe.exists() and update:
            # Only update if the executable is older than `MICROMAMBA_MAX_AGE`.
            creation_time = datetime.datetime.fromtimestamp(conda_exe.stat().st_ctime)
            age = datetime.datetime.now() - creation_time
            if age < MICROMAMBA_MAX_AGE:
                update = False

        if not conda_exe.exists() or update:
            logger.info("Installing conda")
            self.prefix.mkdir(parents=True, exist_ok=True)
            response = requests.get(_get_micromamba_url(), timeout=120)
            response.raise_for_status()

            # Write to a sibling file first and move it into place once it is complete,
            # so an interrupted write never leaves a truncated file at `conda_exe`
            # that later `exists()` checks would accept as installed.
            partial = conda_exe.with_name(f"{conda_exe.name}.{os.getpid()}.partial")
            try:
                partial.write_bytes(response.content)
                partial.chmod(stat.S_IRWXU)
                partial.replace(conda_exe)
            finally:
                # No-op after a successful move; removes the leftover on failure
                partial.unlink(missing_ok=True)
            logger.info("Successfully installed conda.")

        return conda_exe

    def get_conda_exe(self, update: bool = False) -> Path:
        """
        Get the path to a conda executable.

        The executable is installed on first use and the path is remembered afterwards,
        so `update` only has an effect on the first call.
        """
        if self._conda_exe is None:
            self._conda_exe = self._install_conda(update)
        return self._conda_exe

    def get_environment_file(self) -> AbstractContextManager[Path]:
        """
        Return a context manager that provides the environment file as a Path.

        Raises
        ------
        ValueError
            If no diagnostic has been registered yet.
        """
        # Because providers are instances, we have no way of retrieving the
        # module in which they are created, so get the information from the
        # first registered diagnostic instead.
        diagnostics = self.diagnostics()
        if not diagnostics:
            msg = "Unable to determine the provider module, please register a diagnostic first."
            raise ValueError(msg)
        module = diagnostics[0].__module__.split(".")[0]
        lockfile = importlib.resources.files(module).joinpath("requirements").joinpath("conda-lock.yml")
        return importlib.resources.as_file(lockfile)

    @property
    def env_path(self) -> Path:
        """
        A unique path for storing the conda environment.

        The path is derived from the provider slug and a hash of the environment file
        contents and, if set, the development install URL.
        The hash is recomputed on every access.
        """
        with self.get_environment_file() as file:
            suffix = hashlib.sha1(file.read_bytes(), usedforsecurity=False)
        if self.url is not None:
            suffix.update(bytes(self.url, encoding="utf-8"))
        return self.prefix / f"{self.slug}-{suffix.hexdigest()}"

    def create_env(self) -> None:
        """
        Create a conda environment.

        Nothing is done if the environment directory already exists.

        Raises
        ------
        subprocess.CalledProcessError
            If creating the environment or installing the development version fails
        """
        # `env_path` hashes the environment file on every access, so resolve it once
        env_path = self.env_path
        logger.debug(f"Attempting to create environment at {env_path}")
        if env_path.exists():
            logger.info(f"Environment at {env_path} already exists, skipping.")
            return

        conda_exe = f"{self.get_conda_exe(update=True)}"
        with self.get_environment_file() as file:
            cmd = [
                conda_exe,
                "create",
                "--yes",
                "--file",
                f"{file}",
                "--prefix",
                f"{env_path}",
            ]
            logger.debug(f"Running {' '.join(cmd)}")
            subprocess.run(cmd, check=True)  # noqa: S603

            if self.url is not None:
                logger.info(f"Installing development version of {self.slug} from {self.url}")
                cmd = [
                    conda_exe,
                    "run",
                    "--prefix",
                    f"{env_path}",
                    "pip",
                    "install",
                    "--no-deps",
                    self.url,
                ]
                logger.debug(f"Running {' '.join(cmd)}")
                subprocess.run(cmd, check=True)  # noqa: S603

    def run(self, cmd: Iterable[str]) -> None:
        """
        Run a command.

        The environment is created first if it does not exist yet.
        The combined stdout/stderr of the command is logged once the command has finished.

        Parameters
        ----------
        cmd
            The command to run.

        Raises
        ------
        subprocess.CalledProcessError
            If the command fails
        """
        self.create_env()

        full_cmd = [
            f"{self.get_conda_exe(update=False)}",
            "run",
            "--prefix",
            f"{self.env_path}",
            *cmd,
        ]
        logger.info(f"Running '{' '.join(full_cmd)}'")
        try:
            # This captures the log output until the execution is complete
            # We could poll using `subprocess.Popen` if we want something more responsive
            res = subprocess.run(  # noqa: S603
                full_cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to run {full_cmd}")
            logger.error(e.stdout)
            raise

        logger.info("Command output: \n" + res.stdout)
        logger.info("Command execution successful")

climate_ref_core/py.typed ADDED Viewed

File without changes

climate_ref_core/pycmec/README.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ CMEC python implementation (pycmec)

climate_ref_core/pycmec/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""
+CMEC python package
+"""