PyPI - climate-ref - Versions diffs - 0.5.0__py3-none-any.whl - Mend

climate-ref 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

climate_ref/__init__.py +30 -0
climate_ref/_config_helpers.py +214 -0
climate_ref/alembic.ini +114 -0
climate_ref/cli/__init__.py +138 -0
climate_ref/cli/_utils.py +68 -0
climate_ref/cli/config.py +28 -0
climate_ref/cli/datasets.py +205 -0
climate_ref/cli/executions.py +201 -0
climate_ref/cli/providers.py +84 -0
climate_ref/cli/solve.py +23 -0
climate_ref/config.py +475 -0
climate_ref/constants.py +8 -0
climate_ref/database.py +223 -0
climate_ref/dataset_registry/obs4ref_reference.txt +2 -0
climate_ref/dataset_registry/sample_data.txt +60 -0
climate_ref/datasets/__init__.py +40 -0
climate_ref/datasets/base.py +214 -0
climate_ref/datasets/cmip6.py +202 -0
climate_ref/datasets/obs4mips.py +224 -0
climate_ref/datasets/pmp_climatology.py +15 -0
climate_ref/datasets/utils.py +16 -0
climate_ref/executor/__init__.py +274 -0
climate_ref/executor/local.py +89 -0
climate_ref/migrations/README +22 -0
climate_ref/migrations/env.py +139 -0
climate_ref/migrations/script.py.mako +26 -0
climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +292 -0
climate_ref/models/__init__.py +33 -0
climate_ref/models/base.py +42 -0
climate_ref/models/dataset.py +206 -0
climate_ref/models/diagnostic.py +61 -0
climate_ref/models/execution.py +306 -0
climate_ref/models/metric_value.py +195 -0
climate_ref/models/provider.py +39 -0
climate_ref/provider_registry.py +146 -0
climate_ref/py.typed +0 -0
climate_ref/solver.py +395 -0
climate_ref/testing.py +109 -0
climate_ref-0.5.0.dist-info/METADATA +97 -0
climate_ref-0.5.0.dist-info/RECORD +44 -0
climate_ref-0.5.0.dist-info/WHEEL +4 -0
climate_ref-0.5.0.dist-info/entry_points.txt +2 -0
climate_ref-0.5.0.dist-info/licenses/LICENCE +201 -0
climate_ref-0.5.0.dist-info/licenses/NOTICE +3 -0

climate_ref/config.py ADDED Viewed

@@ -0,0 +1,475 @@
+"""
+Configuration management
+The REF uses a tiered configuration model,
+where configuration is sourced from a hierarchy of different places.
+Each configuration value has a default which is used if not other configuration is available.
+Then configuration is loaded from a `.toml` file which overrides any default values.
+Finally, some configuration can be overridden at runtime using environment variables,
+which always take precedence over any other configuration values.
+"""
+# The basics of the configuration management takes a lot of inspiration from the
+# `esgpull` configuration management system with some of the extra complexity removed.
+# https://github.com/ESGF/esgf-download/blob/main/esgpull/config.py
+import importlib.resources
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+import tomlkit
+from attr import Factory
+from attrs import define, field
+from cattrs import Converter
+from cattrs.gen import make_dict_unstructure_fn, override
+from loguru import logger
+from tomlkit import TOMLDocument
+from climate_ref._config_helpers import (
+    _format_exception,
+    _format_key_exception,
+    _pop_empty,
+    config,
+    env_field,
+    transform_error,
+)
+from climate_ref.constants import config_filename
+from climate_ref.executor import import_executor_cls
+from climate_ref_core.env import env
+from climate_ref_core.exceptions import InvalidExecutorException
+from climate_ref_core.executor import Executor
+if TYPE_CHECKING:
+    from climate_ref.database import Database
+env_prefix = "REF"
+"""
+Prefix for the environment variables used by the REF
+"""
+def ensure_absolute_path(path: str | Path) -> Path:
+    """
+    Ensure that the path is absolute
+    Parameters
+    ----------
+    path
+        Path to check
+    Returns
+    -------
+        Absolute path
+    """
+    if isinstance(path, str):
+        path = Path(path)
+    return path.resolve()
+@config(prefix=env_prefix)
+class PathConfig:
+    """
+    Common paths used by the REF application
+    /// admonition | Warning
+        type: warning
+    These paths must be common across all systems that the REF is being run
+    ///
+    If any of these paths are specified as relative paths,
+    they will be resolved to absolute paths.
+    """
+    log: Path = env_field(name="LOG_ROOT", converter=ensure_absolute_path)
+    """
+    Directory to store log files from the compute engine
+    This is not currently used by the REF, but is included for future use.
+    """
+    scratch: Path = env_field(name="SCRATCH_ROOT", converter=ensure_absolute_path)
+    """
+    Shared scratch space for the REF.
+    This directory is used to write the intermediate executions of a diagnostic execution.
+    After the diagnostic has been run, the executions will be copied to the executions directory.
+    This directory must be accessible by all the diagnostic services that are used to run the diagnostics,
+    but does not need to be mounted in the same location on all the diagnostic services.
+    """
+    software: Path = env_field(name="SOFTWARE_ROOT", converter=ensure_absolute_path)
+    """
+    Shared software space for the REF.
+    This directory is used to store software environments.
+    This directory must be accessible by all the diagnostic services that are used to run the diagnostics,
+    and should be mounted in the same location on all the diagnostic services.
+    """
+    # TODO: This could be another data source option
+    results: Path = env_field(name="RESULTS_ROOT", converter=ensure_absolute_path)
+    """
+    Path to store the executions
+    """
+    dimensions_cv: Path = env_field(name="DIMENSIONS_CV_PATH", converter=Path)
+    """
+    Path to a file containing the controlled vocabulary for the dimensions in a CMEC diagnostics bundle
+    This defaults to the controlled vocabulary for the CMIP7 Assessment Fast Track diagnostics,
+    which is included in the `climate_ref_core` package.
+    This controlled vocabulary is used to validate the dimensions in the diagnostics bundle.
+    If custom diagnostics are implemented,
+    this file may need to be extended to include any new dimensions.
+    """
+    @log.default
+    def _log_factory(self) -> Path:
+        return env.path("REF_CONFIGURATION").resolve() / "log"
+    @scratch.default
+    def _scratch_factory(self) -> Path:
+        return env.path("REF_CONFIGURATION").resolve() / "scratch"
+    @software.default
+    def _software_factory(self) -> Path:
+        return env.path("REF_CONFIGURATION").resolve() / "software"
+    @results.default
+    def _results_factory(self) -> Path:
+        return env.path("REF_CONFIGURATION").resolve() / "results"
+    @dimensions_cv.default
+    def _dimensions_cv_factory(self) -> Path:
+        filename = "cv_cmip7_aft.yaml"
+        return Path(str(importlib.resources.files("climate_ref_core.pycmec") / filename))
+@config(prefix=env_prefix)
+class ExecutorConfig:
+    """
+    Configuration to define the executor to use for running diagnostics
+    """
+    executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.local.LocalExecutor")
+    """
+    Executor to use for running diagnostics
+    This should be the fully qualified name of the executor class
+    (e.g. `climate_ref.executor.local.LocalExecutor`).
+    The default is to use the local executor.
+    The environment variable `REF_EXECUTOR` takes precedence over this configuration value.
+    This class will be used for all executions of diagnostics.
+    """
+    config: dict[str, Any] = field(factory=dict)
+    """
+    Additional configuration for the executor.
+    See the documentation for the executor for the available configuration options.
+    """
+    def build(self, config: "Config", database: "Database") -> Executor:
+        """
+        Create an instance of the executor
+        Returns
+        -------
+        :
+            An executor that can be used to run diagnostics
+        """
+        ExecutorCls = import_executor_cls(self.executor)
+        kwargs = {
+            "config": config,
+            "database": database,
+            **self.config,
+        }
+        executor = ExecutorCls(**kwargs)
+        if not isinstance(executor, Executor):
+            raise InvalidExecutorException(executor, f"Expected an Executor, got {type(executor)}")
+        return executor
+@define
+class DiagnosticProviderConfig:
+    """
+    Configuration for the diagnostic providers
+    """
+    provider: str
+    """
+    Package that contains the diagnostic provider
+    This should be the fully qualified name of the diagnostic provider.
+    """
+    config: dict[str, Any] = field(factory=dict)
+    """
+    Additional configuration for the diagnostic provider.
+    See the documentation for the diagnostic package for the available configuration options.
+    """
+    # TODO: Additional configuration for narrowing down the diagnostics to run
+@config(prefix=env_prefix)
+class DbConfig:
+    """
+    Database configuration
+    We currently only plan to support SQLite and PostgreSQL databases,
+    although only SQLite is currently implemented and tested.
+    """
+    database_url: str = env_field(name="DATABASE_URL")
+    """
+    Database URL that describes the connection to the database.
+    Defaults to sqlite:///{config.paths.db}/climate_ref.db".
+    This configuration value will be overridden by the `REF_DATABASE_URL` environment variable.
+    ## Schemas
+    postgresql://USER:PASSWORD@HOST:PORT/NAME
+    sqlite:///RELATIVE_PATH or sqlite:////ABS_PATH or sqlite:///:memory:
+    """
+    run_migrations: bool = field(default=True)
+    @database_url.default
+    def _connection_url_factory(self) -> str:
+        filename = env.path("REF_CONFIGURATION") / "db" / "climate_ref.db"
+        sqlite_url = f"sqlite:///{filename}"
+        return sqlite_url
+def default_providers() -> list[DiagnosticProviderConfig]:
+    """
+    Default diagnostic provider
+    Used if no diagnostic providers are specified in the configuration
+    Returns
+    -------
+    :
+        List of default diagnostic providers
+    """  # noqa: D401
+    env_providers = env.list("REF_DIAGNOSTIC_PROVIDERS", default=None)
+    if env_providers:
+        return [DiagnosticProviderConfig(provider=provider) for provider in env_providers]
+    return [
+        DiagnosticProviderConfig(provider="climate_ref_esmvaltool.provider", config={}),
+        DiagnosticProviderConfig(provider="climate_ref_ilamb.provider", config={}),
+        DiagnosticProviderConfig(provider="climate_ref_pmp.provider", config={}),
+    ]
+def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
+    # Try loading the configuration with strict validation
+    try:
+        return _converter_defaults.structure(doc, Config)
+    except Exception as exc:
+        # Find the extra key errors which are displayed as warnings
+        key_validation_errors = transform_error(exc, format_exception=_format_key_exception)
+        for key_error in key_validation_errors:
+            logger.warning(f"Error loading configuration from {config_file}: {key_error}")
+    # Try again with relaxed validation
+    return _converter_defaults_relaxed.structure(doc, Config)
+@define
+class Config:
+    """
+    REF configuration
+    This class is used to store the configuration of the REF application.
+    """
+    log_level: str = field(default="INFO")
+    """
+    Log level of messages that are displayed by the REF
+    This value is overridden if a value is specified via the CLI.
+    """
+    paths: PathConfig = Factory(PathConfig)  # noqa
+    db: DbConfig = Factory(DbConfig)  # noqa
+    executor: ExecutorConfig = Factory(ExecutorConfig)  # noqa
+    diagnostic_providers: list[DiagnosticProviderConfig] = Factory(default_providers)  # noqa
+    _raw: TOMLDocument | None = field(init=False, default=None, repr=False)
+    _config_file: Path | None = field(init=False, default=None, repr=False)
+    @classmethod
+    def load(cls, config_file: Path, allow_missing: bool = True) -> "Config":
+        """
+        Load the configuration from a file
+        Parameters
+        ----------
+        config_file
+            Path to the configuration file.
+            This should be a TOML file.
+        Returns
+        -------
+        :
+            The configuration loaded from the file
+        """
+        if config_file.is_file():
+            with config_file.open() as fh:
+                doc = tomlkit.load(fh)
+                raw = doc
+        else:
+            if not allow_missing:
+                raise FileNotFoundError(f"Configuration file not found: {config_file}")
+            doc = TOMLDocument()
+            raw = None
+        try:
+            config = _load_config(config_file, doc)
+        except Exception as exc:
+            # If that still fails, error out
+            key_validation_errors = transform_error(exc, format_exception=_format_exception)
+            for key_error in key_validation_errors:
+                logger.error(f"Error loading configuration from {config_file}: {key_error}")
+            # Deliberately not raising "from exc" to avoid long tracebacks from cattrs
+            # The transformed error messages are sufficient for debugging
+            raise ValueError(f"Error loading configuration from {config_file}") from None
+        config._raw = raw
+        config._config_file = config_file
+        return config
+    def refresh(self) -> "Config":
+        """
+        Refresh the configuration values
+        This returns a new instance of the configuration based on the same configuration file and
+        any current environment variables.
+        """
+        if self._config_file is None:
+            raise ValueError("No configuration file specified")
+        return self.load(self._config_file)
+    def save(self, config_file: Path | None = None) -> None:
+        """
+        Save the configuration as a TOML file
+        The configuration will be saved to the specified file.
+        If no file is specified, the configuration will be saved to the file
+        that was used to load the configuration.
+        Parameters
+        ----------
+        config_file
+            The file to save the configuration to
+        Raises
+        ------
+        ValueError
+            If no configuration file is specified and the configuration was not loaded from a file
+        """
+        if config_file is None:
+            if self._config_file is None:  # pragma: no cover
+                # I'm not sure if this is possible
+                raise ValueError("No configuration file specified")
+            config_file = self._config_file
+        config_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(config_file, "w") as fh:
+            fh.write(self.dumps())
+    @classmethod
+    def default(cls) -> "Config":
+        """
+        Load the default configuration
+        This will load the configuration from the default configuration location,
+        which is typically the user's configuration directory.
+        This location can be overridden by setting the `REF_CONFIGURATION` environment variable.
+        Returns
+        -------
+        :
+            The default configuration
+        """
+        root = env.path("REF_CONFIGURATION")
+        path_to_load = root / config_filename
+        logger.debug(f"Loading default configuration from {path_to_load}")
+        return cls.load(path_to_load)
+    def dumps(self, defaults: bool = True) -> str:
+        """
+        Dump the configuration to a TOML string
+        Parameters
+        ----------
+        defaults
+            If True, include default values in the output
+        Returns
+        -------
+        :
+            The configuration as a TOML string
+        """
+        return self.dump(defaults).as_string()
+    def dump(
+        self,
+        defaults: bool = True,
+    ) -> TOMLDocument:
+        """
+        Dump the configuration to a TOML document
+        Parameters
+        ----------
+        defaults
+            If True, include default values in the output
+        Returns
+        -------
+        :
+            The configuration as a TOML document
+        """
+        if defaults:
+            converter = _converter_defaults
+        else:
+            converter = _converter_no_defaults
+        dump = converter.unstructure(self)
+        if not defaults:
+            _pop_empty(dump)
+        doc = TOMLDocument()
+        doc.update(dump)
+        return doc
+def _make_converter(omit_default: bool, forbid_extra_keys: bool) -> Converter:
+    conv = Converter(omit_if_default=omit_default, forbid_extra_keys=forbid_extra_keys)
+    conv.register_unstructure_hook(Path, str)
+    conv.register_unstructure_hook(
+        Config,
+        make_dict_unstructure_fn(
+            Config,
+            conv,
+            _raw=override(omit=True),
+            _config_file=override(omit=True),
+        ),
+    )
+    return conv
+_converter_defaults = _make_converter(omit_default=False, forbid_extra_keys=True)
+_converter_defaults_relaxed = _make_converter(omit_default=False, forbid_extra_keys=False)
+_converter_no_defaults = _make_converter(omit_default=True, forbid_extra_keys=True)

climate_ref/constants.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+Constants used by the REF
+"""
+config_filename = "ref.toml"
+"""
+Default name of the configuration file
+"""

climate_ref/database.py ADDED Viewed

@@ -0,0 +1,223 @@
+"""Database adapter layer
+This module provides a database adapter layer that abstracts the database connection and migrations.
+This allows us to easily switch between different database backends,
+and to run migrations when the database is loaded.
+The `Database` class is the main entry point for interacting with the database.
+It provides a session object that can be used to interact with the database and run queries.
+"""
+import importlib.resources
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+from urllib import parse as urlparse
+import alembic.command
+import sqlalchemy
+from alembic.config import Config as AlembicConfig
+from alembic.runtime.migration import MigrationContext
+from loguru import logger
+from sqlalchemy.orm import Session
+from climate_ref.models import MetricValue, Table
+from climate_ref_core.pycmec.controlled_vocabulary import CV
+if TYPE_CHECKING:
+    from climate_ref.config import Config
+_REMOVED_REVISIONS = [
+    "ea2aa1134cb3",
+    "4b95a617184e",
+    "4a447fbf6d65",
+    "c1818a18d87f",
+    "6634396f139a",
+    "1f5969a92b85",
+    "c5de99c14533",
+    "e1cdda7dcf1d",
+    "904f2f2db24a",
+    "6bc6ad5fc5e1",
+    "4fc26a7d2d28",
+    "4ac252ba38ed",
+]
+"""
+List of revisions that have been deleted
+If a user's database contains these revisions then they need to delete their database and start again.
+"""
+def _get_database_revision(connection: sqlalchemy.engine.Connection) -> str | None:
+    context = MigrationContext.configure(connection)
+    current_rev = context.get_current_revision()
+    return current_rev
+def validate_database_url(database_url: str) -> str:
+    """
+    Validate a database URL
+    We support sqlite databases, and we create the directory if it doesn't exist.
+    We may aim to support PostgreSQL databases, but this is currently experimental and untested.
+    Parameters
+    ----------
+    database_url
+        The database URL to validate
+        See [climate_ref.config.DbConfig.database_url][climate_ref.config.DbConfig.database_url]
+        for more information on the format of the URL.
+    Raises
+    ------
+    ValueError
+        If the database scheme is not supported
+    Returns
+    -------
+    :
+        The validated database URL
+    """
+    split_url = urlparse.urlsplit(database_url)
+    path = split_url.path[1:]
+    if split_url.scheme == "sqlite":
+        if path == ":memory:":
+            logger.warning("Using an in-memory database")
+        else:
+            Path(path).parent.mkdir(parents=True, exist_ok=True)
+    elif split_url.scheme == "postgresql":
+        # We don't need to do anything special for PostgreSQL
+        logger.warning("PostgreSQL support is currently experimental and untested")
+    else:
+        raise ValueError(f"Unsupported database scheme: {split_url.scheme}")
+    return database_url
+class Database:
+    """
+    Manage the database connection and migrations
+    The database migrations are optionally run after the connection to the database is established.
+    """
+    def __init__(self, url: str) -> None:
+        logger.info(f"Connecting to database at {url}")
+        self.url = url
+        self._engine = sqlalchemy.create_engine(self.url)
+        self.session = Session(self._engine)
+    def alembic_config(self, config: "Config") -> AlembicConfig:
+        """
+        Get the Alembic configuration object for the database
+        This includes an open connection with the database engine and the REF configuration.
+        Returns
+        -------
+        :
+            The Alembic configuration object that can be used with alembic commands
+        """
+        alembic_config_filename = importlib.resources.files("climate_ref") / "alembic.ini"
+        if not alembic_config_filename.is_file():  # pragma: no cover
+            raise FileNotFoundError(f"{alembic_config_filename} not found")
+        alembic_config = AlembicConfig(str(alembic_config_filename))
+        alembic_config.attributes["connection"] = self._engine
+        alembic_config.attributes["ref_config"] = config
+        return alembic_config
+    def migrate(self, config: "Config") -> None:
+        """
+        Migrate the database to the latest revision
+        Parameters
+        ----------
+        config
+            REF Configuration
+            This is passed to alembic
+        """
+        # Check if the database revision is one of the removed revisions
+        # If it is, then we need to delete the database and start again
+        with self._engine.connect() as connection:
+            current_rev = _get_database_revision(connection)
+            logger.debug(f"Current database revision: {current_rev}")
+            if current_rev in _REMOVED_REVISIONS:
+                raise ValueError(
+                    f"Database revision {current_rev!r} has been removed in "
+                    f"https://github.com/Climate-REF/climate-ref/pull/271. "
+                    "Please delete your database and start again."
+                )
+        alembic.command.upgrade(self.alembic_config(config), "heads")
+    @staticmethod
+    def from_config(config: "Config", run_migrations: bool = True) -> "Database":
+        """
+        Create a Database instance from a Config instance
+        The `REF_DATABASE_URL` environment variable will take preference,
+         and override the database URL specified in the config.
+        Parameters
+        ----------
+        config
+            The Config instance that includes information about where the database is located
+        Returns
+        -------
+        :
+            A new Database instance
+        """
+        database_url: str = config.db.database_url
+        database_url = validate_database_url(database_url)
+        cv = CV.load_from_file(config.paths.dimensions_cv)
+        db = Database(database_url)
+        if run_migrations:
+            # Run any outstanding migrations
+            # This also adds any diagnostic value columns to the DB if they don't exist
+            db.migrate(config)
+        # Register the CV dimensions with the MetricValue model
+        # This will add new columns to the db if the CVs have changed
+        MetricValue.register_cv_dimensions(cv)
+        return db
+    def get_or_create(
+        self, model: type[Table], defaults: dict[str, Any] | None = None, **kwargs: Any
+    ) -> tuple[Table, bool]:
+        """
+        Get or create an instance of a model
+        This doesn't commit the transaction,
+        so you will need to call `session.commit()` after this method
+        or use a transaction context manager.
+        Parameters
+        ----------
+        model
+            The model to get or create
+        defaults
+            Default values to use when creating a new instance
+        kwargs
+            The filter parameters to use when querying for an instance
+        Returns
+        -------
+        :
+            A tuple containing the instance and a boolean indicating if the instance was created
+        """
+        instance = self.session.query(model).filter_by(**kwargs).first()
+        if instance:
+            return instance, False
+        else:
+            params = {**kwargs, **(defaults or {})}
+            instance = model(**params)
+            self.session.add(instance)
+            return instance, True

climate_ref/dataset_registry/obs4ref_reference.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc md5:99c8691e0f615dc4d79b4fb5e926cc76
2	+ obs4REF/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc md5:570ce90b3afd1d0b31690ae5dbe32d31