PyPI - climate-ref-core - Versions diffs - 0.5.0__tar.gz → 0.5.1__tar.gz - Mend

climate-ref-core 0.5.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/.gitignore RENAMED Viewed

@@ -74,7 +74,6 @@ coverage.xml
 *.pot
 # Django stuff:
-*.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref-core
-Version: 0.5.0
+Version: 0.5.1
 Summary: Core library for the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License: Apache-2.0

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref-core"
-version = "0.5.0"
+version = "0.5.1"
 description = "Core library for the CMIP Rapid Evaluation Framework"
 readme = "README.md"
 authors = [

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/src/climate_ref_core/datasets.py RENAMED Viewed

@@ -11,6 +11,16 @@ from typing import Any, Self
 import pandas as pd
 from attrs import field, frozen
+Selector = tuple[tuple[str, str], ...]
+"""
+Type describing the key used to identify a group of datasets
+This is a tuple of tuples, where each inner tuple contains a metadata key and the value of that key
+that was used to group the datasets together.
+This type must be hashable, as it is used as a key in a dictionary.
+"""
 class SourceDatasetType(enum.Enum):
     """
@@ -76,6 +86,23 @@ class FacetFilter:
     """
+def sort_selector(inp: Selector) -> Selector:
+    """
+    Sort the selector by key
+    Parameters
+    ----------
+    inp
+        Selector to sort
+    Returns
+    -------
+    :
+        Sorted selector
+    """
+    return tuple(sorted(inp, key=lambda x: x[0]))
 @frozen
 class DatasetCollection:
     """
@@ -83,15 +110,33 @@ class DatasetCollection:
     """
     datasets: pd.DataFrame
+    """
+    DataFrame containing the datasets that were selected for the execution.
+    The columns in this dataframe depend on the source dataset type, but always include:
+    * path
+    * [slug_column]
+    """
     slug_column: str
     """
     Column in datasets that contains the unique identifier for the dataset
     """
-    selector: tuple[tuple[str, str], ...] = ()
+    selector: Selector = field(converter=sort_selector, factory=tuple)
     """
     Unique key, value pairs that were selected during the initial groupby
     """
+    def selector_dict(self) -> dict[str, str]:
+        """
+        Convert the selector to a dictionary
+        Returns
+        -------
+        :
+            Dictionary of the selector
+        """
+        return {key: value for key, value in self.selector}
     def __getattr__(self, item: str) -> Any:
         return getattr(self.datasets, item)
@@ -155,3 +200,19 @@ class ExecutionDatasetCollection:
         hash_sum = sum(hash(item) for item in self._collection.values())
         hash_bytes = hash_sum.to_bytes(16, "little", signed=True)
         return hashlib.sha1(hash_bytes).hexdigest()  # noqa: S324
+    @property
+    def selectors(self) -> dict[str, Selector]:
+        """
+        Collection of selectors used to identify the datasets
+        These are the key, value pairs that were selected during the initial group-by,
+        for each data requirement.
+        """
+        # The "value" of each SourceDatasetType is used here so this can be stored in the db
+        s = {}
+        for source_type in SourceDatasetType.ordered():
+            if source_type not in self._collection:
+                continue
+            s[source_type.value] = self._collection[source_type].selector
+        return s

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/src/climate_ref_core/diagnostics.py RENAMED Viewed

@@ -14,6 +14,7 @@ from attrs import field, frozen
 from climate_ref_core.constraints import GroupConstraint
 from climate_ref_core.datasets import ExecutionDatasetCollection, FacetFilter, SourceDatasetType
+from climate_ref_core.metric_values import SeriesMetricValue
 from climate_ref_core.pycmec.metric import CMECMetric
 from climate_ref_core.pycmec.output import CMECOutput
@@ -61,6 +62,11 @@ class ExecutionDefinition:
     for a specific set of datasets fulfilling the requirements.
     """
+    diagnostic: Diagnostic
+    """
+    The diagnostic that is being executed
+    """
     key: str
     """
     The unique identifier for the datasets in the diagnostic execution group.
@@ -85,6 +91,12 @@ class ExecutionDefinition:
     Root directory for storing the output of the diagnostic execution
     """
+    def execution_slug(self) -> str:
+        """
+        Get a slug for the execution
+        """
+        return f"{self.diagnostic.full_slug()}/{self.key}"
     def to_output_path(self, filename: pathlib.Path | str | None) -> pathlib.Path:
         """
         Get the absolute path for a file in the output directory
@@ -170,7 +182,11 @@ class ExecutionResult:
     """
     Whether the diagnostic execution ran successfully.
     """
-    # Log info is in the output bundle file already, but is definitely useful
+    series: Sequence[SeriesMetricValue] = field(factory=tuple)
+    """
+    A collection of series metric values that were extracted from the execution.
+    """
     @staticmethod
     def build_from_output_bundle(
@@ -426,7 +442,7 @@ class AbstractDiagnostic(Protocol):
         """
         Run the diagnostic on the given configuration.
-        The implementation of this method method is left to the diagnostic providers.
+        The implementation of this method is left to the diagnostic providers.
         Parameters

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/src/climate_ref_core/exceptions.py RENAMED Viewed

@@ -46,3 +46,10 @@ class ConstraintNotSatisfied(RefException):
 class ResultValidationError(RefException):
     """Exception raised when the executions from a diagnostic are invalid"""
+class ExecutionError(RefException):
+    """Exception raised when an execution fails"""
+    def __init__(self, message: str) -> None:
+        super().__init__(message)

climate_ref_core-0.5.1/src/climate_ref_core/executor.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""
+Executor interface for running diagnostics
+"""
+import importlib
+import shutil
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+from loguru import logger
+from climate_ref_core.diagnostics import ExecutionDefinition, ExecutionResult
+from climate_ref_core.exceptions import InvalidExecutorException
+from climate_ref_core.logging import redirect_logs
+if TYPE_CHECKING:
+    # TODO: break this import cycle and move it into the execution definition
+    from climate_ref.models import Execution
+def execute_locally(
+    definition: ExecutionDefinition,
+    log_level: str,
+) -> ExecutionResult:
+    """
+    Run a diagnostic execution
+    This is the chunk of work that should be executed by an executor.
+    Parameters
+    ----------
+    definition
+        A description of the information needed for this execution of the diagnostic
+    log_level
+        The log level to use for the execution
+    """
+    logger.info(f"Executing {definition.execution_slug()!r}")
+    try:
+        if definition.output_directory.exists():
+            logger.warning(
+                f"Output directory {definition.output_directory} already exists. "
+                f"Removing the existing directory."
+            )
+            shutil.rmtree(definition.output_directory)
+        definition.output_directory.mkdir(parents=True, exist_ok=True)
+        with redirect_logs(definition, log_level):
+            return definition.diagnostic.run(definition=definition)
+    except Exception:
+        # If the diagnostic fails, we want to log the error and return a failure result
+        logger.exception(f"Error running {definition.execution_slug()!r}")
+        return ExecutionResult.build_from_failure(definition)
+@runtime_checkable
+class Executor(Protocol):
+    """
+    An executor is responsible for running a diagnostic asynchronously
+    The diagnostic may be run locally in the same process or in a separate process or container.
+    Notes
+    -----
+    This is an extremely basic interface and will be expanded in the future, as we figure out
+    our requirements.
+    """
+    name: str
+    def __init__(self, **kwargs: Any) -> None: ...
+    def run(
+        self,
+        definition: ExecutionDefinition,
+        execution: "Execution | None" = None,
+    ) -> None:
+        """
+        Execute a diagnostic with a given definition
+        No results are returned from this method,
+        as the execution may be performed asynchronously so results may not be immediately available.
+        /// admonition | Note
+        In future, we may return a `Future` object that can be used to retrieve the result,
+        but that requires some additional work to implement.
+        ///
+        Parameters
+        ----------
+        definition
+            Definition of the information needed to execute a diagnostic
+            This definition describes which datasets are required to run the diagnostic and where
+            the output should be stored.
+        execution
+            The execution object to update with the results of the execution.
+            This is a database object that contains the results of the execution.
+            If provided, it will be updated with the results of the execution.
+            This may happen asynchronously, so the results may not be immediately available.
+        Returns
+        -------
+        :
+            Nothing; the results of the execution are not returned by this method
+        """
+        ...
+    def join(self, timeout: float) -> None:
+        """
+        Wait for all executions to finish
+        If the timeout is reached, a `TimeoutError` is raised.
+        Parameters
+        ----------
+        timeout
+            Maximum time to wait for all executions to finish in seconds
+        Raises
+        ------
+        TimeoutError
+            If the timeout is reached
+        """
+def import_executor_cls(fqn: str) -> type[Executor]:
+    """
+    Import an executor using a fully qualified module path
+    Parameters
+    ----------
+    fqn
+        Full package and attribute name of the executor to import
+        For example: `climate_ref_example.executor` will use the `executor` attribute from the
+        `climate_ref_example` package.
+    Raises
+    ------
+    InvalidExecutorException
+        If the executor cannot be imported
+        If the executor isn't a valid `Executor`.
+    Returns
+    -------
+    :
+        Executor class (not an instance)
+    """
+    module, attribute_name = fqn.rsplit(".", 1)
+    try:
+        imp = importlib.import_module(module)
+        executor: type[Executor] = getattr(imp, attribute_name)
+        # We can't really check if the executor is a subclass of Executor here
+        # Protocols can't be used with issubclass if they have non-method members
+        # We have to check this at class instantiation time
+        return executor
+    except ModuleNotFoundError:
+        logger.error(f"Package '{fqn}' not found")
+        raise InvalidExecutorException(fqn, f"Module '{module}' not found")
+    except AttributeError:
+        logger.error(f"Provider '{fqn}' not found")
+        raise InvalidExecutorException(fqn, f"Executor '{attribute_name}' not found in {module}")

{climate_ref_core-0.5.0 → climate_ref_core-0.5.1}/src/climate_ref_core/logging.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """
 Logging utilities
-The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework
+The REF uses [loguru](https://loguru.readthedocs.io/en/stable/), a simple logging framework.
 """
 import contextlib
@@ -16,7 +16,13 @@ from loguru import logger
 from rich.pretty import pretty_repr
 from climate_ref_core.diagnostics import ExecutionDefinition
-from climate_ref_core.executor import EXECUTION_LOG_FILENAME
+EXECUTION_LOG_FILENAME = "out.log"
+"""
+Filename for the execution log.
+This file is written via [climate_ref_core.logging.redirect_logs][].
+"""
 class _InterceptHandler(logging.Handler):
@@ -72,7 +78,7 @@ def add_log_handler(**kwargs: Any) -> None:
     # Track the current handler via custom attributes on the logger
     # This is a bit of a workaround because of loguru's super slim API that doesn't allow for
-    # modificiation of existing handlers.
+    # modification of existing handlers.
     logger.default_handler_id = handled_id  # type: ignore[attr-defined]
     logger.default_handler_kwargs = kwargs  # type: ignore[attr-defined]
@@ -88,7 +94,12 @@ def remove_log_handler() -> None:
     logger should be readded later
     """
     if hasattr(logger, "default_handler_id"):
-        logger.remove(logger.default_handler_id)
+        try:
+            logger.remove(logger.default_handler_id)
+        except ValueError:
+            # This can happen if the handler has already been removed
+            # or if the logger was never configured
+            pass
         del logger.default_handler_id
     else:
         raise AssertionError("No default log handler to remove.")
@@ -143,4 +154,4 @@ def redirect_logs(definition: ExecutionDefinition, log_level: str) -> Generator[
             add_log_handler(**logger.default_handler_kwargs)  # type: ignore[attr-defined]
-__all__ = ["add_log_handler", "capture_logging", "logger", "redirect_logs"]
+__all__ = ["EXECUTION_LOG_FILENAME", "add_log_handler", "capture_logging", "logger", "redirect_logs"]

climate_ref_core-0.5.1/src/climate_ref_core/metric_values/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""
+Metric Values
+A metric is a single statistical evaluation contained within a diagnostic.
+A diagnostic may consist of more than one metric.
+Examples include bias, root mean squared error (RMSE), Earth Mover's Distance,
+phase/timing of the seasonal cycle, amplitude of the seasonal cycle, spatial or temporal correlations,
+interannual variability.
+Not all metrics are useful for all variables or should be used with every observationally constrained dataset.
+Each metric may be converted into a performance score.
+"""
+from .typing import ScalarMetricValue, SeriesMetricValue
+__all__ = ["ScalarMetricValue", "SeriesMetricValue"]

climate_ref_core-0.5.1/src/climate_ref_core/metric_values/typing.py ADDED Viewed

@@ -0,0 +1,74 @@
+from collections.abc import Sequence
+from typing import Self
+from pydantic import BaseModel, model_validator
+Value = float | int
+class SeriesMetricValue(BaseModel):
+    """
+    A 1-d array with an associated index and additional dimensions
+    These values are typically sourced from the CMEC metrics bundle
+    """
+    dimensions: dict[str, str]
+    """
+    Key, value pairs that identify the dimensions of the metric
+    These values are used for a faceted search of the metric values.
+    """
+    values: Sequence[Value]
+    """
+    A 1-d array of values
+    """
+    index: Sequence[str | Value]
+    """
+    A 1-d array of index values
+    Values must be strings or numbers and have the same length as values.
+    Non-unique index values are not allowed.
+    """
+    index_name: str
+    """
+    The name of the index.
+    This is used for presentation purposes and is not used in the controlled vocabulary.
+    """
+    attributes: dict[str, str | Value] | None = None
+    """
+    Additional unstructured attributes associated with the metric value
+    """
+    @model_validator(mode="after")
+    def validate_index_length(self) -> Self:
+        """Validate that index has the same length as values"""
+        if len(self.index) != len(self.values):
+            raise ValueError(
+                f"Index length ({len(self.index)}) must match values length ({len(self.values)})"
+            )
+        return self
+class ScalarMetricValue(BaseModel):
+    """
+    A scalar value with associated dimensions
+    """
+    dimensions: dict[str, str]
+    """
+    Key, value pairs that identify the dimensions of the metric
+    These values are used for a faceted search of the metric values.
+    """
+    value: Value
+    """
+    A scalar value
+    """
+    attributes: dict[str, str | Value] | None = None
+    """
+    Additional unstructured attributes associated with the metric value
+    """

climate_ref_core-0.5.1/src/climate_ref_core/pycmec/cv_cmip7_aft.yaml ADDED Viewed

@@ -0,0 +1,95 @@
+dimensions:
+- name: source_id
+  long_name: Source ID
+  description: "Source ID (e.g., GFDL-CM4)"
+  allow_extra_values: true
+  required: false
+- name: reference_source_id
+  long_name: Reference Source ID
+  description: "Source ID of the reference dataset(e.g., HadISST)"
+  allow_extra_values: true
+  required: false
+- name: experiment_id
+  long_name: Experiment ID
+  description: "Experiment ID (e.g., historical, ssp585)"
+  allow_extra_values: true
+  required: false
+- name: variable_id
+  long_name: Variable
+  description: "Variable ID (e.g., tas, pr, etc.)"
+  allow_extra_values: true
+  required: false
+- name: reference_variable_id
+  long_name: Reference Variable
+  description: "Variable ID for the reference dataset (e.g., tas, pr, etc.)"
+  allow_extra_values: true
+  required: false
+- name: member_id
+  long_name: Member ID
+  description: "Unique identifier for each ensemble member, includes the variant label and sub-experiment if present"
+  allow_extra_values: true
+  required: false
+- name: variant_label
+  long_name: Variant Label
+  description: "Ensemble member (construct from realization, initialization, physics, and forcing indices)"
+  allow_extra_values: true
+  required: false
+- name: metric
+  long_name: Metric
+  description: ""
+  required: true
+  allow_extra_values: true
+- name: region
+  long_name: Region
+  description: "Part of the world from which the metric values are calculated. "
+  required: true
+  allow_extra_values: true
+  values:
+    - name: global
+      long_name: Global
+      description: "Global aggregate"
+      units: dimensionless
+- name: season
+  long_name: Season
+  description: "Parts of the year from which the metric values are calculated"
+  required: true
+  allow_extra_values: true
+  values:
+    - name: ann
+      long_name: Annual
+      description: ""
+      units: dimensionless
+    - name: djf
+      long_name: Dec,Jan,Feb
+      description: "December, January, February"
+      units: dimensionless
+    - name: mam
+      long_name: Mar,Apr,May
+      description: "March, April, May"
+      units: dimensionless
+    - name: jja
+      long_name: Jun,Jul,Aug
+      description: "June, July, August"
+      units: dimensionless
+    - name: son
+      long_name: Sep,Oct,Nov
+      description: "September, October, November"
+      units: dimensionless
+- name: statistic
+  long_name: Statistic
+  description: ""
+  required: true
+  allow_extra_values: true
+  values:
+    - name: rmse
+      long_name: Root Mean Square Error
+      description: ""
+      units: dimensionless
+    - name: overall score
+      long_name: Overall Score
+      description: ""
+      units: dimensionless
+    - name: bias
+      long_name: Bias
+      description: ""
+      units: dimensionless

climate-ref-core 0.5.0__tar.gz → 0.5.1__tar.gz

climate-ref-core 0.5.0__tar.gz → 0.5.1__tar.gz