climate-ref 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +49 -12
- climate_ref/cli/executions.py +333 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +67 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +10 -19
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/hpc.py +149 -53
- climate_ref/executor/local.py +1 -2
- climate_ref/executor/result_handling.py +17 -7
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -20
- climate_ref/models/dataset.py +2 -0
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +219 -7
- climate_ref/models/metric_value.py +25 -110
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/solver.py +17 -6
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/METADATA +1 -1
- climate_ref-0.8.0.dist-info/RECORD +58 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/WHEEL +1 -1
- climate_ref-0.6.6.dist-info/RECORD +0 -55
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/models/execution.py
CHANGED
@@ -1,19 +1,22 @@
 import enum
 import pathlib
-from
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar

 from loguru import logger
-from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func
+from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func, or_
 from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
 from sqlalchemy.orm.query import RowReturningQuery

-from climate_ref.models import
-from climate_ref.models.
+from climate_ref.models.base import Base
+from climate_ref.models.dataset import Dataset
+from climate_ref.models.diagnostic import Diagnostic
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin
+from climate_ref.models.provider import Provider
 from climate_ref_core.datasets import ExecutionDatasetCollection

 if TYPE_CHECKING:
     from climate_ref.database import Database
-    from climate_ref.models.diagnostic import Diagnostic
     from climate_ref.models.metric_value import MetricValue

@@ -217,16 +220,21 @@ class ResultOutputType(enum.Enum):
     HTML = "html"


-class ExecutionOutput(CreatedUpdatedMixin, Base):
+class ExecutionOutput(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     An output generated as part of an execution.

     This output may be a plot, data file or HTML file.
-    These outputs are defined in the CMEC output bundle
+    These outputs are defined in the CMEC output bundle.
+
+    Outputs can be tagged with dimensions from the controlled vocabulary
+    to enable filtering and organization.
     """

     __tablename__ = "execution_output"

+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)

     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)
@@ -264,6 +272,65 @@ class ExecutionOutput(CreatedUpdatedMixin, Base):

     execution: Mapped["Execution"] = relationship(back_populates="outputs")

+    @classmethod
+    def build(  # noqa: PLR0913
+        cls,
+        *,
+        execution_id: int,
+        output_type: ResultOutputType,
+        dimensions: dict[str, str],
+        filename: str | None = None,
+        short_name: str | None = None,
+        long_name: str | None = None,
+        description: str | None = None,
+    ) -> "ExecutionOutput":
+        """
+        Build an ExecutionOutput from dimensions and metadata
+
+        This is a helper method that validates the dimensions supplied.
+
+        Parameters
+        ----------
+        execution_id
+            Execution that created the output
+        output_type
+            Type of the output
+        dimensions
+            Dimensions that describe the output
+        filename
+            Path to the output
+        short_name
+            Short key of the output
+        long_name
+            Human readable name
+        description
+            Long description
+
+        Raises
+        ------
+        KeyError
+            If an unknown dimension was supplied.
+
+            Dimensions must exist in the controlled vocabulary.
+
+        Returns
+        -------
+        Newly created ExecutionOutput
+        """
+        for k in dimensions:
+            if k not in cls._cv_dimensions:
+                raise KeyError(f"Unknown dimension column '{k}'")
+
+        return ExecutionOutput(
+            execution_id=execution_id,
+            output_type=output_type,
+            filename=filename,
+            short_name=short_name,
+            long_name=long_name,
+            description=description,
+            **dimensions,
+        )
+

 def get_execution_group_and_latest(
     session: Session,
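Illustrative usage of the new ExecutionOutput.build helper (a sketch, not part of this diff): it assumes the controlled vocabulary has already been registered via ExecutionOutput.register_cv_dimensions, so the dimension keys used below exist as columns; the keys "region" and "metric" and the surrounding values are placeholders.

    # Sketch only: `execution.id`, the dimension keys and the filenames are hypothetical.
    output = ExecutionOutput.build(
        execution_id=execution.id,
        output_type=ResultOutputType.HTML,
        dimensions={"region": "global", "metric": "rmse"},  # must be registered CV dimensions
        filename="index.html",
        short_name="summary",
        long_name="Diagnostic summary page",
        description="Landing page for the execution's outputs",
    )
    # Any key missing from ExecutionOutput._cv_dimensions raises
    # KeyError("Unknown dimension column '<key>'").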
@@ -305,3 +372,148 @@ def get_execution_group_and_latest(
     )

     return query  # type: ignore
+
+
+def _filter_executions_by_facets(
+    results: Sequence[tuple[ExecutionGroup, Execution | None]],
+    facet_filters: dict[str, str],
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Filter execution groups and their latest executions based on facet key-value pairs.
+
+    This is a relatively expensive operation as it requires iterating over all results.
+    This should be replaced once we have normalised the selectors into a separate table.
+
+    Parameters
+    ----------
+    results
+        List of tuples containing ExecutionGroup and its latest Execution (or None)
+    facet_filters
+        Dictionary of facet key-value pairs to filter by (AND logic, exact match)
+
+    Returns
+    -------
+    Filtered list of tuples containing ExecutionGroup and its latest Execution (or None)
+
+    Notes
+    -----
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    - Key=value filters search across all dataset types
+    - dataset_type.key=value filters only search within the specified dataset type
+    - Multiple values within same filter type use OR logic
+    - All specified facets must match for an execution group to be included (AND logic)
+    """
+    filtered_results = []
+    for eg, execution in results:
+        all_filters_match = True
+        for facet_key, facet_value in facet_filters.items():
+            filter_match = False
+            if "." in facet_key:
+                # Handle dataset_type.key=value format
+                dataset_type, key = facet_key.split(".", 1)
+                if dataset_type in eg.selectors:
+                    if [key, facet_value] in eg.selectors[dataset_type]:
+                        filter_match = True
+                        break
+            else:
+                # Handle key=value format (search across all dataset types)
+                for ds_type_selectors in eg.selectors.values():
+                    if [facet_key, facet_value] in ds_type_selectors:
+                        filter_match = True
+                        break
+
+            if not filter_match:
+                all_filters_match = False
+                break
+        if all_filters_match:
+            filtered_results.append((eg, execution))
+    return filtered_results
+
+
+def get_execution_group_and_latest_filtered(  # noqa: PLR0913
+    session: Session,
+    diagnostic_filters: list[str] | None = None,
+    provider_filters: list[str] | None = None,
+    facet_filters: dict[str, str] | None = None,
+    dirty: bool | None = None,
+    successful: bool | None = None,
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Query execution groups with filtering capabilities.
+
+    Parameters
+    ----------
+    session
+        Database session
+    diagnostic_filters
+        List of diagnostic slug substrings (OR logic, case-insensitive)
+    provider_filters
+        List of provider slug substrings (OR logic, case-insensitive)
+    facet_filters
+        Dictionary of facet key-value pairs (AND logic, exact match)
+    dirty
+        If True, only return dirty execution groups.
+        If False, only return clean execution groups.
+        If None, do not filter by dirty status.
+    successful
+        If True, only return execution groups whose latest execution was successful.
+        If False, only return execution groups whose latest execution was unsuccessful or has no executions.
+        If None, do not filter by execution success.
+
+    Returns
+    -------
+    Query returning tuples of (ExecutionGroup, latest Execution or None)
+
+    Notes
+    -----
+    - Diagnostic and provider filters use substring matching (case-insensitive)
+    - Multiple values within same filter type use OR logic
+    - Different filter types use AND logic
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    """
+    # Start with base query
+    query = get_execution_group_and_latest(session)
+
+    if diagnostic_filters or provider_filters:
+        # Join through to the Diagnostic table
+        query = query.join(Diagnostic, ExecutionGroup.diagnostic_id == Diagnostic.id)
+
+        # Apply diagnostic filter (OR logic for multiple values)
+        if diagnostic_filters:
+            diagnostic_conditions = [
+                Diagnostic.slug.ilike(f"%{filter_value.lower()}%") for filter_value in diagnostic_filters
+            ]
+            query = query.filter(or_(*diagnostic_conditions))
+
+        # Apply provider filter (OR logic for multiple values)
+        if provider_filters:
+            # Need to join through Diagnostic to Provider
+            query = query.join(Provider, Diagnostic.provider_id == Provider.id)
+
+            provider_conditions = [
+                Provider.slug.ilike(f"%{filter_value.lower()}%") for filter_value in provider_filters
+            ]
+            query = query.filter(or_(*provider_conditions))
+
+    if successful is not None:
+        if successful:
+            query = query.filter(Execution.successful.is_(True))
+        else:
+            query = query.filter(or_(Execution.successful.is_(False), Execution.successful.is_(None)))
+
+    if dirty is not None:
+        if dirty:
+            query = query.filter(ExecutionGroup.dirty.is_(True))
+        else:
+            query = query.filter(or_(ExecutionGroup.dirty.is_(False), ExecutionGroup.dirty.is_(None)))
+
+    if facet_filters:
+        # Load all results into memory for Python-based filtering
+        # TODO: Update once we have normalised the selector
+        results = [r._tuple() for r in query.all()]
+        return _filter_executions_by_facets(results, facet_filters)
+    else:
+        return [r._tuple() for r in query.all()]
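A usage sketch (not from the package) of the new get_execution_group_and_latest_filtered query. The filter values shown ("enso", "pmp", "cmip6.source_id", "ACCESS-ESM1-5") are illustrative assumptions; per the docstring above, diagnostic and provider filters are case-insensitive substring matches combined with OR, facet filters are exact matches combined with AND, and a facet key may optionally be prefixed with a dataset type.

    # `session` is assumed to be an open SQLAlchemy Session from the REF database.
    rows = get_execution_group_and_latest_filtered(
        session,
        diagnostic_filters=["enso"],                          # OR within the list, substring match
        provider_filters=["pmp"],                             # OR within the list, substring match
        facet_filters={"cmip6.source_id": "ACCESS-ESM1-5"},   # exact match, dataset_type.key form
        successful=True,                                      # latest execution must have succeeded
        dirty=False,                                          # only clean execution groups
    )
    for execution_group, latest_execution in rows:
        print(execution_group.selectors, latest_execution)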
climate_ref/models/metric_value.py
CHANGED
@@ -2,12 +2,11 @@ import enum
 from collections.abc import Mapping
 from typing import TYPE_CHECKING, Any, ClassVar

-from
-from sqlalchemy import Column, ForeignKey, Text, event
+from sqlalchemy import ForeignKey, event
 from sqlalchemy.orm import Mapped, mapped_column, relationship

-from climate_ref.models.base import Base
-from
+from climate_ref.models.base import Base
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin

 if TYPE_CHECKING:
     from climate_ref.models.execution import Execution
@@ -27,11 +26,14 @@ class MetricValueType(enum.Enum):
     SERIES = "series"


-class MetricValue(CreatedUpdatedMixin, Base):
+class MetricValue(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     Represents a single metric value

-    This
+    This is a base class for different types of metric values (e.g. scalar, series) which
+    are stored in a single table using single table inheritance.
+
+    This value has a number of dimensions which are used to query the diagnostic values.
     These dimensions describe aspects such as the type of statistic being measured,
     the region of interest or the model from which the statistic is being measured.

@@ -46,6 +48,8 @@ class MetricValue(CreatedUpdatedMixin, Base):
         "polymorphic_on": "type",
     }

+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)
     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)

@@ -60,111 +64,9 @@ class MetricValue(CreatedUpdatedMixin, Base):
     This value is used to determine how the metric value should be interpreted.
     """

-    _cv_dimensions: ClassVar[list[str]] = []
-
-    @property
-    def dimensions(self) -> dict[str, str]:
-        """
-        Get the non-null dimensions and their values
-
-        Any changes to the resulting dictionary are not reflected in the object
-
-        Returns
-        -------
-        Collection of dimensions names and their values
-        """
-        dims = {}
-        for key in self._cv_dimensions:
-            value = getattr(self, key)
-            if value is not None:
-                dims[key] = value
-        return dims
-
     def __repr__(self) -> str:
         return f"<MetricValue id={self.id} execution={self.execution} dimensions={self.dimensions}>"

-    @staticmethod
-    def build_dimension_column(dimension: Dimension) -> Column[str]:
-        """
-        Create a column representing a CV dimension
-
-        These columns are not automatically generated with alembic revisions.
-        Any changes to this functionality likely require a manual database migration
-        of the existing columns.
-
-        Parameters
-        ----------
-        dimension
-            Dimension definition to create the column for.
-
-            Currently only the "name" field is being used.
-
-        Returns
-        -------
-        An instance of a sqlalchemy Column
-
-        This doesn't create the column in the database,
-        but enables the ORM to access it.
-
-        """
-        return Column(
-            dimension.name,
-            Text,
-            index=True,
-            nullable=True,
-            info={"skip_autogenerate": True},
-        )
-
-    @classmethod
-    def register_cv_dimensions(cls, cv: CV) -> None:
-        """
-        Register the dimensions supplied in the controlled vocabulary
-
-        This has to be done at run-time to support custom CVs.
-        Any extra columns already in the database, but not in the CV are ignored.
-
-        Parameters
-        ----------
-        cv
-            Controlled vocabulary being used by the application.
-            This controlled vocabulary contains the definitions of the dimensions that can be used.
-        """
-        for dimension in cv.dimensions:
-            target_attribute = dimension.name
-            if target_attribute in cls._cv_dimensions:
-                continue
-
-            cls._cv_dimensions.append(target_attribute)
-            logger.debug(f"Registered MetricValue dimension: {target_attribute}")
-
-            if hasattr(cls, target_attribute):
-                # This should only occur in test suite as we don't support removing dimensions at runtime
-                logger.warning("Column attribute already exists on MetricValue. Ignoring")
-            else:
-                setattr(cls, target_attribute, cls.build_dimension_column(dimension))
-
-        # TODO: Check if the underlying table already contains columns
-
-    @classmethod
-    def _reset_cv_dimensions(cls) -> None:
-        """
-        Remove any previously registered dimensions
-
-        Used by the test suite and should not be called at runtime.
-
-        This doesn't remove any previous column definitions due to a limitation that columns in
-        declarative classes cannot be removed.
-        This means that `hasattr(MetricValue, "old_attribute")`
-        will still return True after resetting, but the values will not be included in any executions.
-        """
-        logger.warning(f"Removing MetricValue dimensions: {cls._cv_dimensions}")
-
-        keys = list(cls._cv_dimensions)
-        for key in keys:
-            cls._cv_dimensions.remove(key)
-
-        assert not len(cls._cv_dimensions)
-

 class ScalarMetricValue(MetricValue):
     """
@@ -180,6 +82,12 @@ class ScalarMetricValue(MetricValue):
     # This is a scalar value
     value: Mapped[float] = mapped_column(nullable=True)

+    def __repr__(self) -> str:
+        return (
+            f"<ScalarMetricValue "
+            f"id={self.id} execution={self.execution} dimensions={self.dimensions} value={self.value}>"
+        )
+
     @classmethod
     def build(
         cls,
@@ -232,9 +140,10 @@ class ScalarMetricValue(MetricValue):

 class SeriesMetricValue(MetricValue):
     """
-    A
+    A 1d series with associated dimensions

-    This is a subclass of MetricValue that is used to represent a
+    This is a subclass of MetricValue that is used to represent a series.
+    This can be used to represent time series, vertical profiles or other 1d data.
     """

     __mapper_args__: ClassVar[Mapping[str, Any]] = {  # type: ignore
@@ -246,6 +155,12 @@ class SeriesMetricValue(MetricValue):
     index: Mapped[list[float | int | str]] = mapped_column(nullable=True)
     index_name: Mapped[str] = mapped_column(nullable=True)

+    def __repr__(self) -> str:
+        return (
+            f"<SeriesMetricValue id={self.id} execution={self.execution} "
+            f"dimensions={self.dimensions} index_name={self.index_name}>"
+        )
+
     @classmethod
     def build(  # noqa: PLR0913
         cls,
climate_ref/models/mixins.py
ADDED
@@ -0,0 +1,144 @@
+"""Model mixins for shared functionality"""
+
+import datetime
+from typing import TYPE_CHECKING, ClassVar
+
+from loguru import logger
+from sqlalchemy import Column, Text, func
+from sqlalchemy.orm import Mapped, mapped_column
+
+if TYPE_CHECKING:
+    from climate_ref_core.pycmec.controlled_vocabulary import CV, Dimension
+
+
+class CreatedUpdatedMixin:
+    """
+    Mixin for models that have a created_at and updated_at fields
+    """
+
+    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
+    """
+    When the dataset was added to the database
+    """
+
+    updated_at: Mapped[datetime.datetime] = mapped_column(
+        server_default=func.now(), onupdate=func.now(), index=True
+    )
+    """
+    When the dataset was updated.
+    """
+
+
+class DimensionMixin:
+    """
+    Mixin that adds controlled vocabulary dimension support to a model
+
+    This mixin provides methods and properties for managing CV dimensions
+    on database models. Dimensions are stored as individual indexed columns
+    that are registered at runtime based on the controlled vocabulary.
+
+    Classes using this mixin must:
+    - Define _cv_dimensions as a ClassVar[list[str]] = []
+    - Have a __tablename__ attribute (SQLAlchemy requirement)
+    """
+
+    _cv_dimensions: ClassVar[list[str]]
+
+    @property
+    def dimensions(self) -> dict[str, str]:
+        """
+        Get the non-null dimensions and their values
+
+        Any changes to the resulting dictionary are not reflected in the object
+
+        Returns
+        -------
+        Collection of dimension names and their values
+        """
+        dims = {}
+        for key in self._cv_dimensions:
+            value = getattr(self, key)
+            if value is not None:
+                dims[key] = value
+        return dims
+
+    @staticmethod
+    def build_dimension_column(dimension: "Dimension") -> Column[str]:
+        """
+        Create a column representing a CV dimension
+
+        These columns are not automatically generated with alembic revisions.
+        Any changes to this functionality likely require a manual database migration
+        of the existing columns.
+
+        Parameters
+        ----------
+        dimension
+            Dimension definition to create the column for.
+            Currently only the "name" field is being used.
+
+        Returns
+        -------
+        An instance of a sqlalchemy Column
+
+        This doesn't create the column in the database,
+        but enables the ORM to access it.
+        """
+        return Column(
+            dimension.name,
+            Text,
+            index=True,
+            nullable=True,
+            info={"skip_autogenerate": True},
+        )
+
+    @classmethod
+    def register_cv_dimensions(cls, cv: "CV") -> None:
+        """
+        Register the dimensions supplied in the controlled vocabulary
+
+        This has to be done at run-time to support custom CVs.
+        Any extra columns already in the database, but not in the CV are ignored.
+
+        Parameters
+        ----------
+        cv
+            Controlled vocabulary being used by the application.
+            This controlled vocabulary contains the definitions of the dimensions that can be used.
+        """
+        model_name = cls.__name__
+
+        for dimension in cv.dimensions:
+            target_attribute = dimension.name
+            if target_attribute in cls._cv_dimensions:
+                continue
+
+            cls._cv_dimensions.append(target_attribute)
+            logger.debug(f"Registered {model_name} dimension: {target_attribute}")
+
+            if hasattr(cls, target_attribute):
+                # This should only occur in test suite as we don't support removing dimensions at runtime
+                logger.warning(f"Column attribute already exists on {model_name}. Ignoring")
+            else:
+                setattr(cls, target_attribute, cls.build_dimension_column(dimension))
+
+    @classmethod
+    def _reset_cv_dimensions(cls) -> None:
+        """
+        Remove any previously registered dimensions
+
+        Used by the test suite and should not be called at runtime.
+
+        This doesn't remove any previous column definitions due to a limitation that columns in
+        declarative classes cannot be removed.
+        This means that `hasattr(cls, "old_attribute")`
+        will still return True after resetting, but the values will not be included in any executions.
+        """
+        model_name = cls.__name__
+        logger.warning(f"Removing {model_name} dimensions: {cls._cv_dimensions}")
+
+        keys = list(cls._cv_dimensions)
+        for key in keys:
+            cls._cv_dimensions.remove(key)
+
+        assert not len(cls._cv_dimensions)
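A minimal sketch (not part of the package) of how a model opts in to controlled-vocabulary dimensions via the new DimensionMixin, mirroring the pattern now used by MetricValue and ExecutionOutput; the model name MyValue and the `cv` variable are hypothetical, with `cv` assumed to come from climate_ref_core.pycmec.controlled_vocabulary.

    from typing import ClassVar

    from sqlalchemy.orm import Mapped, mapped_column

    from climate_ref.models.base import Base
    from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin


    class MyValue(DimensionMixin, CreatedUpdatedMixin, Base):  # hypothetical model
        __tablename__ = "my_value"  # required by SQLAlchemy and the mixin

        # Required by DimensionMixin; populated at runtime by register_cv_dimensions()
        _cv_dimensions: ClassVar[list[str]] = []

        id: Mapped[int] = mapped_column(primary_key=True)


    # At application start-up, register the dimensions defined in the controlled vocabulary;
    # each becomes a nullable, indexed Text column, and non-null values are exposed through
    # the `.dimensions` property provided by the mixin.
    MyValue.register_cv_dimensions(cv)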
climate_ref/models/provider.py
CHANGED
@@ -2,7 +2,8 @@ from typing import TYPE_CHECKING

 from sqlalchemy.orm import Mapped, mapped_column, relationship

-from
+from .base import Base
+from .mixins import CreatedUpdatedMixin

 if TYPE_CHECKING:
     from climate_ref.models.diagnostic import Diagnostic
climate_ref/provider_registry.py
CHANGED
@@ -14,6 +14,8 @@ from loguru import logger

 from climate_ref.config import Config
 from climate_ref.database import Database
+from climate_ref.models import Diagnostic as DiagnosticModel
+from climate_ref.models import Provider as ProviderModel
 from climate_ref_core.diagnostics import Diagnostic
 from climate_ref_core.providers import DiagnosticProvider, import_provider

@@ -29,10 +31,8 @@ def _register_provider(db: Database, provider: DiagnosticProvider) -> None:
     provider
         DiagnosticProvider instance
     """
-    from climate_ref.models import Diagnostic, Provider
-
     provider_model, created = db.get_or_create(
-
+        ProviderModel,
         slug=provider.slug,
         defaults={
             "name": provider.name,
@@ -46,7 +46,7 @@ def _register_provider(db: Database, provider: DiagnosticProvider) -> None:

     for diagnostic in provider.diagnostics():
         diagnostic_model, created = db.get_or_create(
-
+            DiagnosticModel,
             slug=diagnostic.slug,
             provider_id=provider_model.id,
             defaults={
climate_ref/slurm.py
CHANGED
@@ -10,14 +10,14 @@ class SlurmChecker:


     def __init__(self, intest: bool = False) -> None:
         if HAS_REAL_SLURM:
-            import pyslurm  # type: ignore
+            import pyslurm  # type: ignore # noqa: PLC0415

             self.slurm_association: dict[int, Any] | None = pyslurm.db.Associations.load()
             self.slurm_partition: dict[str, Any] | None = pyslurm.Partitions.load()
             self.slurm_qos: dict[str, Any] | None = pyslurm.qos().get()
             self.slurm_node: dict[str, Any] | None = pyslurm.Nodes.load()
         elif intest:
-            import pyslurm
+            import pyslurm  # noqa: PLC0415

             self.slurm_association = pyslurm.db.Associations.load()  # dict [num -> Association]
             self.slurm_partition = pyslurm.Partitions.load()  # collection