climate-ref 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. climate_ref/cli/__init__.py +12 -3
  2. climate_ref/cli/_utils.py +56 -2
  3. climate_ref/cli/datasets.py +48 -9
  4. climate_ref/cli/executions.py +351 -24
  5. climate_ref/cli/providers.py +1 -2
  6. climate_ref/config.py +4 -4
  7. climate_ref/database.py +62 -4
  8. climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
  9. climate_ref/dataset_registry/sample_data.txt +269 -107
  10. climate_ref/datasets/__init__.py +3 -3
  11. climate_ref/datasets/base.py +121 -20
  12. climate_ref/datasets/cmip6.py +2 -0
  13. climate_ref/datasets/obs4mips.py +26 -15
  14. climate_ref/executor/__init__.py +8 -1
  15. climate_ref/executor/hpc.py +7 -1
  16. climate_ref/executor/result_handling.py +151 -64
  17. climate_ref/migrations/env.py +12 -10
  18. climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +1 -1
  19. climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +1 -1
  20. climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
  21. climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
  22. climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
  23. climate_ref/models/__init__.py +1 -6
  24. climate_ref/models/base.py +4 -18
  25. climate_ref/models/dataset.py +10 -6
  26. climate_ref/models/diagnostic.py +2 -1
  27. climate_ref/models/execution.py +225 -12
  28. climate_ref/models/metric_value.py +27 -112
  29. climate_ref/models/mixins.py +144 -0
  30. climate_ref/models/provider.py +2 -1
  31. climate_ref/provider_registry.py +4 -4
  32. climate_ref/slurm.py +2 -2
  33. climate_ref/testing.py +1 -1
  34. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/METADATA +2 -2
  35. climate_ref-0.7.0.dist-info/RECORD +58 -0
  36. climate_ref-0.6.5.dist-info/RECORD +0 -54
  37. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
  38. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
  39. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
  40. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py
@@ -22,7 +22,7 @@ depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
     # ### commands auto generated by Alembic - please adjust! ###
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
-        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=False))
+        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=True))
         batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=True)

climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py
@@ -49,7 +49,7 @@ def downgrade() -> None:
     # Note: Original migration 94beace57a9c added cmip6_dataset.finalised NOT NULL, with no default.
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
         batch_op.add_column(
-            sa.Column("finalised", sa.Boolean(), nullable=False, server_default=sa.text("false"))
+            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
        )
 
     # Drop base dataset finalised
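
Both hunks relax the re-added finalised column from NOT NULL to nullable. The diff does not state the motivation, but a plausible one is that adding a NOT NULL column with no usable default fails once the table already contains rows. A common three-step pattern for that situation is sketched below; it is illustrative only, not taken from climate-ref:

import sqlalchemy as sa
from alembic import op

def upgrade() -> None:
    # Step 1: add the column as nullable, with a server default so that
    # newly inserted rows always get a value.
    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
        )
    # Step 2: backfill existing rows that still hold NULL.
    op.execute(sa.text("UPDATE cmip6_dataset SET finalised = false WHERE finalised IS NULL"))
    # Step 3: tighten the constraint once every row has a value.
    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
        batch_op.alter_column("finalised", existing_type=sa.Boolean(), nullable=False)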
climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py
@@ -0,0 +1,108 @@
+"""add indexes
+
+Revision ID: 8d28e5e0f9c3
+Revises: ba5e
+Create Date: 2025-09-05 20:19:18.311472
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8d28e5e0f9c3"
+down_revision: Union[str, None] = "ba5e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_experiment_id"), ["experiment_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_instance_id"), ["instance_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_member_id"), ["member_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_source_id"), ["source_id"], unique=False)
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_dataset_type"), ["dataset_type"], unique=False)
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_file_dataset_id"), ["dataset_id"], unique=False)
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_diagnostic_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.create_index(
+            batch_op.f("ix_execution_execution_group_id"), ["execution_group_id"], unique=False
+        )
+        batch_op.create_index(batch_op.f("ix_execution_successful"), ["successful"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_dataset_dataset_id"), ["dataset_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_dataset_execution_id"), ["execution_id"], unique=False)
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_group_diagnostic_id"), ["diagnostic_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_group_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_output_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_metric_value_execution_id"), ["execution_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_type"), ["type"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_provider_updated_at"), ["updated_at"], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_provider_updated_at"))
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_metric_value_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_type"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_execution_id"))
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_output_updated_at"))
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_group_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_group_diagnostic_id"))
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_execution_id"))
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_dataset_id"))
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_successful"))
+        batch_op.drop_index(batch_op.f("ix_execution_execution_group_id"))
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_diagnostic_updated_at"))
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_file_dataset_id"))
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_dataset_type"))
+
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_source_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_member_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_instance_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_experiment_id"))
+
+    # ### end Alembic commands ###
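
The new indexes cover foreign keys and the columns most commonly used in filters (updated_at, successful, dataset_type, and the main CMIP6 facets). A minimal sketch of the kind of lookup they serve, assuming a configured Session and that SourceDatasetType.CMIP6 is importable from climate_ref_core.datasets:

from sqlalchemy import select
from sqlalchemy.orm import Session

from climate_ref.models import Dataset
from climate_ref_core.datasets import SourceDatasetType

def cmip6_datasets(session: Session) -> list[Dataset]:
    # An equality filter on dataset_type can now be served by
    # ix_dataset_dataset_type instead of a full scan of the dataset table.
    stmt = select(Dataset).where(Dataset.dataset_type == SourceDatasetType.CMIP6)
    return list(session.execute(stmt).scalars())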
climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py
@@ -0,0 +1,35 @@
+"""use 'version' as version facet for obs4MIPs
+
+Revision ID: 2f6e36738e06
+Revises: 8d28e5e0f9c3
+Create Date: 2025-09-10 13:58:40.660076
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "2f6e36738e06"
+down_revision: Union[str, None] = "8d28e5e0f9c3"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py
@@ -0,0 +1,35 @@
+"""add pmp version
+
+Revision ID: 20cd136a5b04
+Revises: 2f6e36738e06
+Create Date: 2025-09-22 23:59:42.724007
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "20cd136a5b04"
+down_revision: Union[str, None] = "2f6e36738e06"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
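
Both version columns are added with nullable=False and no server default, which will generally fail on populated tables; the backfill pattern sketched after the finalised hunks above would be needed otherwise. Once present, the facet is queryable like any other column. A minimal sketch, assuming a configured Session; obs4mips_versions is illustrative, not part of the package:

from sqlalchemy import select
from sqlalchemy.orm import Session

from climate_ref.models.dataset import Obs4MIPsDataset

def obs4mips_versions(session: Session) -> list[tuple[str, str]]:
    # Pair each obs4MIPs variable with its newly tracked version facet.
    stmt = select(Obs4MIPsDataset.variable_id, Obs4MIPsDataset.version)
    return [(variable_id, version) for variable_id, version in session.execute(stmt)]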
climate_ref/models/__init__.py
@@ -4,9 +4,7 @@ Declaration of the models used by the REF.
 These models are used to represent the data that is stored in the database.
 """
 
-from typing import TypeVar
-
-from climate_ref.models.base import Base
+from climate_ref.models.base import Base, Table
 from climate_ref.models.dataset import Dataset
 from climate_ref.models.diagnostic import Diagnostic
 from climate_ref.models.execution import (
@@ -17,9 +15,6 @@ from climate_ref.models.execution import (
 from climate_ref.models.metric_value import MetricValue, ScalarMetricValue, SeriesMetricValue
 from climate_ref.models.provider import Provider
 
-Table = TypeVar("Table", bound=Base)
-
-
 __all__ = [
     "Base",
     "Dataset",
climate_ref/models/base.py
@@ -1,8 +1,7 @@
-import datetime
-from typing import Any
+from typing import Any, TypeVar
 
-from sqlalchemy import JSON, MetaData, func
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+from sqlalchemy import JSON, MetaData
+from sqlalchemy.orm import DeclarativeBase
 
 
 class Base(DeclarativeBase):
@@ -28,17 +27,4 @@ class Base(DeclarativeBase):
     )
 
 
-class CreatedUpdatedMixin:
-    """
-    Mixin for models that have a created_at and updated_at fields
-    """
-
-    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
-    """
-    When the dataset was added to the database
-    """
-
-    updated_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now(), onupdate=func.now())
-    """
-    When the dataset was updated.
-    """
+Table = TypeVar("Table", bound=Base)
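
Moving the Table TypeVar into climate_ref.models.base (and dropping CreatedUpdatedMixin, which reappears in the new climate_ref/models/mixins.py) leaves base.py holding only the declarative base and its generic alias. A bound TypeVar like this is the usual device for typing helpers that work on any model class; a minimal sketch, assuming a configured Session (get_by_id is illustrative, not part of the package):

from sqlalchemy.orm import Session

from climate_ref.models.base import Table  # TypeVar bound to Base

def get_by_id(session: Session, model: type[Table], id_: int) -> Table | None:
    # Because Table is bound to Base, the return type follows the model
    # argument: get_by_id(session, Dataset, 1) is typed as Dataset | None.
    return session.get(model, id_)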
climate_ref/models/dataset.py
@@ -30,7 +30,7 @@ class Dataset(Base):
 
     In the case of CMIP6 datasets, this is the instance_id.
     """
-    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False)
+    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False, index=True)
     """
     Type of dataset
     """
@@ -73,7 +73,9 @@ class DatasetFile(Base):
     __tablename__ = "dataset_file"
 
     id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False, index=True
+    )
     """
     Foreign key to the dataset table
     """
@@ -111,13 +113,13 @@ class CMIP6Dataset(Dataset):
     branch_time_in_child: Mapped[float] = mapped_column(nullable=True)
     branch_time_in_parent: Mapped[float] = mapped_column(nullable=True)
     experiment: Mapped[str] = mapped_column(nullable=True)
-    experiment_id: Mapped[str] = mapped_column()
+    experiment_id: Mapped[str] = mapped_column(index=True)
     frequency: Mapped[str] = mapped_column(nullable=True)
     grid: Mapped[str] = mapped_column(nullable=True)
     grid_label: Mapped[str] = mapped_column()
     institution_id: Mapped[str] = mapped_column()
     long_name: Mapped[str] = mapped_column(nullable=True)
-    member_id: Mapped[str] = mapped_column()
+    member_id: Mapped[str] = mapped_column(index=True)
     nominal_resolution: Mapped[str] = mapped_column(nullable=True)
     parent_activity_id: Mapped[str] = mapped_column(nullable=True)
     parent_experiment_id: Mapped[str] = mapped_column(nullable=True)
@@ -126,7 +128,7 @@ class CMIP6Dataset(Dataset):
     parent_variant_label: Mapped[str] = mapped_column(nullable=True)
     realm: Mapped[str] = mapped_column(nullable=True)
     product: Mapped[str] = mapped_column(nullable=True)
-    source_id: Mapped[str] = mapped_column()
+    source_id: Mapped[str] = mapped_column(index=True)
     standard_name: Mapped[str] = mapped_column(nullable=True)
     source_type: Mapped[str] = mapped_column(nullable=True)
     sub_experiment: Mapped[str] = mapped_column(nullable=True)
@@ -138,7 +140,7 @@ class CMIP6Dataset(Dataset):
     vertical_levels: Mapped[int] = mapped_column(nullable=True)
     version: Mapped[str] = mapped_column()
 
-    instance_id: Mapped[str] = mapped_column()
+    instance_id: Mapped[str] = mapped_column(index=True)
     """
     Unique identifier for the dataset (including the version).
     """
@@ -170,6 +172,7 @@ class Obs4MIPsDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
@@ -204,6 +207,7 @@ class PMPClimatologyDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
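These index=True declarations are the ORM-side counterparts of the add_indexes migration above; SQLAlchemy derives the same ix_<table>_<column> names. A self-contained sketch against a throwaway SQLite engine (Demo is a stand-in model, not from climate-ref):

import sqlalchemy as sa
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class _Base(DeclarativeBase):
    pass

class Demo(_Base):
    __tablename__ = "demo"
    id: Mapped[int] = mapped_column(primary_key=True)
    source_id: Mapped[str] = mapped_column(index=True)

engine = sa.create_engine("sqlite://")
_Base.metadata.create_all(engine)
# The default naming convention yields ix_demo_source_id, matching the
# ix_* names created by migration 8d28e5e0f9c3.
print(sa.inspect(engine).get_indexes("demo"))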
climate_ref/models/diagnostic.py
@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING
 from sqlalchemy import ForeignKey, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
-from climate_ref.models.base import Base, CreatedUpdatedMixin
+from climate_ref.models.base import Base
+from climate_ref.models.mixins import CreatedUpdatedMixin
 
 if TYPE_CHECKING:
     from climate_ref.models.execution import ExecutionGroup
climate_ref/models/execution.py
@@ -1,19 +1,22 @@
 import enum
 import pathlib
-from typing import TYPE_CHECKING, Any
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from loguru import logger
-from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func
+from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func, or_
 from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
 from sqlalchemy.orm.query import RowReturningQuery
 
-from climate_ref.models import Dataset
-from climate_ref.models.base import Base, CreatedUpdatedMixin
+from climate_ref.models.base import Base
+from climate_ref.models.dataset import Dataset
+from climate_ref.models.diagnostic import Diagnostic
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin
+from climate_ref.models.provider import Provider
 from climate_ref_core.datasets import ExecutionDatasetCollection
 
 if TYPE_CHECKING:
     from climate_ref.database import Database
-    from climate_ref.models.diagnostic import Diagnostic
     from climate_ref.models.metric_value import MetricValue
 
 
@@ -40,7 +43,7 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"))
+    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"), index=True)
     """
     The diagnostic that this execution group belongs to
     """
@@ -103,8 +106,8 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 execution_datasets = Table(
     "execution_dataset",
     Base.metadata,
-    Column("execution_id", ForeignKey("execution.id")),
-    Column("dataset_id", ForeignKey("dataset.id")),
+    Column("execution_id", ForeignKey("execution.id"), index=True),
+    Column("dataset_id", ForeignKey("dataset.id"), index=True),
 )
 
 
@@ -136,7 +139,8 @@ class Execution(CreatedUpdatedMixin, Base):
         ForeignKey(
             "execution_group.id",
             name="fk_execution_id",
-        )
+        ),
+        index=True,
     )
     """
     The execution group that this execution belongs to
@@ -149,7 +153,7 @@ class Execution(CreatedUpdatedMixin, Base):
     This is used to verify if an existing diagnostic execution has been run with the same datasets.
     """
 
-    successful: Mapped[bool] = mapped_column(nullable=True)
+    successful: Mapped[bool] = mapped_column(nullable=True, index=True)
     """
     Was the run successful
     """
@@ -216,16 +220,21 @@ class ResultOutputType(enum.Enum):
     HTML = "html"
 
 
-class ExecutionOutput(CreatedUpdatedMixin, Base):
+class ExecutionOutput(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     An output generated as part of an execution.
 
     This output may be a plot, data file or HTML file.
-    These outputs are defined in the CMEC output bundle
+    These outputs are defined in the CMEC output bundle.
+
+    Outputs can be tagged with dimensions from the controlled vocabulary
+    to enable filtering and organization.
     """
 
     __tablename__ = "execution_output"
 
+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)
 
     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)
@@ -263,6 +272,65 @@ class ExecutionOutput(CreatedUpdatedMixin, Base):
 
     execution: Mapped["Execution"] = relationship(back_populates="outputs")
 
+    @classmethod
+    def build(  # noqa: PLR0913
+        cls,
+        *,
+        execution_id: int,
+        output_type: ResultOutputType,
+        dimensions: dict[str, str],
+        filename: str | None = None,
+        short_name: str | None = None,
+        long_name: str | None = None,
+        description: str | None = None,
+    ) -> "ExecutionOutput":
+        """
+        Build an ExecutionOutput from dimensions and metadata
+
+        This is a helper method that validates the dimensions supplied.
+
+        Parameters
+        ----------
+        execution_id
+            Execution that created the output
+        output_type
+            Type of the output
+        dimensions
+            Dimensions that describe the output
+        filename
+            Path to the output
+        short_name
+            Short key of the output
+        long_name
+            Human readable name
+        description
+            Long description
+
+        Raises
+        ------
+        KeyError
+            If an unknown dimension was supplied.
+
+            Dimensions must exist in the controlled vocabulary.
+
+        Returns
+        -------
+        Newly created ExecutionOutput
+        """
+        for k in dimensions:
+            if k not in cls._cv_dimensions:
+                raise KeyError(f"Unknown dimension column '{k}'")
+
+        return ExecutionOutput(
+            execution_id=execution_id,
+            output_type=output_type,
+            filename=filename,
+            short_name=short_name,
+            long_name=long_name,
+            description=description,
+            **dimensions,
+        )
+
 
 def get_execution_group_and_latest(
     session: Session,
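
A usage sketch for the ExecutionOutput.build helper added in the hunk above. _cv_dimensions defaults to an empty list, so this example passes no dimensions; how providers register controlled-vocabulary dimensions is not shown in this diff, and any populated keys would come from that mechanism. ResultOutputType.HTML is the only enum member visible here:

from climate_ref.models.execution import ExecutionOutput, ResultOutputType

output = ExecutionOutput.build(
    execution_id=1,
    output_type=ResultOutputType.HTML,
    dimensions={},  # keys not present in _cv_dimensions raise KeyError
    filename="index.html",
    short_name="index",
    long_name="Diagnostic results page",
    description="Landing page for the outputs of this execution.",
)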
@@ -304,3 +372,148 @@ def get_execution_group_and_latest(
     )
 
     return query  # type: ignore
+
+
+def _filter_executions_by_facets(
+    results: Sequence[tuple[ExecutionGroup, Execution | None]],
+    facet_filters: dict[str, str],
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Filter execution groups and their latest executions based on facet key-value pairs.
+
+    This is a relatively expensive operation as it requires iterating over all results.
+    This should be replaced once we have normalised the selectors into a separate table.
+
+
+    Parameters
+    ----------
+    results
+        List of tuples containing ExecutionGroup and its latest Execution (or None)
+    facet_filters
+        Dictionary of facet key-value pairs to filter by (AND logic, exact match)
+
+    Returns
+    -------
+    Filtered list of tuples containing ExecutionGroup and its latest Execution (or None)
+
+    Notes
+    -----
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    - Key=value filters search across all dataset types
+    - dataset_type.key=value filters only search within the specified dataset type
+    - Multiple values within same filter type use OR logic
+    - All specified facets must match for an execution group to be included (AND logic)
+    """
+    filtered_results = []
+    for eg, execution in results:
+        all_filters_match = True
+        for facet_key, facet_value in facet_filters.items():
+            filter_match = False
+            if "." in facet_key:
+                # Handle dataset_type.key=value format
+                dataset_type, key = facet_key.split(".", 1)
+                if dataset_type in eg.selectors:
+                    if [key, facet_value] in eg.selectors[dataset_type]:
+                        filter_match = True
+                        break
+            else:
+                # Handle key=value format (search across all dataset types)
+                for ds_type_selectors in eg.selectors.values():
+                    if [facet_key, facet_value] in ds_type_selectors:
+                        filter_match = True
+                        break
+
+            if not filter_match:
+                all_filters_match = False
+                break
+        if all_filters_match:
+            filtered_results.append((eg, execution))
+    return filtered_results
+
+
+def get_execution_group_and_latest_filtered(  # noqa: PLR0913
+    session: Session,
+    diagnostic_filters: list[str] | None = None,
+    provider_filters: list[str] | None = None,
+    facet_filters: dict[str, str] | None = None,
+    dirty: bool | None = None,
+    successful: bool | None = None,
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Query execution groups with filtering capabilities.
+
+    Parameters
+    ----------
+    session
+        Database session
+    diagnostic_filters
+        List of diagnostic slug substrings (OR logic, case-insensitive)
+    provider_filters
+        List of provider slug substrings (OR logic, case-insensitive)
+    facet_filters
+        Dictionary of facet key-value pairs (AND logic, exact match)
+    dirty
+        If True, only return dirty execution groups.
+        If False, only return clean execution groups.
+        If None, do not filter by dirty status.
+    successful
+        If True, only return execution groups whose latest execution was successful.
+        If False, only return execution groups whose latest execution was unsuccessful or has no executions.
+        If None, do not filter by execution success.
+
+    Returns
+    -------
+    Query returning tuples of (ExecutionGroup, latest Execution or None)
+
+    Notes
+    -----
+    - Diagnostic and provider filters use substring matching (case-insensitive)
+    - Multiple values within same filter type use OR logic
+    - Different filter types use AND logic
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    """
+    # Start with base query
+    query = get_execution_group_and_latest(session)
+
+    if diagnostic_filters or provider_filters:
+        # Join through to the Diagnostic table
+        query = query.join(Diagnostic, ExecutionGroup.diagnostic_id == Diagnostic.id)
+
+    # Apply diagnostic filter (OR logic for multiple values)
+    if diagnostic_filters:
+        diagnostic_conditions = [
+            Diagnostic.slug.ilike(f"%{filter_value.lower()}%") for filter_value in diagnostic_filters
+        ]
+        query = query.filter(or_(*diagnostic_conditions))
+
+    # Apply provider filter (OR logic for multiple values)
+    if provider_filters:
+        # Need to join through Diagnostic to Provider
+        query = query.join(Provider, Diagnostic.provider_id == Provider.id)
+
+        provider_conditions = [
+            Provider.slug.ilike(f"%{filter_value.lower()}%") for filter_value in provider_filters
+        ]
+        query = query.filter(or_(*provider_conditions))
+
+    if successful is not None:
+        if successful:
+            query = query.filter(Execution.successful.is_(True))
+        else:
+            query = query.filter(or_(Execution.successful.is_(False), Execution.successful.is_(None)))
+
+    if dirty is not None:
+        if dirty:
+            query = query.filter(ExecutionGroup.dirty.is_(True))
+        else:
+            query = query.filter(or_(ExecutionGroup.dirty.is_(False), ExecutionGroup.dirty.is_(None)))
+
+    if facet_filters:
+        # Load all results into memory for Python-based filtering
+        # TODO: Update once we have normalised the selector
+        results = [r._tuple() for r in query.all()]
+        return _filter_executions_by_facets(results, facet_filters)
+    else:
+        return [r._tuple() for r in query.all()]
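
A usage sketch for the new filtered query, assuming a configured Session; the provider slug "pmp" and the facet value "ACCESS-ESM1-5" are illustrative, and the cmip6.source_id key follows the dataset_type.key convention described in the docstring:

from sqlalchemy.orm import Session

from climate_ref.models.execution import get_execution_group_and_latest_filtered

def failed_pmp_groups(session: Session):
    # Execution groups for providers matching "pmp" whose latest execution
    # failed (or never ran), restricted to a single CMIP6 source_id.
    return get_execution_group_and_latest_filtered(
        session,
        provider_filters=["pmp"],
        facet_filters={"cmip6.source_id": "ACCESS-ESM1-5"},
        successful=False,
    )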