climate-ref 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. climate_ref/cli/__init__.py +12 -3
  2. climate_ref/cli/_utils.py +56 -2
  3. climate_ref/cli/datasets.py +48 -9
  4. climate_ref/cli/executions.py +351 -24
  5. climate_ref/cli/providers.py +1 -2
  6. climate_ref/config.py +4 -4
  7. climate_ref/database.py +62 -4
  8. climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
  9. climate_ref/dataset_registry/sample_data.txt +269 -107
  10. climate_ref/datasets/__init__.py +3 -3
  11. climate_ref/datasets/base.py +121 -20
  12. climate_ref/datasets/cmip6.py +2 -0
  13. climate_ref/datasets/obs4mips.py +26 -15
  14. climate_ref/executor/__init__.py +8 -1
  15. climate_ref/executor/hpc.py +7 -1
  16. climate_ref/executor/result_handling.py +151 -64
  17. climate_ref/migrations/env.py +12 -10
  18. climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +1 -1
  19. climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +1 -1
  20. climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
  21. climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
  22. climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
  23. climate_ref/models/__init__.py +1 -6
  24. climate_ref/models/base.py +4 -18
  25. climate_ref/models/dataset.py +10 -6
  26. climate_ref/models/diagnostic.py +2 -1
  27. climate_ref/models/execution.py +225 -12
  28. climate_ref/models/metric_value.py +27 -112
  29. climate_ref/models/mixins.py +144 -0
  30. climate_ref/models/provider.py +2 -1
  31. climate_ref/provider_registry.py +4 -4
  32. climate_ref/slurm.py +2 -2
  33. climate_ref/testing.py +1 -1
  34. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/METADATA +2 -2
  35. climate_ref-0.7.0.dist-info/RECORD +58 -0
  36. climate_ref-0.6.5.dist-info/RECORD +0 -54
  37. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
  38. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
  39. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
  40. {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py
@@ -22,7 +22,7 @@ depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
     # ### commands auto generated by Alembic - please adjust! ###
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
-        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=False))
+        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=True))
         batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=True)

climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py
@@ -49,7 +49,7 @@ def downgrade() -> None:
     # Note: Original migration 94beace57a9c added cmip6_dataset.finalised NOT NULL, with no default.
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
         batch_op.add_column(
-            sa.Column("finalised", sa.Boolean(), nullable=False, server_default=sa.text("false"))
+            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
        )
 
     # Drop base dataset finalised
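
Both hunks relax the re-added finalised column from NOT NULL to nullable. The diff does not state the motivation, but a plausible one is that adding a NOT NULL column with no usable default fails once the table already contains rows. A common three-step pattern for that situation is sketched below; it is illustrative only, not taken from climate-ref:

import sqlalchemy as sa
from alembic import op

def upgrade() -> None:
    # Step 1: add the column as nullable, with a server default so that
    # newly inserted rows always get a value.
    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
        )
    # Step 2: backfill existing rows that still hold NULL.
    op.execute(sa.text("UPDATE cmip6_dataset SET finalised = false WHERE finalised IS NULL"))
    # Step 3: tighten the constraint once every row has a value.
    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
        batch_op.alter_column("finalised", existing_type=sa.Boolean(), nullable=False)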
climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py
@@ -0,0 +1,108 @@
+"""add indexes
+
+Revision ID: 8d28e5e0f9c3
+Revises: ba5e
+Create Date: 2025-09-05 20:19:18.311472
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8d28e5e0f9c3"
+down_revision: Union[str, None] = "ba5e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_experiment_id"), ["experiment_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_instance_id"), ["instance_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_member_id"), ["member_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_source_id"), ["source_id"], unique=False)
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_dataset_type"), ["dataset_type"], unique=False)
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_file_dataset_id"), ["dataset_id"], unique=False)
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_diagnostic_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.create_index(
+            batch_op.f("ix_execution_execution_group_id"), ["execution_group_id"], unique=False
+        )
+        batch_op.create_index(batch_op.f("ix_execution_successful"), ["successful"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_dataset_dataset_id"), ["dataset_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_dataset_execution_id"), ["execution_id"], unique=False)
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_group_diagnostic_id"), ["diagnostic_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_group_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_output_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_metric_value_execution_id"), ["execution_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_type"), ["type"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_provider_updated_at"), ["updated_at"], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_provider_updated_at"))
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_metric_value_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_type"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_execution_id"))
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_output_updated_at"))
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_group_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_group_diagnostic_id"))
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_execution_id"))
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_dataset_id"))
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_successful"))
+        batch_op.drop_index(batch_op.f("ix_execution_execution_group_id"))
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_diagnostic_updated_at"))
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_file_dataset_id"))
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_dataset_type"))
+
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_source_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_member_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_instance_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_experiment_id"))
+
+    # ### end Alembic commands ###
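
The new indexes cover foreign keys and the columns most commonly used in filters (updated_at, successful, dataset_type, and the main CMIP6 facets). A minimal sketch of the kind of lookup they serve, assuming a configured Session and that SourceDatasetType.CMIP6 is importable from climate_ref_core.datasets:

from sqlalchemy import select
from sqlalchemy.orm import Session

from climate_ref.models import Dataset
from climate_ref_core.datasets import SourceDatasetType

def cmip6_datasets(session: Session) -> list[Dataset]:
    # An equality filter on dataset_type can now be served by
    # ix_dataset_dataset_type instead of a full scan of the dataset table.
    stmt = select(Dataset).where(Dataset.dataset_type == SourceDatasetType.CMIP6)
    return list(session.execute(stmt).scalars())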
climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py
@@ -0,0 +1,35 @@
+"""use 'version' as version facet for obs4MIPs
+
+Revision ID: 2f6e36738e06
+Revises: 8d28e5e0f9c3
+Create Date: 2025-09-10 13:58:40.660076
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "2f6e36738e06"
+down_revision: Union[str, None] = "8d28e5e0f9c3"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py
@@ -0,0 +1,35 @@
+"""add pmp version
+
+Revision ID: 20cd136a5b04
+Revises: 2f6e36738e06
+Create Date: 2025-09-22 23:59:42.724007
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "20cd136a5b04"
+down_revision: Union[str, None] = "2f6e36738e06"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
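
Both version columns are added with nullable=False and no server default, which will generally fail on populated tables; the backfill pattern sketched after the finalised hunks above would be needed otherwise. Once present, the facet is queryable like any other column. A minimal sketch, assuming a configured Session; obs4mips_versions is illustrative, not part of the package:

from sqlalchemy import select
from sqlalchemy.orm import Session

from climate_ref.models.dataset import Obs4MIPsDataset

def obs4mips_versions(session: Session) -> list[tuple[str, str]]:
    # Pair each obs4MIPs variable with its newly tracked version facet.
    stmt = select(Obs4MIPsDataset.variable_id, Obs4MIPsDataset.version)
    return [(variable_id, version) for variable_id, version in session.execute(stmt)]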
climate_ref/models/__init__.py
@@ -4,9 +4,7 @@ Declaration of the models used by the REF.
 These models are used to represent the data that is stored in the database.
 """
 
-from typing import TypeVar
-
-from climate_ref.models.base import Base
+from climate_ref.models.base import Base, Table
 from climate_ref.models.dataset import Dataset
 from climate_ref.models.diagnostic import Diagnostic
 from climate_ref.models.execution import (
@@ -17,9 +15,6 @@ from climate_ref.models.execution import (
 from climate_ref.models.metric_value import MetricValue, ScalarMetricValue, SeriesMetricValue
 from climate_ref.models.provider import Provider
 
-Table = TypeVar("Table", bound=Base)
-
-
 __all__ = [
     "Base",
     "Dataset",
climate_ref/models/base.py
@@ -1,8 +1,7 @@
-import datetime
-from typing import Any
+from typing import Any, TypeVar
 
-from sqlalchemy import JSON, MetaData, func
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+from sqlalchemy import JSON, MetaData
+from sqlalchemy.orm import DeclarativeBase
 
 
 class Base(DeclarativeBase):
@@ -28,17 +27,4 @@ class Base(DeclarativeBase):
     )
 
 
-class CreatedUpdatedMixin:
-    """
-    Mixin for models that have a created_at and updated_at fields
-    """
-
-    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
-    """
-    When the dataset was added to the database
-    """
-
-    updated_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now(), onupdate=func.now())
-    """
-    When the dataset was updated.
-    """
+Table = TypeVar("Table", bound=Base)
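
Moving the Table TypeVar into climate_ref.models.base (and dropping CreatedUpdatedMixin, which reappears in the new climate_ref/models/mixins.py) leaves base.py holding only the declarative base and its generic alias. A bound TypeVar like this is the usual device for typing helpers that work on any model class; a minimal sketch, assuming a configured Session (get_by_id is illustrative, not part of the package):

from sqlalchemy.orm import Session

from climate_ref.models.base import Table  # TypeVar bound to Base

def get_by_id(session: Session, model: type[Table], id_: int) -> Table | None:
    # Because Table is bound to Base, the return type follows the model
    # argument: get_by_id(session, Dataset, 1) is typed as Dataset | None.
    return session.get(model, id_)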
climate_ref/models/dataset.py
@@ -30,7 +30,7 @@ class Dataset(Base):
 
     In the case of CMIP6 datasets, this is the instance_id.
     """
-    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False)
+    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False, index=True)
     """
     Type of dataset
     """
@@ -73,7 +73,9 @@ class DatasetFile(Base):
     __tablename__ = "dataset_file"
 
     id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False, index=True
+    )
     """
     Foreign key to the dataset table
     """
@@ -111,13 +113,13 @@ class CMIP6Dataset(Dataset):
     branch_time_in_child: Mapped[float] = mapped_column(nullable=True)
     branch_time_in_parent: Mapped[float] = mapped_column(nullable=True)
     experiment: Mapped[str] = mapped_column(nullable=True)
-    experiment_id: Mapped[str] = mapped_column()
+    experiment_id: Mapped[str] = mapped_column(index=True)
     frequency: Mapped[str] = mapped_column(nullable=True)
     grid: Mapped[str] = mapped_column(nullable=True)
     grid_label: Mapped[str] = mapped_column()
     institution_id: Mapped[str] = mapped_column()
     long_name: Mapped[str] = mapped_column(nullable=True)
-    member_id: Mapped[str] = mapped_column()
+    member_id: Mapped[str] = mapped_column(index=True)
     nominal_resolution: Mapped[str] = mapped_column(nullable=True)
     parent_activity_id: Mapped[str] = mapped_column(nullable=True)
     parent_experiment_id: Mapped[str] = mapped_column(nullable=True)
@@ -126,7 +128,7 @@ class CMIP6Dataset(Dataset):
     parent_variant_label: Mapped[str] = mapped_column(nullable=True)
     realm: Mapped[str] = mapped_column(nullable=True)
     product: Mapped[str] = mapped_column(nullable=True)
-    source_id: Mapped[str] = mapped_column()
+    source_id: Mapped[str] = mapped_column(index=True)
     standard_name: Mapped[str] = mapped_column(nullable=True)
     source_type: Mapped[str] = mapped_column(nullable=True)
     sub_experiment: Mapped[str] = mapped_column(nullable=True)
@@ -138,7 +140,7 @@ class CMIP6Dataset(Dataset):
     vertical_levels: Mapped[int] = mapped_column(nullable=True)
     version: Mapped[str] = mapped_column()
 
-    instance_id: Mapped[str] = mapped_column()
+    instance_id: Mapped[str] = mapped_column(index=True)
     """
     Unique identifier for the dataset (including the version).
     """
@@ -170,6 +172,7 @@ class Obs4MIPsDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
@@ -204,6 +207,7 @@ class PMPClimatologyDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
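These index=True declarations are the ORM-side counterparts of the add_indexes migration above; SQLAlchemy derives the same ix_<table>_<column> names. A self-contained sketch against a throwaway SQLite engine (Demo is a stand-in model, not from climate-ref):

import sqlalchemy as sa
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class _Base(DeclarativeBase):
    pass

class Demo(_Base):
    __tablename__ = "demo"
    id: Mapped[int] = mapped_column(primary_key=True)
    source_id: Mapped[str] = mapped_column(index=True)

engine = sa.create_engine("sqlite://")
_Base.metadata.create_all(engine)
# The default naming convention yields ix_demo_source_id, matching the
# ix_* names created by migration 8d28e5e0f9c3.
print(sa.inspect(engine).get_indexes("demo"))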
climate_ref/models/diagnostic.py
@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING
 from sqlalchemy import ForeignKey, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
-from climate_ref.models.base import Base, CreatedUpdatedMixin
+from climate_ref.models.base import Base
+from climate_ref.models.mixins import CreatedUpdatedMixin
 
 if TYPE_CHECKING:
     from climate_ref.models.execution import ExecutionGroup
climate_ref/models/execution.py
@@ -1,19 +1,22 @@
 import enum
 import pathlib
-from typing import TYPE_CHECKING, Any
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from loguru import logger
-from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func
+from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func, or_
 from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
 from sqlalchemy.orm.query import RowReturningQuery
 
-from climate_ref.models import Dataset
-from climate_ref.models.base import Base, CreatedUpdatedMixin
+from climate_ref.models.base import Base
+from climate_ref.models.dataset import Dataset
+from climate_ref.models.diagnostic import Diagnostic
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin
+from climate_ref.models.provider import Provider
 from climate_ref_core.datasets import ExecutionDatasetCollection
 
 if TYPE_CHECKING:
     from climate_ref.database import Database
-    from climate_ref.models.diagnostic import Diagnostic
     from climate_ref.models.metric_value import MetricValue
 
 
@@ -40,7 +43,7 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"))
+    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"), index=True)
     """
     The diagnostic that this execution group belongs to
     """
@@ -103,8 +106,8 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 execution_datasets = Table(
     "execution_dataset",
     Base.metadata,
-    Column("execution_id", ForeignKey("execution.id")),
-    Column("dataset_id", ForeignKey("dataset.id")),
+    Column("execution_id", ForeignKey("execution.id"), index=True),
+    Column("dataset_id", ForeignKey("dataset.id"), index=True),
 )
 
 
@@ -136,7 +139,8 @@ class Execution(CreatedUpdatedMixin, Base):
         ForeignKey(
             "execution_group.id",
             name="fk_execution_id",
-        )
+        ),
+        index=True,
     )
     """
     The execution group that this execution belongs to
@@ -149,7 +153,7 @@ class Execution(CreatedUpdatedMixin, Base):
     This is used to verify if an existing diagnostic execution has been run with the same datasets.
     """
 
-    successful: Mapped[bool] = mapped_column(nullable=True)
+    successful: Mapped[bool] = mapped_column(nullable=True, index=True)
     """
     Was the run successful
     """
@@ -216,16 +220,21 @@ class ResultOutputType(enum.Enum):
     HTML = "html"
 
 
-class ExecutionOutput(CreatedUpdatedMixin, Base):
+class ExecutionOutput(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     An output generated as part of an execution.
 
     This output may be a plot, data file or HTML file.
-    These outputs are defined in the CMEC output bundle
+    These outputs are defined in the CMEC output bundle.
+
+    Outputs can be tagged with dimensions from the controlled vocabulary
+    to enable filtering and organization.
     """
 
     __tablename__ = "execution_output"
 
+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)
 
     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)
@@ -263,6 +272,65 @@ class ExecutionOutput(CreatedUpdatedMixin, Base):
 
     execution: Mapped["Execution"] = relationship(back_populates="outputs")
 
+    @classmethod
+    def build(  # noqa: PLR0913
+        cls,
+        *,
+        execution_id: int,
+        output_type: ResultOutputType,
+        dimensions: dict[str, str],
+        filename: str | None = None,
+        short_name: str | None = None,
+        long_name: str | None = None,
+        description: str | None = None,
+    ) -> "ExecutionOutput":
+        """
+        Build an ExecutionOutput from dimensions and metadata
+
+        This is a helper method that validates the dimensions supplied.
+
+        Parameters
+        ----------
+        execution_id
+            Execution that created the output
+        output_type
+            Type of the output
+        dimensions
+            Dimensions that describe the output
+        filename
+            Path to the output
+        short_name
+            Short key of the output
+        long_name
+            Human readable name
+        description
+            Long description
+
+        Raises
+        ------
+        KeyError
+            If an unknown dimension was supplied.
+
+            Dimensions must exist in the controlled vocabulary.
+
+        Returns
+        -------
+        Newly created ExecutionOutput
+        """
+        for k in dimensions:
+            if k not in cls._cv_dimensions:
+                raise KeyError(f"Unknown dimension column '{k}'")
+
+        return ExecutionOutput(
+            execution_id=execution_id,
+            output_type=output_type,
+            filename=filename,
+            short_name=short_name,
+            long_name=long_name,
+            description=description,
+            **dimensions,
+        )
+
 
 def get_execution_group_and_latest(
     session: Session,
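
A usage sketch for the ExecutionOutput.build helper added in the hunk above. _cv_dimensions defaults to an empty list, so this example passes no dimensions; how providers register controlled-vocabulary dimensions is not shown in this diff, and any populated keys would come from that mechanism. ResultOutputType.HTML is the only enum member visible here:

from climate_ref.models.execution import ExecutionOutput, ResultOutputType

output = ExecutionOutput.build(
    execution_id=1,
    output_type=ResultOutputType.HTML,
    dimensions={},  # keys not present in _cv_dimensions raise KeyError
    filename="index.html",
    short_name="index",
    long_name="Diagnostic results page",
    description="Landing page for the outputs of this execution.",
)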
@@ -304,3 +372,148 @@ def get_execution_group_and_latest(
     )
 
     return query  # type: ignore
+
+
+def _filter_executions_by_facets(
+    results: Sequence[tuple[ExecutionGroup, Execution | None]],
+    facet_filters: dict[str, str],
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Filter execution groups and their latest executions based on facet key-value pairs.
+
+    This is a relatively expensive operation as it requires iterating over all results.
+    This should be replaced once we have normalised the selectors into a separate table.
+
+
+    Parameters
+    ----------
+    results
+        List of tuples containing ExecutionGroup and its latest Execution (or None)
+    facet_filters
+        Dictionary of facet key-value pairs to filter by (AND logic, exact match)
+
+    Returns
+    -------
+    Filtered list of tuples containing ExecutionGroup and its latest Execution (or None)
+
+    Notes
+    -----
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    - Key=value filters search across all dataset types
+    - dataset_type.key=value filters only search within the specified dataset type
+    - Multiple values within same filter type use OR logic
+    - All specified facets must match for an execution group to be included (AND logic)
+    """
+    filtered_results = []
+    for eg, execution in results:
+        all_filters_match = True
+        for facet_key, facet_value in facet_filters.items():
+            filter_match = False
+            if "." in facet_key:
+                # Handle dataset_type.key=value format
+                dataset_type, key = facet_key.split(".", 1)
+                if dataset_type in eg.selectors:
+                    if [key, facet_value] in eg.selectors[dataset_type]:
+                        filter_match = True
+                        break
+            else:
+                # Handle key=value format (search across all dataset types)
+                for ds_type_selectors in eg.selectors.values():
+                    if [facet_key, facet_value] in ds_type_selectors:
+                        filter_match = True
+                        break
+
+            if not filter_match:
+                all_filters_match = False
+                break
+        if all_filters_match:
+            filtered_results.append((eg, execution))
+    return filtered_results
+
+
+def get_execution_group_and_latest_filtered(  # noqa: PLR0913
+    session: Session,
+    diagnostic_filters: list[str] | None = None,
+    provider_filters: list[str] | None = None,
+    facet_filters: dict[str, str] | None = None,
+    dirty: bool | None = None,
+    successful: bool | None = None,
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Query execution groups with filtering capabilities.
+
+    Parameters
+    ----------
+    session
+        Database session
+    diagnostic_filters
+        List of diagnostic slug substrings (OR logic, case-insensitive)
+    provider_filters
+        List of provider slug substrings (OR logic, case-insensitive)
+    facet_filters
+        Dictionary of facet key-value pairs (AND logic, exact match)
+    dirty
+        If True, only return dirty execution groups.
+        If False, only return clean execution groups.
+        If None, do not filter by dirty status.
+    successful
+        If True, only return execution groups whose latest execution was successful.
+        If False, only return execution groups whose latest execution was unsuccessful or has no executions.
+        If None, do not filter by execution success.
+
+    Returns
+    -------
+    Query returning tuples of (ExecutionGroup, latest Execution or None)
+
+    Notes
+    -----
+    - Diagnostic and provider filters use substring matching (case-insensitive)
+    - Multiple values within same filter type use OR logic
+    - Different filter types use AND logic
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    """
+    # Start with base query
+    query = get_execution_group_and_latest(session)
+
+    if diagnostic_filters or provider_filters:
+        # Join through to the Diagnostic table
+        query = query.join(Diagnostic, ExecutionGroup.diagnostic_id == Diagnostic.id)
+
+    # Apply diagnostic filter (OR logic for multiple values)
+    if diagnostic_filters:
+        diagnostic_conditions = [
+            Diagnostic.slug.ilike(f"%{filter_value.lower()}%") for filter_value in diagnostic_filters
+        ]
+        query = query.filter(or_(*diagnostic_conditions))
+
+    # Apply provider filter (OR logic for multiple values)
+    if provider_filters:
+        # Need to join through Diagnostic to Provider
+        query = query.join(Provider, Diagnostic.provider_id == Provider.id)
+
+        provider_conditions = [
+            Provider.slug.ilike(f"%{filter_value.lower()}%") for filter_value in provider_filters
+        ]
+        query = query.filter(or_(*provider_conditions))
+
+    if successful is not None:
+        if successful:
+            query = query.filter(Execution.successful.is_(True))
+        else:
+            query = query.filter(or_(Execution.successful.is_(False), Execution.successful.is_(None)))
+
+    if dirty is not None:
+        if dirty:
+            query = query.filter(ExecutionGroup.dirty.is_(True))
+        else:
+            query = query.filter(or_(ExecutionGroup.dirty.is_(False), ExecutionGroup.dirty.is_(None)))
+
+    if facet_filters:
+        # Load all results into memory for Python-based filtering
+        # TODO: Update once we have normalised the selector
+        results = [r._tuple() for r in query.all()]
+        return _filter_executions_by_facets(results, facet_filters)
+    else:
+        return [r._tuple() for r in query.all()]
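
A usage sketch for the new filtered query, assuming a configured Session; the provider slug "pmp" and the facet value "ACCESS-ESM1-5" are illustrative, and the cmip6.source_id key follows the dataset_type.key convention described in the docstring:

from sqlalchemy.orm import Session

from climate_ref.models.execution import get_execution_group_and_latest_filtered

def failed_pmp_groups(session: Session):
    # Execution groups for providers matching "pmp" whose latest execution
    # failed (or never ran), restricted to a single CMIP6 source_id.
    return get_execution_group_and_latest_filtered(
        session,
        provider_filters=["pmp"],
        facet_filters={"cmip6.source_id": "ACCESS-ESM1-5"},
        successful=False,
    )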