climate-ref 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +48 -9
- climate_ref/cli/executions.py +351 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +4 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +269 -107
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/__init__.py +8 -1
- climate_ref/executor/hpc.py +7 -1
- climate_ref/executor/result_handling.py +151 -64
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +1 -1
- climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +1 -1
- climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -18
- climate_ref/models/dataset.py +10 -6
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +225 -12
- climate_ref/models/metric_value.py +27 -112
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/METADATA +2 -2
- climate_ref-0.7.0.dist-info/RECORD +58 -0
- climate_ref-0.6.5.dist-info/RECORD +0 -54
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py
CHANGED

@@ -22,7 +22,7 @@ depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
     # ### commands auto generated by Alembic - please adjust! ###
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
-        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=False))
+        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=True))
         batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=True)
         batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=True)
climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py
CHANGED

@@ -49,7 +49,7 @@ def downgrade() -> None:
     # Note: Original migration 94beace57a9c added cmip6_dataset.finalised NOT NULL, with no default.
     with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
         batch_op.add_column(
-            sa.Column("finalised", sa.Boolean(), nullable=False)
+            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
         )
 
     # Drop base dataset finalised
climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py
ADDED

@@ -0,0 +1,108 @@
+"""add indexes
+
+Revision ID: 8d28e5e0f9c3
+Revises: ba5e
+Create Date: 2025-09-05 20:19:18.311472
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8d28e5e0f9c3"
+down_revision: Union[str, None] = "ba5e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_experiment_id"), ["experiment_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_instance_id"), ["instance_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_member_id"), ["member_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_source_id"), ["source_id"], unique=False)
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_dataset_type"), ["dataset_type"], unique=False)
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_file_dataset_id"), ["dataset_id"], unique=False)
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_diagnostic_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.create_index(
+            batch_op.f("ix_execution_execution_group_id"), ["execution_group_id"], unique=False
+        )
+        batch_op.create_index(batch_op.f("ix_execution_successful"), ["successful"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_dataset_dataset_id"), ["dataset_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_dataset_execution_id"), ["execution_id"], unique=False)
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_group_diagnostic_id"), ["diagnostic_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_group_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_output_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_metric_value_execution_id"), ["execution_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_type"), ["type"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_provider_updated_at"), ["updated_at"], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_provider_updated_at"))
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_metric_value_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_type"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_execution_id"))
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_output_updated_at"))
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_group_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_group_diagnostic_id"))
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_execution_id"))
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_dataset_id"))
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_successful"))
+        batch_op.drop_index(batch_op.f("ix_execution_execution_group_id"))
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_diagnostic_updated_at"))
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_file_dataset_id"))
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_dataset_type"))
+
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_source_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_member_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_instance_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_experiment_id"))
+
+    # ### end Alembic commands ###
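The migration above is pure index work: it adds indexes on the facet, foreign-key, and timestamp columns that queries filter and sort on. A minimal sketch, assuming a local SQLite REF database at `ref.db` (an illustrative path, not one defined by the package), of confirming the indexes exist after upgrading:

```python
# Sketch: list the indexes created by migration 8d28e5e0f9c3.
from sqlalchemy import create_engine, inspect

engine = create_engine("sqlite:///ref.db")
inspector = inspect(engine)

for table in ("cmip6_dataset", "dataset", "execution", "execution_group", "metric_value"):
    for index in inspector.get_indexes(table):
        print(table, index["name"], index["column_names"])
```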
climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py
ADDED

@@ -0,0 +1,35 @@
+"""use 'version' as version facet for obs4MIPs
+
+Revision ID: 2f6e36738e06
+Revises: 8d28e5e0f9c3
+Create Date: 2025-09-10 13:58:40.660076
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "2f6e36738e06"
+down_revision: Union[str, None] = "8d28e5e0f9c3"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py
ADDED

@@ -0,0 +1,35 @@
+"""add pmp version
+
+Revision ID: 20cd136a5b04
+Revises: 2f6e36738e06
+Create Date: 2025-09-22 23:59:42.724007
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "20cd136a5b04"
+down_revision: Union[str, None] = "2f6e36738e06"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("version", sa.String(), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pmp_climatology_dataset", schema=None) as batch_op:
+        batch_op.drop_column("version")
+
+    # ### end Alembic commands ###
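Both `version` migrations add the column as NOT NULL with no server default, which only succeeds when the affected table is empty at upgrade time. A hedged sketch, not taken from the package, of the add-backfill-tighten pattern such a migration would need on a populated table; reusing `source_version_number` as the backfill source is purely an assumption for illustration:

```python
# Sketch (not climate-ref code): add a NOT NULL column to a populated table
# by creating it nullable, backfilling, then tightening the constraint.
import sqlalchemy as sa
from alembic import op


def upgrade() -> None:
    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
        batch_op.add_column(sa.Column("version", sa.String(), nullable=True))

    # Backfill from an existing facet (illustrative choice of source column)
    op.execute("UPDATE obs4mips_dataset SET version = source_version_number")

    with op.batch_alter_table("obs4mips_dataset", schema=None) as batch_op:
        batch_op.alter_column("version", existing_type=sa.String(), nullable=False)
```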
climate_ref/models/__init__.py
CHANGED

@@ -4,9 +4,7 @@ Declaration of the models used by the REF.
 These models are used to represent the data that is stored in the database.
 """
 
-from typing import TypeVar
-
-from climate_ref.models.base import Base
+from climate_ref.models.base import Base, Table
 from climate_ref.models.dataset import Dataset
 from climate_ref.models.diagnostic import Diagnostic
 from climate_ref.models.execution import (
@@ -17,9 +15,6 @@ from climate_ref.models.execution import (
 from climate_ref.models.metric_value import MetricValue, ScalarMetricValue, SeriesMetricValue
 from climate_ref.models.provider import Provider
 
-Table = TypeVar("Table", bound=Base)
-
-
 __all__ = [
     "Base",
     "Dataset",
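`Table` is a `TypeVar` bound to `Base`, relocated so it lives next to the base class it constrains; it lets generic database helpers return the caller's concrete model type rather than plain `Base`. A minimal sketch of the pattern (the `get_or_create` helper is hypothetical, not climate-ref API):

```python
# Sketch: a Base-bound TypeVar preserves concrete model types in helpers.
from typing import TypeVar

from sqlalchemy.orm import DeclarativeBase, Session


class Base(DeclarativeBase):
    pass


Table = TypeVar("Table", bound=Base)


def get_or_create(session: Session, model: type[Table], **kwargs) -> Table:
    # The return type is the caller's model class, not just Base
    instance = session.query(model).filter_by(**kwargs).one_or_none()
    if instance is None:
        instance = model(**kwargs)
        session.add(instance)
    return instance
```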
climate_ref/models/base.py
CHANGED

@@ -1,8 +1,7 @@
-import datetime
-from typing import Any
+from typing import Any, TypeVar
 
-from sqlalchemy import JSON, MetaData, func
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+from sqlalchemy import JSON, MetaData
+from sqlalchemy.orm import DeclarativeBase
 
 
 class Base(DeclarativeBase):
@@ -28,17 +27,4 @@ class Base(DeclarativeBase):
     )
 
 
-class CreatedUpdatedMixin:
-    """
-    Mixin for models that have a created_at and updated_at fields
-    """
-
-    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
-    """
-    When the dataset was added to the database
-    """
-
-    updated_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now(), onupdate=func.now())
-    """
-    When the dataset was updated.
-    """
+Table = TypeVar("Table", bound=Base)
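The removed mixin resurfaces in the new `climate_ref/models/mixins.py` (144 added lines, which also introduce `DimensionMixin`). Judging from the lines deleted above, the timestamp mixin presumably looks roughly like this; the actual module may differ in detail:

```python
# Rough sketch of CreatedUpdatedMixin as relocated to climate_ref/models/mixins.py,
# inferred from the code removed from base.py above.
import datetime

from sqlalchemy import func
from sqlalchemy.orm import Mapped, mapped_column


class CreatedUpdatedMixin:
    """
    Mixin for models that have a created_at and updated_at fields
    """

    created_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now())
    """
    When the row was added to the database
    """

    updated_at: Mapped[datetime.datetime] = mapped_column(server_default=func.now(), onupdate=func.now())
    """
    When the row was last updated
    """
```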
climate_ref/models/dataset.py
CHANGED

@@ -30,7 +30,7 @@ class Dataset(Base):
 
     In the case of CMIP6 datasets, this is the instance_id.
     """
-    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False)
+    dataset_type: Mapped[SourceDatasetType] = mapped_column(nullable=False, index=True)
     """
     Type of dataset
     """
@@ -73,7 +73,9 @@ class DatasetFile(Base):
     __tablename__ = "dataset_file"
 
     id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False, index=True
+    )
     """
     Foreign key to the dataset table
     """
@@ -111,13 +113,13 @@ class CMIP6Dataset(Dataset):
     branch_time_in_child: Mapped[float] = mapped_column(nullable=True)
     branch_time_in_parent: Mapped[float] = mapped_column(nullable=True)
     experiment: Mapped[str] = mapped_column(nullable=True)
-    experiment_id: Mapped[str] = mapped_column()
+    experiment_id: Mapped[str] = mapped_column(index=True)
     frequency: Mapped[str] = mapped_column(nullable=True)
     grid: Mapped[str] = mapped_column(nullable=True)
     grid_label: Mapped[str] = mapped_column()
     institution_id: Mapped[str] = mapped_column()
     long_name: Mapped[str] = mapped_column(nullable=True)
-    member_id: Mapped[str] = mapped_column()
+    member_id: Mapped[str] = mapped_column(index=True)
     nominal_resolution: Mapped[str] = mapped_column(nullable=True)
     parent_activity_id: Mapped[str] = mapped_column(nullable=True)
     parent_experiment_id: Mapped[str] = mapped_column(nullable=True)
@@ -126,7 +128,7 @@ class CMIP6Dataset(Dataset):
     parent_variant_label: Mapped[str] = mapped_column(nullable=True)
     realm: Mapped[str] = mapped_column(nullable=True)
     product: Mapped[str] = mapped_column(nullable=True)
-    source_id: Mapped[str] = mapped_column()
+    source_id: Mapped[str] = mapped_column(index=True)
     standard_name: Mapped[str] = mapped_column(nullable=True)
     source_type: Mapped[str] = mapped_column(nullable=True)
     sub_experiment: Mapped[str] = mapped_column(nullable=True)
@@ -138,7 +140,7 @@ class CMIP6Dataset(Dataset):
     vertical_levels: Mapped[int] = mapped_column(nullable=True)
     version: Mapped[str] = mapped_column()
 
-    instance_id: Mapped[str] = mapped_column()
+    instance_id: Mapped[str] = mapped_column(index=True)
     """
     Unique identifier for the dataset (including the version).
     """
@@ -170,6 +172,7 @@ class Obs4MIPsDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
@@ -204,6 +207,7 @@ class PMPClimatologyDataset(Dataset):
     units: Mapped[str] = mapped_column()
     variable_id: Mapped[str] = mapped_column()
     variant_label: Mapped[str] = mapped_column()
+    version: Mapped[str] = mapped_column()
     vertical_levels: Mapped[int] = mapped_column()
     source_version_number: Mapped[str] = mapped_column()
 
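The `index=True` additions mirror the Alembic migration above; together they let common facet lookups use index scans instead of full table scans. A hedged sketch of the kind of query they serve (the facet values are illustrative CMIP6 identifiers, and an existing `session` is assumed):

```python
# Sketch: a facet query served by the new cmip6_dataset indexes.
from sqlalchemy import select

from climate_ref.models.dataset import CMIP6Dataset

stmt = (
    select(CMIP6Dataset)
    .where(CMIP6Dataset.source_id == "ACCESS-ESM1-5")
    .where(CMIP6Dataset.experiment_id == "historical")
    .where(CMIP6Dataset.member_id == "r1i1p1f1")
)
# datasets = session.scalars(stmt).all()
```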
climate_ref/models/diagnostic.py
CHANGED

@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING
 from sqlalchemy import ForeignKey, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
-from climate_ref.models.base import Base, CreatedUpdatedMixin
+from climate_ref.models.base import Base
+from climate_ref.models.mixins import CreatedUpdatedMixin
 
 if TYPE_CHECKING:
     from climate_ref.models.execution import ExecutionGroup
climate_ref/models/execution.py
CHANGED

@@ -1,19 +1,22 @@
 import enum
 import pathlib
-from typing import TYPE_CHECKING, Any
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from loguru import logger
-from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func
+from sqlalchemy import Column, ForeignKey, Table, UniqueConstraint, func, or_
 from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
 from sqlalchemy.orm.query import RowReturningQuery
 
-from climate_ref.models import
-from climate_ref.models.
+from climate_ref.models.base import Base
+from climate_ref.models.dataset import Dataset
+from climate_ref.models.diagnostic import Diagnostic
+from climate_ref.models.mixins import CreatedUpdatedMixin, DimensionMixin
+from climate_ref.models.provider import Provider
 from climate_ref_core.datasets import ExecutionDatasetCollection
 
 if TYPE_CHECKING:
     from climate_ref.database import Database
-    from climate_ref.models.diagnostic import Diagnostic
     from climate_ref.models.metric_value import MetricValue
 
 
@@ -40,7 +43,7 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 
     id: Mapped[int] = mapped_column(primary_key=True)
 
-    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"))
+    diagnostic_id: Mapped[int] = mapped_column(ForeignKey("diagnostic.id"), index=True)
     """
     The diagnostic that this execution group belongs to
     """
@@ -103,8 +106,8 @@ class ExecutionGroup(CreatedUpdatedMixin, Base):
 execution_datasets = Table(
     "execution_dataset",
     Base.metadata,
-    Column("execution_id", ForeignKey("execution.id")),
-    Column("dataset_id", ForeignKey("dataset.id")),
+    Column("execution_id", ForeignKey("execution.id"), index=True),
+    Column("dataset_id", ForeignKey("dataset.id"), index=True),
 )
 
@@ -136,7 +139,8 @@ class Execution(CreatedUpdatedMixin, Base):
         ForeignKey(
             "execution_group.id",
             name="fk_execution_id",
-        )
+        ),
+        index=True,
     )
     """
     The execution group that this execution belongs to
@@ -149,7 +153,7 @@ class Execution(CreatedUpdatedMixin, Base):
     This is used to verify if an existing diagnostic execution has been run with the same datasets.
     """
 
-    successful: Mapped[bool] = mapped_column(nullable=True)
+    successful: Mapped[bool] = mapped_column(nullable=True, index=True)
     """
     Was the run successful
     """
@@ -216,16 +220,21 @@ class ResultOutputType(enum.Enum):
     HTML = "html"
 
 
-class ExecutionOutput(CreatedUpdatedMixin, Base):
+class ExecutionOutput(DimensionMixin, CreatedUpdatedMixin, Base):
     """
     An output generated as part of an execution.
 
     This output may be a plot, data file or HTML file.
-    These outputs are defined in the CMEC output bundle
+    These outputs are defined in the CMEC output bundle.
+
+    Outputs can be tagged with dimensions from the controlled vocabulary
+    to enable filtering and organization.
     """
 
     __tablename__ = "execution_output"
 
+    _cv_dimensions: ClassVar[list[str]] = []
+
     id: Mapped[int] = mapped_column(primary_key=True)
 
     execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"), index=True)
@@ -263,6 +272,65 @@ class ExecutionOutput(CreatedUpdatedMixin, Base):
 
     execution: Mapped["Execution"] = relationship(back_populates="outputs")
 
+    @classmethod
+    def build(  # noqa: PLR0913
+        cls,
+        *,
+        execution_id: int,
+        output_type: ResultOutputType,
+        dimensions: dict[str, str],
+        filename: str | None = None,
+        short_name: str | None = None,
+        long_name: str | None = None,
+        description: str | None = None,
+    ) -> "ExecutionOutput":
+        """
+        Build an ExecutionOutput from dimensions and metadata
+
+        This is a helper method that validates the dimensions supplied.
+
+        Parameters
+        ----------
+        execution_id
+            Execution that created the output
+        output_type
+            Type of the output
+        dimensions
+            Dimensions that describe the output
+        filename
+            Path to the output
+        short_name
+            Short key of the output
+        long_name
+            Human readable name
+        description
+            Long description
+
+        Raises
+        ------
+        KeyError
+            If an unknown dimension was supplied.
+
+            Dimensions must exist in the controlled vocabulary.
+
+        Returns
+        -------
+        Newly created ExecutionOutput
+        """
+        for k in dimensions:
+            if k not in cls._cv_dimensions:
+                raise KeyError(f"Unknown dimension column '{k}'")
+
+        return ExecutionOutput(
+            execution_id=execution_id,
+            output_type=output_type,
+            filename=filename,
+            short_name=short_name,
+            long_name=long_name,
+            description=description,
+            **dimensions,
+        )
+
 
 def get_execution_group_and_latest(
     session: Session,
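A hedged usage sketch of the new `build` helper. Valid dimension keys come from the class's `_cv_dimensions` controlled vocabulary, which is empty on `ExecutionOutput` itself, so this example passes no dimensions; the id and filenames are illustrative:

```python
# Sketch: creating a validated output record via ExecutionOutput.build.
from climate_ref.models.execution import ExecutionOutput, ResultOutputType

output = ExecutionOutput.build(
    execution_id=1,  # illustrative id of an existing Execution row
    output_type=ResultOutputType.HTML,
    dimensions={},  # any keys here must be registered in _cv_dimensions
    filename="index.html",
    short_name="index",
    long_name="Landing page for the execution",
)
```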
@@ -304,3 +372,148 @@ def get_execution_group_and_latest(
     )
 
     return query  # type: ignore
+
+
+def _filter_executions_by_facets(
+    results: Sequence[tuple[ExecutionGroup, Execution | None]],
+    facet_filters: dict[str, str],
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Filter execution groups and their latest executions based on facet key-value pairs.
+
+    This is a relatively expensive operation as it requires iterating over all results.
+    This should be replaced once we have normalised the selectors into a separate table.
+
+
+    Parameters
+    ----------
+    results
+        List of tuples containing ExecutionGroup and its latest Execution (or None)
+    facet_filters
+        Dictionary of facet key-value pairs to filter by (AND logic, exact match)
+
+    Returns
+    -------
+    Filtered list of tuples containing ExecutionGroup and its latest Execution (or None)
+
+    Notes
+    -----
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    - Key=value filters search across all dataset types
+    - dataset_type.key=value filters only search within the specified dataset type
+    - Multiple values within same filter type use OR logic
+    - All specified facets must match for an execution group to be included (AND logic)
+    """
+    filtered_results = []
+    for eg, execution in results:
+        all_filters_match = True
+        for facet_key, facet_value in facet_filters.items():
+            filter_match = False
+            if "." in facet_key:
+                # Handle dataset_type.key=value format
+                dataset_type, key = facet_key.split(".", 1)
+                if dataset_type in eg.selectors:
+                    if [key, facet_value] in eg.selectors[dataset_type]:
+                        filter_match = True
+                        break
+            else:
+                # Handle key=value format (search across all dataset types)
+                for ds_type_selectors in eg.selectors.values():
+                    if [facet_key, facet_value] in ds_type_selectors:
+                        filter_match = True
+                        break
+
+            if not filter_match:
+                all_filters_match = False
+                break
+        if all_filters_match:
+            filtered_results.append((eg, execution))
+    return filtered_results
+
+
+def get_execution_group_and_latest_filtered(  # noqa: PLR0913
+    session: Session,
+    diagnostic_filters: list[str] | None = None,
+    provider_filters: list[str] | None = None,
+    facet_filters: dict[str, str] | None = None,
+    dirty: bool | None = None,
+    successful: bool | None = None,
+) -> list[tuple[ExecutionGroup, Execution | None]]:
+    """
+    Query execution groups with filtering capabilities.
+
+    Parameters
+    ----------
+    session
+        Database session
+    diagnostic_filters
+        List of diagnostic slug substrings (OR logic, case-insensitive)
+    provider_filters
+        List of provider slug substrings (OR logic, case-insensitive)
+    facet_filters
+        Dictionary of facet key-value pairs (AND logic, exact match)
+    dirty
+        If True, only return dirty execution groups.
+        If False, only return clean execution groups.
+        If None, do not filter by dirty status.
+    successful
+        If True, only return execution groups whose latest execution was successful.
+        If False, only return execution groups whose latest execution was unsuccessful or has no executions.
+        If None, do not filter by execution success.
+
+    Returns
+    -------
+    Query returning tuples of (ExecutionGroup, latest Execution or None)
+
+    Notes
+    -----
+    - Diagnostic and provider filters use substring matching (case-insensitive)
+    - Multiple values within same filter type use OR logic
+    - Different filter types use AND logic
+    - Facet filters can either be key=value (searches all dataset types)
+      or dataset_type.key=value (searches specific dataset type)
+    """
+    # Start with base query
+    query = get_execution_group_and_latest(session)
+
+    if diagnostic_filters or provider_filters:
+        # Join through to the Diagnostic table
+        query = query.join(Diagnostic, ExecutionGroup.diagnostic_id == Diagnostic.id)
+
+        # Apply diagnostic filter (OR logic for multiple values)
+        if diagnostic_filters:
+            diagnostic_conditions = [
+                Diagnostic.slug.ilike(f"%{filter_value.lower()}%") for filter_value in diagnostic_filters
+            ]
+            query = query.filter(or_(*diagnostic_conditions))
+
+        # Apply provider filter (OR logic for multiple values)
+        if provider_filters:
+            # Need to join through Diagnostic to Provider
+            query = query.join(Provider, Diagnostic.provider_id == Provider.id)
+
+            provider_conditions = [
+                Provider.slug.ilike(f"%{filter_value.lower()}%") for filter_value in provider_filters
+            ]
+            query = query.filter(or_(*provider_conditions))
+
+    if successful is not None:
+        if successful:
+            query = query.filter(Execution.successful.is_(True))
+        else:
+            query = query.filter(or_(Execution.successful.is_(False), Execution.successful.is_(None)))
+
+    if dirty is not None:
+        if dirty:
+            query = query.filter(ExecutionGroup.dirty.is_(True))
+        else:
+            query = query.filter(or_(ExecutionGroup.dirty.is_(False), ExecutionGroup.dirty.is_(None)))
+
+    if facet_filters:
+        # Load all results into memory for Python-based filtering
+        # TODO: Update once we have normalised the selector
+        results = [r._tuple() for r in query.all()]
+        return _filter_executions_by_facets(results, facet_filters)
+    else:
+        return [r._tuple() for r in query.all()]