climate-ref 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/executions.py +18 -0
- climate_ref/config.py +11 -1
- climate_ref/dataset_registry/sample_data.txt +278 -107
- climate_ref/datasets/base.py +28 -2
- climate_ref/datasets/cmip6.py +54 -100
- climate_ref/datasets/cmip6_parsers.py +189 -0
- climate_ref/datasets/obs4mips.py +14 -3
- climate_ref/executor/__init__.py +8 -1
- climate_ref/executor/hpc.py +56 -19
- climate_ref/executor/pbs_scheduler.py +152 -0
- climate_ref/executor/result_handling.py +147 -63
- climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +57 -0
- climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +57 -0
- climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
- climate_ref/models/base.py +3 -1
- climate_ref/models/dataset.py +31 -21
- climate_ref/models/execution.py +6 -5
- climate_ref/models/metric_value.py +2 -2
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/METADATA +2 -2
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/RECORD +25 -20
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/WHEEL +0 -0
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.4.dist-info → climate_ref-0.6.6.dist-info}/licenses/NOTICE +0 -0
climate_ref/executor/pbs_scheduler.py
ADDED

@@ -0,0 +1,152 @@
+import re
+import shutil
+import subprocess
+import textwrap
+from typing import Any
+
+from parsl.launchers import SimpleLauncher
+from parsl.providers import PBSProProvider
+
+
+class SmartPBSProvider(PBSProProvider):
+    """
+    A PBSProProvider subclass that adapts to systems where `-l select` is not supported.
+
+    Falls back to individual resource requests (ncpus, mem, jobfs, storage) if needed.
+    """
+
+    def __init__(  # noqa: PLR0913
+        self,
+        account: str | None = None,
+        queue: str | None = None,
+        scheduler_options: str = "",
+        worker_init: str = "",
+        nodes_per_block: int | None = 1,
+        cpus_per_node: int | None = 1,
+        ncpus: int | None = None,
+        mem: str = "4GB",
+        jobfs: str = "10GB",
+        storage: str = "",
+        init_blocks: int = 1,
+        min_blocks: int = 0,
+        max_blocks: int = 1,
+        parallelism: int = 1,
+        launcher: SimpleLauncher = SimpleLauncher(),
+        walltime: str = "00:20:00",
+        cmd_timeout: int = 120,
+    ) -> None:
+        self.ncpus = ncpus
+        self.mem = mem
+        self.jobfs = jobfs
+        self.storage = storage
+        self._select_supported = self._detect_select_support()
+
+        # Prepare fallback resource dictionary
+        self._fallback_resources = {"mem": mem, "jobfs": jobfs, "storage": storage}
+
+        # Parse and strip select if present in scheduler_options
+        if not self._select_supported and "-l select=" in scheduler_options:
+            scheduler_options = self._parse_select_from_scheduler_options(scheduler_options)
+
+        # Determine fallback ncpus
+        if "ncpus" not in self._fallback_resources:
+            self._fallback_resources["ncpus"] = str(ncpus if ncpus is not None else (cpus_per_node or 1))
+
+        # Map ncpus to cpus_per_node if needed (select mode only)
+        if self._select_supported:
+            if not ncpus and cpus_per_node:
+                cpus_per_node = ncpus
+            elif ncpus and cpus_per_node and int(ncpus) != int(cpus_per_node):
+                print(f"Warning: ncpus={ncpus} and cpus_per_node={cpus_per_node} differ.")
+                print(f"Using cpus_per_node={cpus_per_node}.")
+        else:
+            cpus_per_node = int(self._fallback_resources["ncpus"])
+
+        super().__init__(
+            account=account,
+            queue=queue,
+            scheduler_options=scheduler_options,
+            select_options="",  # Not used; we handle resources ourselves
+            worker_init=worker_init,
+            nodes_per_block=nodes_per_block,
+            cpus_per_node=cpus_per_node,
+            init_blocks=init_blocks,
+            min_blocks=min_blocks,
+            max_blocks=max_blocks,
+            parallelism=parallelism,
+            launcher=launcher,
+            walltime=walltime,
+            cmd_timeout=cmd_timeout,
+        )  # type: ignore
+
+        if not self._select_supported:
+            self.template_string = self._fallback_template()
+
+    def _detect_select_support(self) -> bool:
+        """Detect whether `-l select` is supported by the underlying PBS system."""
+        qsub_path = shutil.which("qsub")
+        if qsub_path is None:
+            raise RuntimeError("qsub command not found. Ensure PBS is installed and in PATH.")
+
+        try:
+            result = subprocess.run(  # noqa: S603
+                [qsub_path, "-l", "wd,select=1:ncpus=1", "--version"],
+                capture_output=True,
+                timeout=5,
+                check=False,
+            )
+            stderr = result.stderr.decode().lower()
+            return "unknown" not in stderr and result.returncode == 0
+        except Exception:
+            return False
+
+    def _parse_select_from_scheduler_options(self, scheduler_options: str) -> str:
+        """
+        Parse `-l select=...` from scheduler_options and update fallback resources.
+
+        Removes the select line from scheduler_options.
+        """
+        select_pattern = r"-l\s+select=([^\s]+)"
+        match = re.search(select_pattern, scheduler_options)
+        if match:
+            select_string = match.group(1)
+            scheduler_options = re.sub(select_pattern, "", scheduler_options).strip()
+
+            parts = select_string.split(":")[1:]  # skip the initial `select=1`
+            for part in parts:
+                if "=" in part:
+                    key, val = part.split("=")
+                    self._fallback_resources[key.strip()] = val.strip()
+        return scheduler_options
+
+    def _fallback_template(self) -> str:
+        """Submit script template used if `select` is not supported."""
+        return textwrap.dedent("""\
+            #!/bin/bash
+            #PBS -N ${jobname}
+            #PBS -l ncpus=${ncpus}
+            #PBS -l mem=${mem}
+            #PBS -l jobfs=${jobfs}
+            #PBS -l walltime=${walltime}
+            #PBS -l storage=${storage}
+            #PBS -o ${job_stdout_path}
+            #PBS -e ${job_stderr_path}
+            ${scheduler_options}
+
+            ${worker_init}
+
+            export JOBNAME="${jobname}"
+            ${user_script}
+
+            """)
+
+    def _write_submit_script(
+        self, template: str, script_filename: str, job_name: str, configs: dict[str, Any]
+    ) -> str:
+        """Inject fallback values into the submit script if `select` is not supported."""
+        if not self._select_supported:
+            configs.setdefault("ncpus", self._fallback_resources.get("ncpus", "1"))
+            configs.setdefault("mem", self._fallback_resources.get("mem", "4GB"))
+            configs.setdefault("jobfs", self._fallback_resources.get("jobfs", "10GB"))
+            configs.setdefault("storage", self._fallback_resources.get("storage", "gdata1"))
+        return super()._write_submit_script(template, script_filename, job_name, configs)  # type: ignore
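For orientation, a minimal usage sketch of the new provider. The account, queue, and storage values below are placeholders, and the executor wiring around the provider is not part of this diff:

# Hypothetical values for illustration; SmartPBSProvider probes `qsub`
# at construction time, so this only runs on a host with PBS installed.
from climate_ref.executor.pbs_scheduler import SmartPBSProvider

provider = SmartPBSProvider(
    account="ab12",          # placeholder project code
    queue="normal",          # placeholder queue name
    ncpus=4,
    mem="16GB",
    storage="gdata/ab12",    # placeholder storage directive
    walltime="01:00:00",
)
# On systems where `qsub -l select=...` is rejected, the provider swaps in
# its fallback submit template, requesting ncpus/mem/jobfs/storage individually.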
climate_ref/executor/result_handling.py
CHANGED

@@ -17,11 +17,12 @@ from loguru import logger
 from sqlalchemy import insert
 
 from climate_ref.database import Database
-from climate_ref.models import ScalarMetricValue
+from climate_ref.models import ScalarMetricValue, SeriesMetricValue
 from climate_ref.models.execution import Execution, ExecutionOutput, ResultOutputType
 from climate_ref_core.diagnostics import ExecutionResult, ensure_relative_path
 from climate_ref_core.exceptions import ResultValidationError
 from climate_ref_core.logging import EXECUTION_LOG_FILENAME
+from climate_ref_core.metric_values import SeriesMetricValue as TSeries
 from climate_ref_core.pycmec.controlled_vocabulary import CV
 from climate_ref_core.pycmec.metric import CMECMetric
 from climate_ref_core.pycmec.output import CMECOutput, OutputDict
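Note the alias: both `climate_ref.models` and `climate_ref_core.metric_values` export a class named `SeriesMetricValue`. The core-package class is imported as `TSeries` so that the ORM model keeps the unqualified name for the `insert()` calls below.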
@@ -65,6 +66,111 @@ def _copy_file_to_results(
     shutil.copy(input_directory / filename, output_filename)
 
 
+def _process_execution_scalar(
+    database: Database,
+    result: "ExecutionResult",
+    execution: Execution,
+    cv: CV,
+) -> None:
+    """
+    Process the scalar values from the execution result and store them in the database
+
+    This also validates the scalar values against the controlled vocabulary
+    """
+    # Load the metric bundle from the file
+    cmec_metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
+
+    # Check that the diagnostic values conform with the controlled vocabulary
+    try:
+        cv.validate_metrics(cmec_metric_bundle)
+    except (ResultValidationError, AssertionError):
+        # TODO: Remove once we have settled on a controlled vocabulary
+        logger.exception("Diagnostic values do not conform with the controlled vocabulary")
+        # execution.mark_failed()
+
+    # Perform a bulk insert of scalar values
+    # The current implementation will swallow the exception, but display a log message
+    try:
+        scalar_values = [
+            {
+                "execution_id": execution.id,
+                "value": result.value,
+                "attributes": result.attributes,
+                **result.dimensions,
+            }
+            for result in cmec_metric_bundle.iter_results()
+        ]
+        logger.debug(f"Ingesting {len(scalar_values)} scalar values for execution {execution.id}")
+        if scalar_values:
+            # Perform this in a nested transaction to rollback if something goes wrong
+            # We will lose the metric values for a given execution, but not the whole execution
+            with database.session.begin_nested():
+                database.session.execute(
+                    insert(ScalarMetricValue),
+                    scalar_values,
+                )
+    # This is a broad exception catch to ensure we log any issues
+    except Exception:
+        logger.exception("Something went wrong when ingesting diagnostic values")
+
+
+def _process_execution_series(
+    config: "Config",
+    database: Database,
+    result: "ExecutionResult",
+    execution: Execution,
+    cv: CV,
+) -> None:
+    """
+    Process the series values from the execution result and store them in the database
+
+    This also copies the series values file from the scratch directory to the results directory
+    and validates the series values against the controlled vocabulary.
+    """
+    assert result.series_filename, "Series filename must be set in the result"
+
+    _copy_file_to_results(
+        config.paths.scratch,
+        config.paths.results,
+        execution.output_fragment,
+        result.series_filename,
+    )
+
+    # Load the series values from the file
+    series_values_path = result.to_output_path(result.series_filename)
+    series_values = TSeries.load_from_json(series_values_path)
+
+    try:
+        cv.validate_metrics(series_values)
+    except (ResultValidationError, AssertionError):
+        # TODO: Remove once we have settled on a controlled vocabulary
+        logger.exception("Diagnostic values do not conform with the controlled vocabulary")
+        # execution.mark_failed()
+
+    # Perform a bulk insert of series values
+    try:
+        series_values_content = [
+            {
+                "execution_id": execution.id,
+                "values": series_result.values,
+                "attributes": series_result.attributes,
+                **series_result.dimensions,
+            }
+            for series_result in series_values
+        ]
+        logger.debug(f"Ingesting {len(series_values)} series values for execution {execution.id}")
+        if series_values:
+            # Perform this in a nested transaction to rollback if something goes wrong
+            # We will lose the metric values for a given execution, but not the whole execution
+            with database.session.begin_nested():
+                database.session.execute(
+                    insert(SeriesMetricValue),
+                    series_values_content,
+                )
+    except Exception:
+        logger.exception("Something went wrong when ingesting diagnostic series values")
+
+
 def handle_execution_result(
     config: "Config",
     database: Database,
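Both helpers use SQLAlchemy's executemany-style bulk insert, `Session.execute(insert(Model), rows)` with a list of dicts, wrapped in a SAVEPOINT via `begin_nested()` so a failed ingest rolls back only the metric values. A self-contained sketch of that pattern with a toy model (not part of this package):

import sqlalchemy as sa
from sqlalchemy import insert
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Value(Base):
    __tablename__ = "value"
    id: Mapped[int] = mapped_column(primary_key=True)
    execution_id: Mapped[int]
    value: Mapped[float]


engine = sa.create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session, session.begin():
    rows = [{"execution_id": 1, "value": v} for v in (0.1, 0.2)]
    # begin_nested() opens a SAVEPOINT: if the bulk insert fails,
    # only these rows are rolled back, not the outer transaction.
    with session.begin_nested():
        session.execute(insert(Value), rows)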
@@ -88,7 +194,7 @@ def handle_execution_result(
     result
         The result of the diagnostic execution, either successful or failed
     """
-    # Always copy log data
+    # Always copy log data to the results directory
     _copy_file_to_results(
         config.paths.scratch,
         config.paths.results,
@@ -96,74 +202,52 @@ def handle_execution_result(
     EXECUTION_LOG_FILENAME,
 )
 
-    if result.successful
-        logger.
+    if not result.successful or result.metric_bundle_filename is None:
+        logger.error(f"{execution} failed")
+        execution.mark_failed()
+        return
+
+    logger.info(f"{execution} successful")
+
+    _copy_file_to_results(
+        config.paths.scratch,
+        config.paths.results,
+        execution.output_fragment,
+        result.metric_bundle_filename,
+    )
 
+    if result.output_bundle_filename:
         _copy_file_to_results(
             config.paths.scratch,
             config.paths.results,
             execution.output_fragment,
-            result.
+            result.output_bundle_filename,
+        )
+        _handle_output_bundle(
+            config,
+            database,
+            execution,
+            result.to_output_path(result.output_bundle_filename),
         )
-        execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))
-
-        if result.output_bundle_filename:
-            _copy_file_to_results(
-                config.paths.scratch,
-                config.paths.results,
-                execution.output_fragment,
-                result.output_bundle_filename,
-            )
-            _handle_output_bundle(
-                config,
-                database,
-                execution,
-                result.to_output_path(result.output_bundle_filename),
-            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                **result.dimensions,
-            }
-            for result in cmec_metric_bundle.iter_results()
-        ]
-        if scalar_values:
-            # Perform this in a nested transaction to rollback if something goes wrong
-            # We will lose the metric values for a given execution, but not the whole execution
-            with database.session.begin_nested():
-                database.session.execute(
-                    insert(ScalarMetricValue),
-                    scalar_values,
-                )
-        except Exception:
-            # TODO: Remove once we have settled on a controlled vocabulary
-            logger.exception("Something went wrong when ingesting diagnostic values")
-
-        # TODO Ingest the series values
-
-        # TODO: This should check if the result is the most recent for the execution,
-        # if so then update the dirty fields
-        # i.e. if there are outstanding executions don't make as clean
-        execution.execution_group.dirty = False
-    else:
-        logger.error(f"{execution} failed")
-        execution.mark_failed()
+    cv = CV.load_from_file(config.paths.dimensions_cv)
+
+    if result.series_filename:
+        # Process the series values if they are present
+        # This will ingest the series values into the database
+        _process_execution_series(config=config, database=database, result=result, execution=execution, cv=cv)
+
+    # Process the scalar values
+    # This will ingest the scalar values into the database
+    _process_execution_scalar(database=database, result=result, execution=execution, cv=cv)
+
+    # TODO: This should check if the result is the most recent for the execution,
+    # if so then update the dirty fields
+    # i.e. if there are outstanding executions don't make as clean
+    execution.execution_group.dirty = False
+
+    # Finally, mark the execution as successful
+    execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))
 
 
 def _handle_output_bundle(
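Taken together, this hunk is a guard-clause rewrite of `handle_execution_result`: the failure path now logs, marks the execution failed, and returns early; the success path runs un-nested and delegates ingestion to the `_process_execution_series` and `_process_execution_scalar` helpers added above, with `mark_successful` moved to the end.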
climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py
ADDED

@@ -0,0 +1,57 @@
+"""cmip6-finalised
+
+Revision ID: 94beace57a9c
+Revises: 795c1e6cf496
+Create Date: 2025-07-20 15:21:17.132458
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "94beace57a9c"
+down_revision: Union[str, None] = "795c1e6cf496"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("finalised", sa.Boolean(), nullable=True))
+        batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("nominal_resolution", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("realm", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("product", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("standard_name", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("source_type", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("sub_experiment", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("sub_experiment_id", existing_type=sa.VARCHAR(), nullable=True)
+        batch_op.alter_column("units", existing_type=sa.VARCHAR(), nullable=True)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.alter_column("units", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("sub_experiment_id", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("sub_experiment", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("source_type", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("standard_name", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("product", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("realm", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("nominal_resolution", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("grid", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("frequency", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.alter_column("experiment", existing_type=sa.VARCHAR(), nullable=False)
+        batch_op.drop_column("finalised")
+
+    # ### end Alembic commands ###
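Note that alongside the new `finalised` flag, most CMIP6 attribute columns are relaxed to nullable, presumably so that a dataset row can be created before every attribute has been parsed; the follow-up migration below then moves the flag onto the base `dataset` table.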
climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py
ADDED

@@ -0,0 +1,57 @@
+"""finalised-on-base-dataset
+
+Move finalised from cmip6_dataset to base dataset table and default all existing rows to True.
+
+Revision ID: a1b2c3d4e5f6
+Revises: 94beace57a9c
+Create Date: 2025-08-05 03:27:00
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "ba5e"
+down_revision: Union[str, None] = "94beace57a9c"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # Add finalised to base dataset with default True, non-null
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("true"))
+        )
+
+    # Backfill: ensure all existing rows are True
+    op.execute("UPDATE dataset SET finalised = TRUE WHERE finalised IS NULL")
+
+    # Enforce NOT NULL after backfill
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.alter_column("finalised", nullable=False)
+
+    # Drop column from cmip6_dataset if it exists
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    cmip6_cols = {col["name"] for col in inspector.get_columns("cmip6_dataset")}
+    if "finalised" in cmip6_cols:
+        with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+            batch_op.drop_column("finalised")
+
+
+def downgrade() -> None:
+    # Re-create cmip6_dataset.finalised as non-nullable boolean default False
+    # Note: Original migration 94beace57a9c added cmip6_dataset.finalised NOT NULL, with no default.
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("finalised", sa.Boolean(), nullable=True, server_default=sa.text("false"))
+        )
+
+    # Drop base dataset finalised
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.drop_column("finalised")
climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py
ADDED

@@ -0,0 +1,108 @@
+"""add indexes
+
+Revision ID: 8d28e5e0f9c3
+Revises: ba5e
+Create Date: 2025-09-05 20:19:18.311472
+
+"""
+
+from collections.abc import Sequence
+from typing import Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8d28e5e0f9c3"
+down_revision: Union[str, None] = "ba5e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_experiment_id"), ["experiment_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_instance_id"), ["instance_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_member_id"), ["member_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_cmip6_dataset_source_id"), ["source_id"], unique=False)
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_dataset_type"), ["dataset_type"], unique=False)
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_dataset_file_dataset_id"), ["dataset_id"], unique=False)
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_diagnostic_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.create_index(
+            batch_op.f("ix_execution_execution_group_id"), ["execution_group_id"], unique=False
+        )
+        batch_op.create_index(batch_op.f("ix_execution_successful"), ["successful"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_dataset_dataset_id"), ["dataset_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_dataset_execution_id"), ["execution_id"], unique=False)
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_group_diagnostic_id"), ["diagnostic_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_execution_group_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_execution_output_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_metric_value_execution_id"), ["execution_id"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_type"), ["type"], unique=False)
+        batch_op.create_index(batch_op.f("ix_metric_value_updated_at"), ["updated_at"], unique=False)
+
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_provider_updated_at"), ["updated_at"], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("provider", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_provider_updated_at"))
+
+    with op.batch_alter_table("metric_value", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_metric_value_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_type"))
+        batch_op.drop_index(batch_op.f("ix_metric_value_execution_id"))
+
+    with op.batch_alter_table("execution_output", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_output_updated_at"))
+
+    with op.batch_alter_table("execution_group", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_group_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_group_diagnostic_id"))
+
+    with op.batch_alter_table("execution_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_execution_id"))
+        batch_op.drop_index(batch_op.f("ix_execution_dataset_dataset_id"))
+
+    with op.batch_alter_table("execution", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_execution_updated_at"))
+        batch_op.drop_index(batch_op.f("ix_execution_successful"))
+        batch_op.drop_index(batch_op.f("ix_execution_execution_group_id"))
+
+    with op.batch_alter_table("diagnostic", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_diagnostic_updated_at"))
+
+    with op.batch_alter_table("dataset_file", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_file_dataset_id"))
+
+    with op.batch_alter_table("dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_dataset_dataset_type"))
+
+    with op.batch_alter_table("cmip6_dataset", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_source_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_member_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_instance_id"))
+        batch_op.drop_index(batch_op.f("ix_cmip6_dataset_experiment_id"))
+
+    # ### end Alembic commands ###
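These index definitions mirror `index=True` declarations on the ORM models (see the `models/base.py` hunk below); a migration of this shape is typically regenerated with `alembic revision --autogenerate -m "add indexes"` and applied with `alembic upgrade head`.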
climate_ref/models/base.py
CHANGED

@@ -38,7 +38,9 @@ class CreatedUpdatedMixin:
     When the dataset was added to the database
     """
 
-    updated_at: Mapped[datetime.datetime] = mapped_column(
+    updated_at: Mapped[datetime.datetime] = mapped_column(
+        server_default=func.now(), onupdate=func.now(), index=True
+    )
     """
     When the dataset was updated.
     """