climate-ref 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/datasets.py +1 -3
- climate_ref/config.py +63 -0
- climate_ref/executor/hpc.py +149 -53
- climate_ref/executor/local.py +1 -2
- climate_ref/executor/result_handling.py +13 -6
- climate_ref/solver.py +17 -6
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/METADATA +1 -1
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/RECORD +12 -12
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/WHEEL +1 -1
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/datasets.py
CHANGED
@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse
 
 """
 
-import errno
-import os
 import shutil
 from collections.abc import Iterable
 from pathlib import Path
@@ -133,7 +131,7 @@ def ingest( # noqa
 
         if not _dir.exists():
             logger.error(f"File or directory {_dir} does not exist")
-
+            continue
 
         # TODO: This assumes that all datasets are nc files.
         # THis is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
climate_ref/config.py
CHANGED
@@ -14,11 +14,14 @@ which always take precedence over any other configuration values.
 # `esgpull` configuration management system with some of the extra complexity removed.
 # https://github.com/ESGF/esgf-download/blob/main/esgpull/config.py
 
+import datetime
 import importlib.resources
 import os
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal
 
+import platformdirs
+import requests
 import tomlkit
 from attr import Factory
 from attrs import define, field
@@ -334,6 +337,46 @@ def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
     return _converter_defaults_relaxed.structure(doc, Config)
 
 
+DEFAULT_IGNORE_DATASETS_MAX_AGE = datetime.timedelta(hours=6)
+DEFAULT_IGNORE_DATASETS_URL = (
+    "https://raw.githubusercontent.com/Climate-REF/climate-ref/refs/heads/main/default_ignore_datasets.yaml"
+)
+
+
+def _get_default_ignore_datasets_file() -> Path:
+    """
+    Get the path to the ignore datasets file
+    """
+    cache_dir = platformdirs.user_cache_path("climate_ref")
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    ignore_datasets_file = cache_dir / "default_ignore_datasets.yaml"
+
+    download = True
+    if ignore_datasets_file.exists():
+        # Only update if the ignore datasets file is older than `DEFAULT_IGNORE_DATASETS_MAX_AGE`.
+        modification_time = datetime.datetime.fromtimestamp(ignore_datasets_file.stat().st_mtime)
+        age = datetime.datetime.now() - modification_time
+        if age < DEFAULT_IGNORE_DATASETS_MAX_AGE:
+            download = False
+
+    if download:
+        logger.info(
+            f"Downloading default ignore datasets file from {DEFAULT_IGNORE_DATASETS_URL} "
+            f"to {ignore_datasets_file}"
+        )
+        response = requests.get(DEFAULT_IGNORE_DATASETS_URL, timeout=120)
+        try:
+            response.raise_for_status()
+        except requests.RequestException as exc:
+            logger.warning(f"Failed to download default ignore datasets file: {exc}")
+            ignore_datasets_file.touch(exist_ok=True)
+        else:
+            with ignore_datasets_file.open(mode="wb") as file:
+                file.write(response.content)
+
+    return ignore_datasets_file
+
+
 @define(auto_attribs=True)
 class Config:
     """
@@ -364,6 +407,26 @@ class Config:
     - `complete`: Use the complete parser, which parses the dataset based on all available metadata.
     """
 
+    ignore_datasets_file: Path = field(factory=_get_default_ignore_datasets_file)
+    """
+    Path to the file containing the ignore datasets
+
+    This file is a YAML file that contains a list of facets to ignore per diagnostic.
+
+    The format is:
+    ```yaml
+    provider:
+        diagnostic:
+            source_type:
+                - facet: value
+                - another_facet: [another_value1, another_value2]
+    ```
+
+    If this is not specified, a default ignore datasets file will be used.
+    The default file is downloaded from the Climate-REF GitHub repository
+    if it does not exist or is older than 6 hours.
+    """
+
     paths: PathConfig = Factory(PathConfig)
     db: DbConfig = Factory(DbConfig)
     executor: ExecutorConfig = Factory(ExecutorConfig)
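The new default factory re-downloads `default_ignore_datasets.yaml` only when the copy cached under the `platformdirs` user cache is older than `DEFAULT_IGNORE_DATASETS_MAX_AGE` (6 hours). A minimal sketch of that staleness check, using the same stdlib calls as the code above; the cache path in the usage line is illustrative, not the exact path `platformdirs` resolves on every platform:

```python
import datetime
from pathlib import Path

MAX_AGE = datetime.timedelta(hours=6)


def is_stale(cached: Path, max_age: datetime.timedelta = MAX_AGE) -> bool:
    """Return True when `cached` is missing or its mtime is older than `max_age`."""
    if not cached.exists():
        return True
    mtime = datetime.datetime.fromtimestamp(cached.stat().st_mtime)
    return datetime.datetime.now() - mtime >= max_age


# Hypothetical cache location; climate_ref resolves the real one via platformdirs.
print(is_stale(Path.home() / ".cache" / "climate_ref" / "default_ignore_datasets.yaml"))
```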
climate_ref/executor/hpc.py
CHANGED
@@ -19,8 +19,9 @@ except ImportError: # pragma: no cover
     )
 
 import os
+import re
 import time
-from typing import Any
+from typing import Annotated, Any, Literal
 
 import parsl
 from loguru import logger
@@ -29,6 +30,7 @@ from parsl.config import Config as ParslConfig
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SimpleLauncher, SrunLauncher
 from parsl.providers import SlurmProvider
+from pydantic import BaseModel, Field, StrictBool, field_validator, model_validator
 from tqdm import tqdm
 
 from climate_ref.config import Config
@@ -43,6 +45,72 @@ from .local import ExecutionFuture, process_result
 from .pbs_scheduler import SmartPBSProvider
 
 
+class SlurmConfig(BaseModel):
+    """Slurm Configurations"""
+
+    scheduler: Literal["slurm"]
+    account: str
+    username: str
+    partition: str | None = None
+    log_dir: str = "runinfo"
+    qos: str | None = None
+    req_nodes: Annotated[int, Field(strict=True, ge=1, le=1000)] = 1
+    cores_per_worker: Annotated[int, Field(strict=True, ge=1, le=1000)] = 1
+    mem_per_worker: Annotated[float, Field(strict=True, gt=0, lt=1000.0)] | None = None
+    max_workers_per_node: Annotated[int, Field(strict=True, ge=1, le=1000)] = 16
+    validation: StrictBool = False
+    walltime: str = "00:30:00"
+    scheduler_options: str = ""
+    retries: Annotated[int, Field(strict=True, ge=1, le=3)] = 2
+    max_blocks: Annotated[int, Field(strict=True, ge=1)] = 1  # one block mean one job?
+    worker_init: str = ""
+    overrides: str = ""
+    cmd_timeout: Annotated[int, Field(strict=True, ge=0)] = 120
+    cpu_affinity: str = "none"
+
+    @model_validator(mode="before")
+    def _check_parition_qos(cls, data: Any) -> Any:
+        if not ("partition" in data or "qos" in data):
+            raise ValueError("partition or qos is needed")
+        return data
+
+    @field_validator("scheduler_options")
+    def _validate_sbatch_syntax(cls, v: str | None) -> Any:
+        if not v:
+            return v
+
+        sbatch_pattern = re.compile(
+            r"^\s*#SBATCH\s+"  # Start with #SBATCH
+            r"(?:-\w+\s+[^\s]+"  # Option-value pairs
+            r"(?:\s+-\w+\s+[^\s]+)*)"  # Additional options
+            r"\s*$",
+            re.IGNORECASE | re.MULTILINE,
+        )
+
+        invalid_lines = [
+            line
+            for line in v.split("\n")
+            if not (line.strip().upper().startswith("#SBATCH") and sbatch_pattern.match(line.strip()))
+        ]
+
+        if invalid_lines:
+            error_msg = (
+                "Invalid SBATCH directives:\n"
+                + "\n".join(invalid_lines)
+                + "\n"
+                + "Expected format: '#SBATCH -option value [-option value ...]'"
+            )
+            raise ValueError(error_msg)
+        return v
+
+    @field_validator("walltime")
+    def _validate_walltime(cls, v: str) -> str:
+        pattern = r"^(\d+-)?\d{1,5}:[0-5][0-9]:[0-5][0-9]$"
+        if not re.match(pattern, v):
+            raise ValueError("Walltime must be in `D-HH:MM:SS/HH:MM:SS` format")
+        return v
+
+
 @python_app
 def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionResult:
     """Run the function on computer nodes"""
@@ -112,13 +180,18 @@ class HPCExecutor:
         self.cores_per_worker = _to_int(executor_config.get("cores_per_worker"))
         self.mem_per_worker = _to_float(executor_config.get("mem_per_worker"))
 
-
+        if self.scheduler == "slurm":
+            self.slurm_config = SlurmConfig.model_validate(executor_config)
+            hours, minutes, seconds = map(int, self.slurm_config.walltime.split(":"))
+
+            if self.slurm_config.validation and HAS_REAL_SLURM:
+                self._validate_slurm_params()
+        else:
+            hours, minutes, seconds = map(int, self.walltime.split(":"))
+
         total_minutes = hours * 60 + minutes + seconds / 60
         self.total_minutes = total_minutes
 
-        if executor_config.get("validation") and HAS_REAL_SLURM:
-            self._validate_slurm_params()
-
         self._initialize_parsl()
 
         self.parsl_results: list[ExecutionFuture] = []
@@ -131,45 +204,52 @@ class HPCExecutor:
             ValueError: If account, partition or QOS are invalid or inaccessible.
         """
         slurm_checker = SlurmChecker()
-        if self.account and not slurm_checker.get_account_info(self.account):
-            raise ValueError(f"Account: {self.account} not valid")
+        if self.slurm_config.account and not slurm_checker.get_account_info(self.slurm_config.account):
+            raise ValueError(f"Account: {self.slurm_config.account} not valid")
 
         partition_limits = None
         node_info = None
 
-        if self.partition:
-            if not slurm_checker.get_partition_info(self.partition):
-                raise ValueError(f"Partition: {self.partition} not valid")
+        if self.slurm_config.partition:
+            if not slurm_checker.get_partition_info(self.slurm_config.partition):
+                raise ValueError(f"Partition: {self.slurm_config.partition} not valid")
 
-            if not slurm_checker.can_account_use_partition(
-
+            if not slurm_checker.can_account_use_partition(
+                self.slurm_config.account, self.slurm_config.partition
+            ):
+                raise ValueError(
+                    f"Account: {self.slurm_config.account}"
+                    f" cannot access partiton: {self.slurm_config.partition}"
+                )
 
-            partition_limits = slurm_checker.get_partition_limits(self.partition)
-            node_info = slurm_checker.get_node_from_partition(self.partition)
+            partition_limits = slurm_checker.get_partition_limits(self.slurm_config.partition)
+            node_info = slurm_checker.get_node_from_partition(self.slurm_config.partition)
 
         qos_limits = None
-        if self.qos:
-            if not slurm_checker.get_qos_info(self.qos):
-                raise ValueError(f"QOS: {self.qos} not valid")
+        if self.slurm_config.qos:
+            if not slurm_checker.get_qos_info(self.slurm_config.qos):
+                raise ValueError(f"QOS: {self.slurm_config.qos} not valid")
 
-            if not slurm_checker.can_account_use_qos(self.account, self.qos):
-                raise ValueError(
+            if not slurm_checker.can_account_use_qos(self.slurm_config.account, self.slurm_config.qos):
+                raise ValueError(
+                    f"Account: {self.slurm_config.account} cannot access qos: {self.slurm_config.qos}"
+                )
 
-            qos_limits = slurm_checker.get_qos_limits(self.qos)
+            qos_limits = slurm_checker.get_qos_limits(self.slurm_config.qos)
 
         max_cores_per_node = int(node_info["cpus"]) if node_info else None
-        if max_cores_per_node and self.cores_per_worker:
-            if self.cores_per_worker > max_cores_per_node:
+        if max_cores_per_node and self.slurm_config.cores_per_worker:
+            if self.slurm_config.cores_per_worker > max_cores_per_node:
                 raise ValueError(
-                    f"cores_per_work:{self.cores_per_worker}"
+                    f"cores_per_work:{self.slurm_config.cores_per_worker}"
                     f"larger than the maximum in a node {max_cores_per_node}"
                 )
 
         max_mem_per_node = float(node_info["real_memory"]) if node_info else None
-        if max_mem_per_node and self.mem_per_worker:
-            if self.mem_per_worker > max_mem_per_node:
+        if max_mem_per_node and self.slurm_config.mem_per_worker:
+            if self.slurm_config.mem_per_worker > max_mem_per_node:
                 raise ValueError(
-                    f"mem_per_work:{self.mem_per_worker}"
+                    f"mem_per_work:{self.slurm_config.mem_per_worker}"
                    f"larger than the maximum mem in a node {max_mem_per_node}"
                 )
 
@@ -182,8 +262,8 @@ class HPCExecutor:
 
         if self.total_minutes > float(max_walltime_minutes):
             raise ValueError(
-                f"Walltime: {self.walltime} exceed the maximum time "
-                f"{max_walltime_minutes} allowed by {self.partition} and {self.qos}"
+                f"Walltime: {self.slurm_config.walltime} exceed the maximum time "
+                f"{max_walltime_minutes} allowed by {self.slurm_config.partition} and {self.slurm_config.qos}"
             )
 
     def _initialize_parsl(self) -> None:
@@ -192,19 +272,34 @@
         provider: SlurmProvider | SmartPBSProvider
         if self.scheduler == "slurm":
             provider = SlurmProvider(
-                account=self.account,
-                partition=self.partition,
-                qos=self.qos,
-                nodes_per_block=self.req_nodes,
-                max_blocks=
-                scheduler_options=
-                worker_init=
+                account=self.slurm_config.account,
+                partition=self.slurm_config.partition,
+                qos=self.slurm_config.qos,
+                nodes_per_block=self.slurm_config.req_nodes,
+                max_blocks=self.slurm_config.max_blocks,
+                scheduler_options=self.slurm_config.scheduler_options,
+                worker_init=self.slurm_config.worker_init,
                 launcher=SrunLauncher(
                     debug=True,
-                    overrides=
+                    overrides=self.slurm_config.overrides,
                 ),
-                walltime=self.walltime,
-                cmd_timeout=
+                walltime=self.slurm_config.walltime,
+                cmd_timeout=self.slurm_config.cmd_timeout,
+            )
+
+            executor = HighThroughputExecutor(
+                label="ref_hpc_executor",
+                cores_per_worker=self.slurm_config.cores_per_worker,
+                mem_per_worker=self.slurm_config.mem_per_worker,
+                max_workers_per_node=self.slurm_config.max_workers_per_node,
+                cpu_affinity=self.slurm_config.cpu_affinity,
+                provider=provider,
+            )
+
+            hpc_config = ParslConfig(
+                run_dir=self.slurm_config.log_dir,
+                executors=[executor],
+                retries=self.slurm_config.retries,
             )
 
         elif self.scheduler == "pbs":
@@ -227,23 +322,24 @@ class HPCExecutor:
                 walltime=self.walltime,
                 cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
             )
-        else:
-            raise ValueError(f"Unsupported scheduler: {self.scheduler}")
 
-
-
-
-
-
-
-
+            executor = HighThroughputExecutor(
+                label="ref_hpc_executor",
+                cores_per_worker=self.cores_per_worker if self.cores_per_worker else 1,
+                mem_per_worker=self.mem_per_worker,
+                max_workers_per_node=_to_int(executor_config.get("max_workers_per_node", 16)),
+                cpu_affinity=str(executor_config.get("cpu_affinity")),
+                provider=provider,
+            )
 
-
-
-
-
-
+            hpc_config = ParslConfig(
+                run_dir=self.log_dir,
+                executors=[executor],
+                retries=int(executor_config.get("retries", 2)),
+            )
+
+        else:
+            raise ValueError(f"Unsupported scheduler: {self.scheduler}")
 
         parsl.load(hpc_config)
 
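`SlurmConfig` is a plain pydantic model, so the new validation rules can be exercised on their own. A minimal sketch (the account, username, partition, and `#SBATCH` values are made up; only the field names and validators come from the diff above):

```python
from climate_ref.executor.hpc import SlurmConfig
from pydantic import ValidationError

# Accepted: partition given, walltime in HH:MM:SS (D-HH:MM:SS also passes),
# scheduler_options limited to "#SBATCH -option value" directives.
cfg = SlurmConfig.model_validate(
    {
        "scheduler": "slurm",
        "account": "my_account",  # hypothetical
        "username": "my_user",  # hypothetical
        "partition": "compute",  # hypothetical; partition or qos must be present
        "walltime": "02:00:00",
        "scheduler_options": "#SBATCH -C cpu",
    }
)
print(cfg.max_workers_per_node)  # 16 (default)

# Rejected by the mode="before" model validator: neither partition nor qos given.
try:
    SlurmConfig.model_validate({"scheduler": "slurm", "account": "a", "username": "u"})
except ValidationError as exc:
    print(exc)  # "partition or qos is needed"
```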
climate_ref/executor/local.py
CHANGED
@@ -88,8 +88,7 @@ def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionResult:
     except Exception:  # pragma: no cover
         # This isn't expected but if it happens we want to log the error before the process exits
         logger.exception("Error running diagnostic")
-
-        raise
+        return ExecutionResult.build_from_failure(definition)
 
 
 class LocalExecutor:
climate_ref/executor/result_handling.py
CHANGED
@@ -197,12 +197,19 @@ def handle_execution_result(
         The result of the diagnostic execution, either successful or failed
     """
     # Always copy log data to the results directory
-
-
-
-
-
-
+    try:
+        _copy_file_to_results(
+            config.paths.scratch,
+            config.paths.results,
+            execution.output_fragment,
+            EXECUTION_LOG_FILENAME,
+        )
+    except FileNotFoundError:
+        logger.error(
+            f"Could not find log file {EXECUTION_LOG_FILENAME} in scratch directory: {config.paths.scratch}"
+        )
+        execution.mark_failed()
+        return
 
     if not result.successful or result.metric_bundle_filename is None:
         logger.error(f"{execution} failed")
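Taken together, the executor changes route worker crashes through the normal result path: `_process_run` returns a failure result instead of re-raising, and `handle_execution_result` marks the execution failed when no execution log reached scratch. A condensed sketch of the first pattern, with `run_diagnostic` as a hypothetical stand-in for the real body and `ExecutionResult` as imported in the module:

```python
from loguru import logger


def process_run_sketch(definition):
    try:
        return run_diagnostic(definition)  # hypothetical happy path
    except Exception:
        # Log before the worker process exits, then hand back a failure
        # result so the parent records it through the normal result path.
        logger.exception("Error running diagnostic")
        return ExecutionResult.build_from_failure(definition)
```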
climate_ref/solver.py
CHANGED
@@ -353,7 +353,7 @@ class ExecutionSolver:
         yield from solve_executions(self.data_catalog, diagnostic, provider)
 
 
-def solve_required_executions(  # noqa: PLR0913
+def solve_required_executions(  # noqa: PLR0912, PLR0913
    db: Database,
    dry_run: bool = False,
    execute: bool = True,
@@ -396,7 +396,14 @@ def solve_required_executions(  # noqa: PLR0913
             f"for {potential_execution.diagnostic.full_slug()}"
         )
 
+        if potential_execution.provider.slug not in provider_count:
+            provider_count[potential_execution.provider.slug] = 0
+        if potential_execution.diagnostic.full_slug() not in diagnostic_count:
+            diagnostic_count[potential_execution.diagnostic.full_slug()] = 0
+
         if dry_run:
+            provider_count[potential_execution.provider.slug] += 1
+            diagnostic_count[potential_execution.diagnostic.full_slug()] += 1
             continue
 
         # Use a transaction to make sure that the models
@@ -421,11 +428,6 @@ def solve_required_executions(  # noqa: PLR0913
             },
         )
 
-        if diagnostic.provider.slug not in provider_count:
-            provider_count[diagnostic.provider.slug] = 0
-        if diagnostic.full_slug() not in diagnostic_count:
-            diagnostic_count[diagnostic.full_slug()] = 0
-
         if created:
             logger.info(f"Created new execution group: {potential_execution.execution_slug()!r}")
             db.session.flush()
@@ -471,5 +473,14 @@ def solve_required_executions(  # noqa: PLR0913
 
         provider_count[diagnostic.provider.slug] += 1
         diagnostic_count[diagnostic.full_slug()] += 1
+
+    logger.info("Solve complete")
+    logger.info(f"Found {sum(diagnostic_count.values())} new executions")
+    for diag, count in diagnostic_count.items():
+        logger.info(f"  {diag}: {count} new executions")
+    for prov, count in provider_count.items():
+        logger.info(f"  {prov}: {count} new executions")
+
     if timeout > 0:
         executor.join(timeout=timeout)
+        logger.info("All executions complete")
{climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref
-Version: 0.7.0
+Version: 0.8.1
 Summary: Application which runs the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0
{climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/RECORD
RENAMED
@@ -1,18 +1,18 @@
 climate_ref/__init__.py,sha256=M45QGfl0KCPK48A8MjI08weNvZHMYH__GblraQMxsoM,808
 climate_ref/_config_helpers.py,sha256=-atI5FX7SukhLE_jz_rL-EHQ7s0YYqKu3dSFYWxSyMU,6632
 climate_ref/alembic.ini,sha256=WRvbwSIFuZ7hWNMnR2-yHPJAwYUnwhvRYBzkJhtpGdg,3535
-climate_ref/config.py,sha256=
+climate_ref/config.py,sha256=MyAQQP0LCiO20e20C1GSz-W1o1CFW5XRYINL89oRFWM,19686
 climate_ref/constants.py,sha256=9RaNLgUSuQva7ki4eRW3TjOKeVP6T81QNiu0veB1zVk,111
 climate_ref/database.py,sha256=stO0K61D8Jh6zRXpjq8rTTeuz0aSi2ZEmeb_9ZqUHJc,10707
 climate_ref/provider_registry.py,sha256=NJssVC0ws7BqaYnAPy-1jSxwdFSXl1VCId67WXMUeGU,4230
 climate_ref/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 climate_ref/slurm.py,sha256=N2L1pZ1A79dtkASEFU4TUjrVg2qtYUf7HMeoGXErTyA,7338
-climate_ref/solver.py,sha256=
+climate_ref/solver.py,sha256=ssmzQuxoEhO8Lc62cUJmW6FzM82t08jMoCZ9AzSIfLc,17367
 climate_ref/testing.py,sha256=5zte0H4trwxOgRpzAJJ8by8aGuDSDOQAlUqFBgqjhWg,4256
 climate_ref/cli/__init__.py,sha256=iVgsOBnf4YNgLwxoR-VCrrcnMWM90wtLBFxvI7AaHB0,4576
 climate_ref/cli/_utils.py,sha256=KQDp-0YDLXqljpcUFUGoksloJocMWbwh3-kxoUqMbgo,3440
 climate_ref/cli/config.py,sha256=ak4Rn9S6fH23PkHHlI-pXuPiZYOvUB4r26eu3p525-M,532
-climate_ref/cli/datasets.py,sha256=
+climate_ref/cli/datasets.py,sha256=8dZUNJuLJLkhuEHP7BOANM3Wxx2Gy0DZ7k3DvMeC4D0,9912
 climate_ref/cli/executions.py,sha256=O1xm89r9cI4e99VzLgc5pbhrp71OjROEHa6-GRjaFg4,18191
 climate_ref/cli/providers.py,sha256=8C3xSDBdzfUMil6HPG9a7g0_EKQEmlfPbI3VnN_NmMI,2590
 climate_ref/cli/solve.py,sha256=ZTXrwDFDXNrX5GLMJTN9tFnpV3zlcZbEu2aF3JDJVxI,2367
@@ -26,10 +26,10 @@ climate_ref/datasets/obs4mips.py,sha256=AerO5QaISiRYPzBm_C6lGsKQgE_Zyzo4XoOOKrpB
 climate_ref/datasets/pmp_climatology.py,sha256=goHDc_3B2Wdiy_hmpERNvWDdDYZACPOyFDt3Du6nGc0,534
 climate_ref/datasets/utils.py,sha256=iLJO7h4G3DWsRe9hIC4qkIyi5_zIW1ZMw-FDASLujtM,359
 climate_ref/executor/__init__.py,sha256=tXuXxoFQKH9-te4O3bJg9dVrBKtjRS2si0yzRsHVfGk,902
-climate_ref/executor/hpc.py,sha256=
-climate_ref/executor/local.py,sha256=
+climate_ref/executor/hpc.py,sha256=cFdQKIU-80g9_MlBk88-3ELIRELVyXTxKIGMuwvLQ0Y,17985
+climate_ref/executor/local.py,sha256=gXU9fstBnBkHYdnNFq0UXBVV8iP-iFISD98N6m6DNLw,8588
 climate_ref/executor/pbs_scheduler.py,sha256=WoH1sTmDl7bdmYodpcxZjkUSvInYUcWR4x7buIgBxqk,5807
-climate_ref/executor/result_handling.py,sha256=
+climate_ref/executor/result_handling.py,sha256=l1xvtYzfGYKtb5T41fT4Q5iB5ayqFk18JS07T2P2igU,11212
 climate_ref/executor/synchronous.py,sha256=o4TndsoKMu9AzJYLkusU9lRkgHCy6HcCP46tEs6o86U,1895
 climate_ref/migrations/README,sha256=xM5osYbyEbEFA2eh5kwary_oh-5VFWtDubA-vgWwvlE,935
 climate_ref/migrations/env.py,sha256=jSW4j__q39MaWRHBJ_FZFHl7gEZ7b6_YyU5wDxxzpWQ,4570
@@ -50,9 +50,9 @@ climate_ref/models/execution.py,sha256=gfkrs0wyySNVQpfob1Bc-26iLDV99K6aSPHs0GZmd
 climate_ref/models/metric_value.py,sha256=NFvduNVyB5aOj8rwn8KiPDzIjomBzIroAyFACkSSEUw,7400
 climate_ref/models/mixins.py,sha256=1EAJU2RlhY-9UUIN8F5SZOg5k5uD9r1rG6isvrjQF0o,4683
 climate_ref/models/provider.py,sha256=OnoacwAa50XBS9CCgxJnylIfsGXFP4EqTlLhBXmh6So,991
-climate_ref-0.7.0.dist-info/METADATA,sha256=
-climate_ref-0.7.0.dist-info/WHEEL,sha256=
-climate_ref-0.7.0.dist-info/entry_points.txt,sha256=
-climate_ref-0.7.0.dist-info/licenses/LICENCE,sha256=
-climate_ref-0.7.0.dist-info/licenses/NOTICE,sha256=
-climate_ref-0.7.0.dist-info/RECORD,,
+climate_ref-0.8.1.dist-info/METADATA,sha256=ZgNlP4qzntgW1tNzyTT3pJejEv3JElwsIJ-Y90jFbhA,4507
+climate_ref-0.8.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+climate_ref-0.8.1.dist-info/entry_points.txt,sha256=IaggEJlDIhoYWXdXJafacWbWtCcoEqUKceP1qD7_7vU,44
+climate_ref-0.8.1.dist-info/licenses/LICENCE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+climate_ref-0.8.1.dist-info/licenses/NOTICE,sha256=4qTlax9aX2-mswYJuVrLqJ9jK1IkN5kSBqfVvYLF3Ws,128
+climate_ref-0.8.1.dist-info/RECORD,,
{climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/entry_points.txt
RENAMED
File without changes
{climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/licenses/LICENCE
RENAMED
File without changes
{climate_ref-0.7.0.dist-info → climate_ref-0.8.1.dist-info}/licenses/NOTICE
RENAMED
File without changes