climate-ref 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +49 -12
- climate_ref/cli/executions.py +333 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +67 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +10 -19
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/hpc.py +149 -53
- climate_ref/executor/local.py +1 -2
- climate_ref/executor/result_handling.py +17 -7
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -20
- climate_ref/models/dataset.py +2 -0
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +219 -7
- climate_ref/models/metric_value.py +25 -110
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/solver.py +17 -6
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/METADATA +1 -1
- climate_ref-0.8.0.dist-info/RECORD +58 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/WHEEL +1 -1
- climate_ref-0.6.6.dist-info/RECORD +0 -55
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/providers.py
CHANGED
|
@@ -7,14 +7,12 @@ from typing import Annotated
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
import typer
|
|
9
9
|
from loguru import logger
|
|
10
|
-
from rich.console import Console
|
|
11
10
|
|
|
12
11
|
from climate_ref.cli._utils import pretty_print_df
|
|
13
12
|
from climate_ref.provider_registry import ProviderRegistry
|
|
14
13
|
from climate_ref_core.providers import CondaDiagnosticProvider, DiagnosticProvider
|
|
15
14
|
|
|
16
15
|
app = typer.Typer(help=__doc__)
|
|
17
|
-
console = Console()
|
|
18
16
|
|
|
19
17
|
|
|
20
18
|
@app.command(name="list")
|
|
@@ -24,6 +22,7 @@ def list_(ctx: typer.Context) -> None:
|
|
|
24
22
|
"""
|
|
25
23
|
config = ctx.obj.config
|
|
26
24
|
db = ctx.obj.database
|
|
25
|
+
console = ctx.obj.console
|
|
27
26
|
provider_registry = ProviderRegistry.build_from_config(config, db)
|
|
28
27
|
|
|
29
28
|
def get_env(provider: DiagnosticProvider) -> str:
|
climate_ref/config.py
CHANGED
|
@@ -14,11 +14,14 @@ which always take precedence over any other configuration values.
|
|
|
14
14
|
# `esgpull` configuration management system with some of the extra complexity removed.
|
|
15
15
|
# https://github.com/ESGF/esgf-download/blob/main/esgpull/config.py
|
|
16
16
|
|
|
17
|
+
import datetime
|
|
17
18
|
import importlib.resources
|
|
18
19
|
import os
|
|
19
20
|
from pathlib import Path
|
|
20
21
|
from typing import TYPE_CHECKING, Any, Literal
|
|
21
22
|
|
|
23
|
+
import platformdirs
|
|
24
|
+
import requests
|
|
22
25
|
import tomlkit
|
|
23
26
|
from attr import Factory
|
|
24
27
|
from attrs import define, field
|
|
@@ -334,6 +337,46 @@ def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
|
|
|
334
337
|
return _converter_defaults_relaxed.structure(doc, Config)
|
|
335
338
|
|
|
336
339
|
|
|
340
|
+
DEFAULT_IGNORE_DATASETS_MAX_AGE = datetime.timedelta(hours=6)
|
|
341
|
+
DEFAULT_IGNORE_DATASETS_URL = (
|
|
342
|
+
"https://raw.githubusercontent.com/Climate-REF/climate-ref/refs/heads/main/default_ignore_datasets.yaml"
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _get_default_ignore_datasets_file() -> Path:
    """
    Get the path to the default ignore datasets file

    The file is kept in the user's cache directory.
    It is downloaded from `DEFAULT_IGNORE_DATASETS_URL` when no cached copy exists
    or when the cached copy is older than `DEFAULT_IGNORE_DATASETS_MAX_AGE`.

    A failed download is not fatal: a warning is logged and the cached file
    (stale, or empty if it did not exist before) is used instead.

    Returns
    -------
    :
        Path to the cached ignore datasets file
    """
    cache_dir = platformdirs.user_cache_path("climate_ref")
    cache_dir.mkdir(parents=True, exist_ok=True)
    ignore_datasets_file = cache_dir / "default_ignore_datasets.yaml"

    if ignore_datasets_file.exists():
        # Only update if the ignore datasets file is older than `DEFAULT_IGNORE_DATASETS_MAX_AGE`.
        modification_time = datetime.datetime.fromtimestamp(ignore_datasets_file.stat().st_mtime)
        age = datetime.datetime.now() - modification_time
        if age < DEFAULT_IGNORE_DATASETS_MAX_AGE:
            return ignore_datasets_file

    logger.info(
        f"Downloading default ignore datasets file from {DEFAULT_IGNORE_DATASETS_URL} "
        f"to {ignore_datasets_file}"
    )
    try:
        # The request itself must be inside the `try`: connection errors and timeouts
        # are raised by `requests.get`, not by `raise_for_status`.
        response = requests.get(DEFAULT_IGNORE_DATASETS_URL, timeout=120)
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.warning(f"Failed to download default ignore datasets file: {exc}")
        # Creates an empty file if none exists and refreshes the modification time,
        # which the age check above uses to decide when to try downloading again.
        ignore_datasets_file.touch(exist_ok=True)
    else:
        with ignore_datasets_file.open(mode="wb") as file:
            file.write(response.content)

    return ignore_datasets_file
|
|
378
|
+
|
|
379
|
+
|
|
337
380
|
@define(auto_attribs=True)
|
|
338
381
|
class Config:
|
|
339
382
|
"""
|
|
@@ -364,10 +407,30 @@ class Config:
|
|
|
364
407
|
- `complete`: Use the complete parser, which parses the dataset based on all available metadata.
|
|
365
408
|
"""
|
|
366
409
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
410
|
+
ignore_datasets_file: Path = field(factory=_get_default_ignore_datasets_file)
|
|
411
|
+
"""
|
|
412
|
+
Path to the file containing the ignore datasets
|
|
413
|
+
|
|
414
|
+
This file is a YAML file that contains a list of facets to ignore per diagnostic.
|
|
415
|
+
|
|
416
|
+
The format is:
|
|
417
|
+
```yaml
|
|
418
|
+
provider:
|
|
419
|
+
diagnostic:
|
|
420
|
+
source_type:
|
|
421
|
+
- facet: value
|
|
422
|
+
- another_facet: [another_value1, another_value2]
|
|
423
|
+
```
|
|
424
|
+
|
|
425
|
+
If this is not specified, a default ignore datasets file will be used.
|
|
426
|
+
The default file is downloaded from the Climate-REF GitHub repository
|
|
427
|
+
if it does not exist or is older than 6 hours.
|
|
428
|
+
"""
|
|
429
|
+
|
|
430
|
+
paths: PathConfig = Factory(PathConfig)
|
|
431
|
+
db: DbConfig = Factory(DbConfig)
|
|
432
|
+
executor: ExecutorConfig = Factory(ExecutorConfig)
|
|
433
|
+
diagnostic_providers: list[DiagnosticProviderConfig] = Factory(default_providers) # noqa: RUF009, RUF100
|
|
371
434
|
_raw: TOMLDocument | None = field(init=False, default=None, repr=False)
|
|
372
435
|
_config_file: Path | None = field(init=False, default=None, repr=False)
|
|
373
436
|
|
climate_ref/database.py
CHANGED
|
@@ -8,6 +8,7 @@ The `Database` class is the main entry point for interacting with the database.
|
|
|
8
8
|
It provides a session object that can be used to interact with the database and run queries.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
+
import enum
|
|
11
12
|
import importlib.resources
|
|
12
13
|
import shutil
|
|
13
14
|
from datetime import datetime
|
|
@@ -23,6 +24,7 @@ from loguru import logger
|
|
|
23
24
|
from sqlalchemy.orm import Session
|
|
24
25
|
|
|
25
26
|
from climate_ref.models import MetricValue, Table
|
|
27
|
+
from climate_ref.models.execution import ExecutionOutput
|
|
26
28
|
from climate_ref_core.pycmec.controlled_vocabulary import CV
|
|
27
29
|
|
|
28
30
|
if TYPE_CHECKING:
|
|
@@ -135,6 +137,16 @@ def validate_database_url(database_url: str) -> str:
|
|
|
135
137
|
return database_url
|
|
136
138
|
|
|
137
139
|
|
|
140
|
+
class ModelState(enum.Enum):
    """
    Outcome of a database operation on a model instance

    Returned alongside the instance by the `Database` helpers so that callers
    can tell whether anything changed.
    """

    # A new instance was built and added to the session
    CREATED = "created"
    # An existing instance had at least one attribute changed
    UPDATED = "updated"
    # NOTE(review): not produced by the code visible here - presumably for removed instances
    DELETED = "deleted"
|
|
148
|
+
|
|
149
|
+
|
|
138
150
|
class Database:
|
|
139
151
|
"""
|
|
140
152
|
Manage the database connection and migrations
|
|
@@ -234,11 +246,57 @@ class Database:
|
|
|
234
246
|
# This will add new columns to the db if the CVs have changed
|
|
235
247
|
MetricValue.register_cv_dimensions(cv)
|
|
236
248
|
|
|
249
|
+
# Register the CV dimensions with the ExecutionOutput model
|
|
250
|
+
# This enables dimension-based filtering of outputs
|
|
251
|
+
ExecutionOutput.register_cv_dimensions(cv)
|
|
252
|
+
|
|
237
253
|
return db
|
|
238
254
|
|
|
255
|
+
def update_or_create(
    self, model: type[Table], defaults: dict[str, Any] | None = None, **kwargs: Any
) -> tuple[Table, ModelState | None]:
    """
    Fetch the instance matching ``kwargs`` and align it with ``defaults``, creating it if absent

    Nothing is committed here: call `session.commit()` afterwards
    or run this inside a transaction context manager.

    Parameters
    ----------
    model
        The model class to query and, if needed, instantiate
    defaults
        Values applied to an existing instance where they differ,
        or merged with ``kwargs`` when a new instance is built
    kwargs
        The filter parameters identifying the instance

    Returns
    -------
    :
        The instance and its state: `ModelState.CREATED` for a new instance,
        `ModelState.UPDATED` if at least one attribute was changed,
        or `None` if the existing instance already matched ``defaults``
    """
    found = self.session.query(model).filter_by(**kwargs).first()

    if not found:
        # No match: build the instance from the filter values overlaid with the defaults
        created = model(**{**kwargs, **(defaults or {})})
        self.session.add(created)
        return created, ModelState.CREATED

    # Existing instance: only touch attributes whose value actually differs
    outcome: ModelState | None = None
    for attribute, wanted in (defaults or {}).items():
        if getattr(found, attribute) != wanted:
            logger.debug(f"Updating {model.__name__} {attribute} to {wanted}")
            setattr(found, attribute, wanted)
            outcome = ModelState.UPDATED

    return found, outcome
|
|
296
|
+
|
|
239
297
|
def get_or_create(
|
|
240
298
|
self, model: type[Table], defaults: dict[str, Any] | None = None, **kwargs: Any
|
|
241
|
-
) -> tuple[Table,
|
|
299
|
+
) -> tuple[Table, ModelState | None]:
|
|
242
300
|
"""
|
|
243
301
|
Get or create an instance of a model
|
|
244
302
|
|
|
@@ -258,13 +316,13 @@ class Database:
|
|
|
258
316
|
Returns
|
|
259
317
|
-------
|
|
260
318
|
:
|
|
261
|
-
A tuple containing the instance and
|
|
319
|
+
A tuple containing the instance and enum indicating if the instance was created
|
|
262
320
|
"""
|
|
263
321
|
instance = self.session.query(model).filter_by(**kwargs).first()
|
|
264
322
|
if instance:
|
|
265
|
-
return instance,
|
|
323
|
+
return instance, None
|
|
266
324
|
else:
|
|
267
325
|
params = {**kwargs, **(defaults or {})}
|
|
268
326
|
instance = model(**params)
|
|
269
327
|
self.session.add(instance)
|
|
270
|
-
return instance,
|
|
328
|
+
return instance, ModelState.CREATED
|
|
@@ -5,15 +5,6 @@ obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_REF_gn_200
|
|
|
5
5
|
obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:b7a911e0fc164d07d3ab42a86d09b18b
|
|
6
6
|
obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:c100cf25d5681c375cd6c1ee60b678ba
|
|
7
7
|
obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:9ed8dfbb805ed4caa282ed70f873a3a0
|
|
8
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc md5:695633a2b401cfb66c8addbf58073dbc
|
|
9
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc md5:404f1e1f111859be06c00bcb8d740ff2
|
|
10
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc md5:a1bb8584d60cdd71154c01a692fa1fb4
|
|
11
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc md5:b78016a3c61d99dc0fd29563aa344ca1
|
|
12
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc md5:d64c231a7f798a255997ffe196613ea1
|
|
13
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc md5:7d90ce60b872dc4f044b9b0101114983
|
|
14
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc md5:2fc032707cb8a31ac60fa4abe9efe183
|
|
15
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc md5:6022d17e11df7818f5b0429d6e401d17
|
|
16
|
-
obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc md5:c68fdabf6eeb4813befceace089c9494
|
|
17
8
|
obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:1ae4587143f05ee81432b3d9960aab63
|
|
18
9
|
obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:261f02b8cbce18486548882a11f9aa34
|
|
19
10
|
obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:56fcd2df8ed2879f18b5e8c78134a148
|
|
@@ -68,16 +68,16 @@ CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_
|
|
|
68
68
|
CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_189001-190912.nc 5ce6e74fb80748e34a567b2895f029131c5980a292c744fbbf555c2235afe77f
|
|
69
69
|
CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_191001-191512.nc f4a83f01af6563a63f43e4497ba0ea6e90297fb076fbcc8d63ac0105e6450ab5
|
|
70
70
|
CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/fx/areacella/gn/v20190815/areacella_fx_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn.nc b67f7d92ee13d5f0fabc5397e8ba5743f11cb062fd2f761e42ae5ac8438e69a4
|
|
71
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/cli/gn/v20190308/
|
|
72
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clivi/gn/v20190308/
|
|
73
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clt/gn/v20190308/
|
|
74
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clwvi/gn/v20190308/
|
|
75
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/pr/gn/v20190401/
|
|
76
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rlut/gn/v20190308/
|
|
77
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rlutcs/gn/v20190308/
|
|
78
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rsut/gn/v20190308/
|
|
79
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rsutcs/gn/v20190308/
|
|
80
|
-
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/ta/gn/v20190308/
|
|
71
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/cli/gn/v20190308/cli_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc a3cf533720f63ad0cf7ae6668649df4b43169dfee8783ffea7889c285c6df925
|
|
72
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clivi/gn/v20190308/clivi_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 564c22fa5c94fe257a0bf613d4674b69e505b3c2967e69d0cf529654256bb5fd
|
|
73
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clt/gn/v20190308/clt_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc bb1e70874e7589f98ed6c9a02bc0a0612050ca373bc66670ae404e0a2d0a138b
|
|
74
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/clwvi/gn/v20190308/clwvi_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 7194d9f605e11f4f54ca93e08c045f3889f8d8d63ecebe40a78cf07913647d7a
|
|
75
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/pr/gn/v20190401/pr_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc cb5c48b389bcce3af009b32a7100c5669da0f79bd2058ebc3711489ca7ccbfb7
|
|
76
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rlut/gn/v20190308/rlut_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 948760afc79c7f9401d5ed1b94bded8919aed9297dc672f45917b2f9e0228973
|
|
77
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rlutcs/gn/v20190308/rlutcs_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 855961882d420cc2ddecd573c6b64c027c822402d57c3d157832fef42de0247b
|
|
78
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rsut/gn/v20190308/rsut_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 7f3cea7e273ad8b593b00fd0bee865949f20a11bf468df4ac91ca80657e8e37d
|
|
79
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/rsutcs/gn/v20190308/rsutcs_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc 0f7cd2a564cbc6382e35d4bbbd67fed3b4c337f8a926dd56222e1fbc7b77380d
|
|
80
|
+
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/ta/gn/v20190308/ta_Amon_CESM2_historical_r1i1p1f1_gn_199601-201412.nc c3cab5bbcd4cbf3563271e4e8b634b6a849182ae391e5dde21865ec7b22061ba
|
|
81
81
|
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonAnt/snc/gn/v20190308/snc_ImonAnt_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 3c933b6aaf471b170d6498fed4e01fa73e45169cb8e0790ed70051b69107482f
|
|
82
82
|
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonGre/snc/gn/v20190308/snc_ImonGre_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 6b6d84052cd6283663a60f416cc8e80cccfa0f4a2c963b5256ad208f567bbfde
|
|
83
83
|
CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/LImon/snc/gn/v20190308/snc_LImon_CESM2_historical_r1i1p1f1_gn_200001-201412.nc b0f91e0eb9eef40ffa0ff6b57b8016bf2d3c25e6b3fdc8d12a8aca4d27e692a3
|
|
@@ -216,15 +216,6 @@ obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_RE
|
|
|
216
216
|
obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc 14bdeae9e0b4b7bfe849c97dbdd29eae87f27d9464e8b3795d815369b13ffd0c
|
|
217
217
|
obs4REF/obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc 53262d8f9076f233399d149810a644464d3bb36ae0f131fd55f164bc623b78da
|
|
218
218
|
obs4REF/obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc 95bf8da4b8a071464688b527e822724c33c2794d100052eb12eb2804219ddb94
|
|
219
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc 36bd5cbda06258fb6aafd9fb2ccb79b4d08574116a6ebe8ccc48b6462bdb6419
|
|
220
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc 9b7802f845ca67f6b4d4bd0a73e0bce1c5042ecf3e7b209a5e470fd084ead238
|
|
221
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc 208a988bc440699beda1738342e7571c28dd2c3b2d169e0770c1764996bd41a4
|
|
222
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc 3bfb4dec6966cea160af4ce872302af4d84ee2bd8bd3bba91468a424e17d9eae
|
|
223
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc da16b7d20e764e25af3c6b834376bed5041872a0b11fab59234eca5cf1124495
|
|
224
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc 08ae50141a576dfcbba0a9cf15a32653f48fa88d58406b60d21383e50dd309f0
|
|
225
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc 488e55c4f6c858301abb957a5fb7de866e93fa54b234dbce08df652fad634649
|
|
226
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc 9c5c4656b929d1c6dba5d83d5459db61d7d543182e58e29168eacdb7f151b125
|
|
227
|
-
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc 98e254f10b15c4d90dd258f66b8352f6e8b758f9bd64f435c90cb3bdd99c7086
|
|
228
219
|
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc 50d2b48789dcd642641b30ab52cc0f3ad161c057220cda52788080b2be2b927e
|
|
229
220
|
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc 72f15a671e88cb0ec239af9e8c1a608bdf2837c884efde9721213481bcfa02a0
|
|
230
221
|
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc 54c939a1a461930230a1ae1423856c1929d5dd6bab72cbdad1fe24c5da579908
|
climate_ref/datasets/__init__.py
CHANGED
|
@@ -25,15 +25,15 @@ def get_dataset_adapter(source_type: str, **kwargs: Any) -> "DatasetAdapter":
|
|
|
25
25
|
DatasetAdapter instance
|
|
26
26
|
"""
|
|
27
27
|
if source_type.lower() == SourceDatasetType.CMIP6.value:
|
|
28
|
-
from climate_ref.datasets.cmip6 import CMIP6DatasetAdapter
|
|
28
|
+
from climate_ref.datasets.cmip6 import CMIP6DatasetAdapter # noqa: PLC0415
|
|
29
29
|
|
|
30
30
|
return CMIP6DatasetAdapter(**kwargs)
|
|
31
31
|
elif source_type.lower() == SourceDatasetType.obs4MIPs.value.lower():
|
|
32
|
-
from climate_ref.datasets.obs4mips import Obs4MIPsDatasetAdapter
|
|
32
|
+
from climate_ref.datasets.obs4mips import Obs4MIPsDatasetAdapter # noqa: PLC0415
|
|
33
33
|
|
|
34
34
|
return Obs4MIPsDatasetAdapter(**kwargs)
|
|
35
35
|
elif source_type.lower() == SourceDatasetType.PMPClimatology.value.lower():
|
|
36
|
-
from climate_ref.datasets.pmp_climatology import PMPClimatologyDatasetAdapter
|
|
36
|
+
from climate_ref.datasets.pmp_climatology import PMPClimatologyDatasetAdapter # noqa: PLC0415
|
|
37
37
|
|
|
38
38
|
return PMPClimatologyDatasetAdapter(**kwargs)
|
|
39
39
|
else:
|
climate_ref/datasets/base.py
CHANGED
|
@@ -2,16 +2,36 @@ from pathlib import Path
|
|
|
2
2
|
from typing import Any, Protocol, cast
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
+
from attrs import define
|
|
5
6
|
from loguru import logger
|
|
6
7
|
from sqlalchemy.orm import joinedload
|
|
7
8
|
|
|
8
9
|
from climate_ref.config import Config
|
|
9
|
-
from climate_ref.database import Database
|
|
10
|
+
from climate_ref.database import Database, ModelState
|
|
10
11
|
from climate_ref.datasets.utils import validate_path
|
|
11
12
|
from climate_ref.models.dataset import Dataset, DatasetFile
|
|
12
13
|
from climate_ref_core.exceptions import RefException
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
@define
class DatasetRegistrationResult:
    """
    Outcome of registering one dataset: the dataset plus a summary of its file changes
    """

    # The dataset the registration applies to
    dataset: Dataset
    # State of the dataset after registration, or None when nothing changed
    dataset_state: ModelState | None
    # Paths of files newly attached to the dataset
    files_added: list[str]
    # Paths of files whose stored details were changed
    files_updated: list[str]
    # Paths of files no longer present in the data catalog
    files_removed: list[str]
    # Paths of files that were already registered and needed no change
    files_unchanged: list[str]

    @property
    def total_changes(self) -> int:
        """Number of files that were added, updated or removed"""
        changed_groups = (self.files_added, self.files_updated, self.files_removed)
        return sum(len(paths) for paths in changed_groups)
|
|
33
|
+
|
|
34
|
+
|
|
15
35
|
def _log_duplicate_metadata(
|
|
16
36
|
data_catalog: pd.DataFrame, unique_metadata: pd.DataFrame, slug_column: str
|
|
17
37
|
) -> None:
|
|
@@ -26,7 +46,8 @@ def _log_duplicate_metadata(
|
|
|
26
46
|
invalid_dataset_columns = invalid_dataset_nunique[invalid_dataset_nunique.gt(1)].index.tolist()
|
|
27
47
|
|
|
28
48
|
# Include time_range in the list of invalid columns to make debugging easier
|
|
29
|
-
|
|
49
|
+
if "time_range" in data_catalog.columns and "time_range" not in invalid_dataset_columns:
|
|
50
|
+
invalid_dataset_columns.append("time_range")
|
|
30
51
|
|
|
31
52
|
data_catalog_subset = data_catalog[data_catalog[slug_column] == instance_id]
|
|
32
53
|
|
|
@@ -169,9 +190,9 @@ class DatasetAdapter(Protocol):
|
|
|
169
190
|
|
|
170
191
|
return data_catalog
|
|
171
192
|
|
|
172
|
-
def register_dataset(
|
|
193
|
+
def register_dataset( # noqa: PLR0915
|
|
173
194
|
self, config: Config, db: Database, data_catalog_dataset: pd.DataFrame
|
|
174
|
-
) ->
|
|
195
|
+
) -> DatasetRegistrationResult:
|
|
175
196
|
"""
|
|
176
197
|
Register a dataset in the database using the data catalog
|
|
177
198
|
|
|
@@ -187,7 +208,7 @@ class DatasetAdapter(Protocol):
|
|
|
187
208
|
Returns
|
|
188
209
|
-------
|
|
189
210
|
:
|
|
190
|
-
|
|
211
|
+
Registration result with dataset and file change information
|
|
191
212
|
"""
|
|
192
213
|
DatasetModel = self.dataset_cls
|
|
193
214
|
|
|
@@ -197,24 +218,104 @@ class DatasetAdapter(Protocol):
|
|
|
197
218
|
raise RefException(f"Found multiple datasets in the same directory: {unique_slugs}")
|
|
198
219
|
slug = unique_slugs[0]
|
|
199
220
|
|
|
221
|
+
# Upsert the dataset (create a new dataset or update the metadata)
|
|
200
222
|
dataset_metadata = data_catalog_dataset[list(self.dataset_specific_metadata)].iloc[0].to_dict()
|
|
201
|
-
dataset,
|
|
202
|
-
if
|
|
203
|
-
logger.
|
|
204
|
-
|
|
223
|
+
dataset, dataset_state = db.update_or_create(DatasetModel, defaults=dataset_metadata, slug=slug)
|
|
224
|
+
if dataset_state == ModelState.CREATED:
|
|
225
|
+
logger.info(f"Created new dataset: {dataset}")
|
|
226
|
+
elif dataset_state == ModelState.UPDATED:
|
|
227
|
+
logger.info(f"Updating existing dataset: {dataset}")
|
|
205
228
|
db.session.flush()
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
229
|
+
|
|
230
|
+
# Initialize result tracking
|
|
231
|
+
files_added = []
|
|
232
|
+
files_updated = []
|
|
233
|
+
files_removed = []
|
|
234
|
+
files_unchanged = []
|
|
235
|
+
|
|
236
|
+
# Get current files for this dataset
|
|
237
|
+
current_files = db.session.query(DatasetFile).filter_by(dataset_id=dataset.id).all()
|
|
238
|
+
current_file_paths = {f.path: f for f in current_files}
|
|
239
|
+
|
|
240
|
+
# Get new file data from data catalog
|
|
241
|
+
new_file_data = data_catalog_dataset.to_dict(orient="records")
|
|
242
|
+
new_file_lookup = {}
|
|
243
|
+
for dataset_file in new_file_data:
|
|
244
|
+
file_path = str(validate_path(dataset_file["path"]))
|
|
245
|
+
new_file_lookup[file_path] = {
|
|
246
|
+
"start_time": dataset_file["start_time"],
|
|
247
|
+
"end_time": dataset_file["end_time"],
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
new_file_paths = set(new_file_lookup.keys())
|
|
251
|
+
existing_file_paths = set(current_file_paths.keys())
|
|
252
|
+
|
|
253
|
+
# TODO: support removing files that are no longer present
|
|
254
|
+
# We want to keep a record of the dataset if it was used by a diagnostic in the past
|
|
255
|
+
files_to_remove = existing_file_paths - new_file_paths
|
|
256
|
+
if files_to_remove:
|
|
257
|
+
files_removed = list(files_to_remove)
|
|
258
|
+
logger.warning(f"Files to remove: {files_removed}")
|
|
259
|
+
raise NotImplementedError("Removing files is not yet supported")
|
|
260
|
+
|
|
261
|
+
# Update existing files if start/end times have changed
|
|
262
|
+
for file_path, existing_file in current_file_paths.items():
|
|
263
|
+
if file_path in new_file_lookup:
|
|
264
|
+
new_times = new_file_lookup[file_path]
|
|
265
|
+
if (
|
|
266
|
+
existing_file.start_time != new_times["start_time"]
|
|
267
|
+
or existing_file.end_time != new_times["end_time"]
|
|
268
|
+
):
|
|
269
|
+
logger.warning(f"Updating file times for {file_path}")
|
|
270
|
+
existing_file.start_time = new_times["start_time"]
|
|
271
|
+
existing_file.end_time = new_times["end_time"]
|
|
272
|
+
files_updated.append(file_path)
|
|
273
|
+
else:
|
|
274
|
+
files_unchanged.append(file_path)
|
|
275
|
+
|
|
276
|
+
# Add new files (batch operation)
|
|
277
|
+
files_to_add = new_file_paths - existing_file_paths
|
|
278
|
+
if files_to_add:
|
|
279
|
+
files_added = list(files_to_add)
|
|
280
|
+
new_dataset_files = []
|
|
281
|
+
for file_path in files_to_add:
|
|
282
|
+
file_times = new_file_lookup[file_path]
|
|
283
|
+
new_dataset_files.append(
|
|
284
|
+
DatasetFile(
|
|
285
|
+
path=file_path,
|
|
286
|
+
dataset_id=dataset.id,
|
|
287
|
+
start_time=file_times["start_time"],
|
|
288
|
+
end_time=file_times["end_time"],
|
|
289
|
+
)
|
|
215
290
|
)
|
|
216
|
-
)
|
|
217
|
-
|
|
291
|
+
db.session.add_all(new_dataset_files)
|
|
292
|
+
|
|
293
|
+
# Determine final dataset state
|
|
294
|
+
# If dataset metadata changed, use that state
|
|
295
|
+
# If no metadata changed but files changed, consider it updated
|
|
296
|
+
# If nothing changed, keep the original state (None for existing, CREATED for new)
|
|
297
|
+
final_dataset_state = dataset_state
|
|
298
|
+
if dataset_state is None and (files_added or files_updated or files_removed):
|
|
299
|
+
final_dataset_state = ModelState.UPDATED
|
|
300
|
+
|
|
301
|
+
result = DatasetRegistrationResult(
|
|
302
|
+
dataset=dataset,
|
|
303
|
+
dataset_state=final_dataset_state,
|
|
304
|
+
files_added=files_added,
|
|
305
|
+
files_updated=files_updated,
|
|
306
|
+
files_removed=files_removed,
|
|
307
|
+
files_unchanged=files_unchanged,
|
|
308
|
+
)
|
|
309
|
+
change_message = f": ({final_dataset_state.name})" if final_dataset_state else ""
|
|
310
|
+
logger.debug(
|
|
311
|
+
f"Dataset registration complete for {dataset.slug}{change_message} "
|
|
312
|
+
f"{len(files_added)} files added, "
|
|
313
|
+
f"{len(files_updated)} files updated, "
|
|
314
|
+
f"{len(files_removed)} files removed, "
|
|
315
|
+
f"{len(files_unchanged)} files unchanged"
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
return result
|
|
218
319
|
|
|
219
320
|
def _get_dataset_files(self, db: Database, limit: int | None = None) -> pd.DataFrame:
|
|
220
321
|
dataset_type = self.dataset_cls.__mapper_args__["polymorphic_identity"]
|
climate_ref/datasets/cmip6.py
CHANGED
|
@@ -119,6 +119,8 @@ class CMIP6DatasetAdapter(DatasetAdapter):
|
|
|
119
119
|
file_specific_metadata = ("start_time", "end_time", "path")
|
|
120
120
|
|
|
121
121
|
version_metadata = "version"
|
|
122
|
+
# See https://wcrp-cmip.github.io/WGCM_Infrastructure_Panel/Papers/CMIP6_global_attributes_filenames_CVs_v6.2.7.pdf
|
|
123
|
+
# under "Directory structure template"
|
|
122
124
|
dataset_id_metadata = (
|
|
123
125
|
"activity_id",
|
|
124
126
|
"institution_id",
|
climate_ref/datasets/obs4mips.py
CHANGED
|
@@ -7,7 +7,6 @@ from typing import Any
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
import xarray as xr
|
|
9
9
|
from ecgtools import Builder
|
|
10
|
-
from ecgtools.parsers.utilities import extract_attr_with_regex # type: ignore
|
|
11
10
|
from loguru import logger
|
|
12
11
|
|
|
13
12
|
from climate_ref.datasets.base import DatasetAdapter
|
|
@@ -15,7 +14,7 @@ from climate_ref.datasets.cmip6 import _parse_datetime
|
|
|
15
14
|
from climate_ref.models.dataset import Dataset, Obs4MIPsDataset
|
|
16
15
|
|
|
17
16
|
|
|
18
|
-
def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
17
|
+
def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]: # noqa: PLR0912
|
|
19
18
|
"""
|
|
20
19
|
Parser for obs4mips
|
|
21
20
|
|
|
@@ -41,6 +40,7 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
|
41
40
|
"source_type",
|
|
42
41
|
"variable_id",
|
|
43
42
|
"variant_label",
|
|
43
|
+
"source_version_number",
|
|
44
44
|
}
|
|
45
45
|
)
|
|
46
46
|
)
|
|
@@ -48,6 +48,10 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
|
48
48
|
try:
|
|
49
49
|
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
|
|
50
50
|
with xr.open_dataset(file, chunks={}, decode_times=time_coder) as ds:
|
|
51
|
+
if ds.attrs.get("activity_id", "") != "obs4MIPs":
|
|
52
|
+
traceback_message = f"{file} is not an obs4MIPs dataset"
|
|
53
|
+
raise TypeError(traceback_message)
|
|
54
|
+
|
|
51
55
|
has_none_value = any(ds.attrs.get(key) is None for key in keys)
|
|
52
56
|
if has_none_value:
|
|
53
57
|
missing_fields = [key for key in keys if ds.attrs.get(key) is None]
|
|
@@ -55,10 +59,6 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
|
55
59
|
raise AttributeError(traceback_message)
|
|
56
60
|
info = {key: ds.attrs.get(key) for key in keys}
|
|
57
61
|
|
|
58
|
-
if info["activity_id"] != "obs4MIPs":
|
|
59
|
-
traceback_message = f"{file} is not an obs4MIPs dataset"
|
|
60
|
-
raise TypeError(traceback_message)
|
|
61
|
-
|
|
62
62
|
variable_id = info["variable_id"]
|
|
63
63
|
|
|
64
64
|
if variable_id:
|
|
@@ -86,12 +86,12 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
|
86
86
|
else:
|
|
87
87
|
info["time_range"] = f"{start_time}-{end_time}"
|
|
88
88
|
info["path"] = str(file)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
89
|
+
# Parsing the version like for CMIP6 fails because some obs4REF paths
|
|
90
|
+
# do not include "v" in the version directory name.
|
|
91
|
+
# TODO: fix obs4REF paths
|
|
92
|
+
info["version"] = Path(file).parent.name
|
|
93
|
+
if not info["version"].startswith("v"): # type: ignore[union-attr]
|
|
94
|
+
info["version"] = "v{version}".format(**info)
|
|
95
95
|
return info
|
|
96
96
|
|
|
97
97
|
except (TypeError, AttributeError) as err:
|
|
@@ -99,7 +99,7 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
|
|
|
99
99
|
logger.warning(str(err.args[0]))
|
|
100
100
|
else:
|
|
101
101
|
logger.warning(str(err.args))
|
|
102
|
-
return {"INVALID_ASSET": file, "TRACEBACK":
|
|
102
|
+
return {"INVALID_ASSET": file, "TRACEBACK": str(err)}
|
|
103
103
|
except Exception:
|
|
104
104
|
logger.warning(traceback.format_exc())
|
|
105
105
|
return {"INVALID_ASSET": file, "TRACEBACK": traceback.format_exc()}
|
|
@@ -129,18 +129,22 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
|
|
|
129
129
|
"variant_label",
|
|
130
130
|
"long_name",
|
|
131
131
|
"units",
|
|
132
|
+
"version",
|
|
132
133
|
"vertical_levels",
|
|
133
134
|
"source_version_number",
|
|
134
135
|
slug_column,
|
|
135
136
|
)
|
|
136
137
|
|
|
137
138
|
file_specific_metadata = ("start_time", "end_time", "path")
|
|
138
|
-
version_metadata = "
|
|
139
|
+
version_metadata = "version"
|
|
140
|
+
# See ODS2.5 at https://doi.org/10.5281/zenodo.11500474 under "Directory structure template"
|
|
139
141
|
dataset_id_metadata = (
|
|
140
142
|
"activity_id",
|
|
141
143
|
"institution_id",
|
|
142
144
|
"source_id",
|
|
145
|
+
"frequency",
|
|
143
146
|
"variable_id",
|
|
147
|
+
"nominal_resolution",
|
|
144
148
|
"grid_label",
|
|
145
149
|
)
|
|
146
150
|
|
|
@@ -186,7 +190,14 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
|
|
|
186
190
|
self.version_metadata,
|
|
187
191
|
]
|
|
188
192
|
datasets["instance_id"] = datasets.apply(
|
|
189
|
-
lambda row: "obs4MIPs."
|
|
193
|
+
lambda row: "obs4MIPs."
|
|
194
|
+
+ ".".join(
|
|
195
|
+
[
|
|
196
|
+
row[item].replace(" ", "") if item == "nominal_resolution" else row[item]
|
|
197
|
+
for item in drs_items
|
|
198
|
+
]
|
|
199
|
+
),
|
|
200
|
+
axis=1,
|
|
190
201
|
)
|
|
191
202
|
datasets["finalised"] = True
|
|
192
203
|
return datasets
|