anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/create.py +3 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +72 -35
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/config.py +4 -3
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +87 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/utils.py +3 -0
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/data/dataset.py +11 -1
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/stores.py +20 -7
- anemoi/datasets/dates/__init__.py +113 -30
- anemoi/datasets/dates/groups.py +92 -19
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
- anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/input.py +0 -1065
- anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
- /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/commands/create.py
CHANGED
@@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
     """
 
     now = datetime.datetime.now()
-    LOG.info(f"Task {what}({args},{kwargs}) starting")
+    LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
 
     from anemoi.datasets.create import creator_factory
 
@@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
     c = creator_factory(what.replace("-", "_"), **options)
     result = c.run()
 
-    LOG.…
+    LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
     return result
 
 
@@ -57,6 +57,7 @@ class Create(Command):
         command_parser.add_argument("--trace", action="store_true")
 
     def run(self, args):
+
         now = time.time()
         if args.threads + args.processes:
             self.parallel_create(args)
anemoi/datasets/commands/inspect.py
CHANGED
@@ -311,7 +311,7 @@ class Version:
         print(f"🕰️ Dataset initialized {when(start)}.")
         if built and latest:
             speed = (latest - start) / built
-            eta = datetime.datetime.…
+            eta = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + speed * (total - built)
             print(f"🏁 ETA {when(eta)}.")
         else:
             if latest:
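A quick sketch of the ETA pattern used above: datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) is the recommended replacement for the deprecated naive-UTC utcnow() pattern, and yields a value that can be added to a timedelta. Values below are illustrative only.

import datetime

# naive UTC "now", equivalent to the old utcnow() behaviour
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
speed = datetime.timedelta(minutes=3)  # illustrative: time taken per group built
eta = now + speed * 10                 # 10 groups left -> ETA 30 minutes from now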
anemoi/datasets/commands/publish.py
ADDED
@@ -0,0 +1,30 @@
+import logging
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class Publish(Command):
+    """Publish a dataset."""
+
+    # This is a command that is used to publish a dataset.
+    # it is a class, inheriting from Command.
+
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, parser):
+        parser.add_argument("path", help="Path of the dataset to publish.")
+
+    def run(self, args):
+        try:
+            from anemoi.registry import publish_dataset
+        except ImportError:
+            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
+            return
+
+        publish_dataset(args.path)
+
+
+command = Publish
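The new command delegates entirely to the optional anemoi-registry package. A sketch of the equivalent call from Python (the dataset path below is made up):

# What `anemoi-datasets publish <path>` does under the hood,
# assuming anemoi-registry is installed:
from anemoi.registry import publish_dataset

publish_dataset("/path/to/dataset.zarr")  # hypothetical path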
anemoi/datasets/create/__init__.py
CHANGED
@@ -14,6 +14,7 @@ import os
 import time
 import uuid
 import warnings
+from copy import deepcopy
 from functools import cached_property
 
 import numpy as np
@@ -24,9 +25,11 @@ from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
 from anemoi.utils.humanize import compress_dates
 from anemoi.utils.humanize import seconds_to_human
+from earthkit.data.core.order import build_remapping
 
 from anemoi.datasets import MissingDateError
 from anemoi.datasets import open_dataset
+from anemoi.datasets.create.input.trace import enable_trace
 from anemoi.datasets.create.persistent import build_storage
 from anemoi.datasets.data.misc import as_first_date
 from anemoi.datasets.data.misc import as_last_date
@@ -132,7 +135,7 @@ class Dataset:
                 v = v.isoformat()
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
 
-    @…
+    @cached_property
     def anemoi_dataset(self):
         return open_dataset(self.path)
 
@@ -245,9 +248,9 @@ class Actor:  # TODO: rename to Creator
         missing_dates = z.attrs.get("missing_dates", [])
         missing_dates = sorted([np.datetime64(d) for d in missing_dates])
         if missing_dates != expected:
-            LOG.…
-            LOG.…
-            LOG.…
+            LOG.warning("Missing dates given in recipe do not match the actual missing dates in the dataset.")
+            LOG.warning(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
+            LOG.warning(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
             raise ValueError("Missing dates given in recipe do not match the actual missing dates in the dataset.")
 
         check_missing_dates(self.missing_dates)
@@ -308,7 +311,6 @@ class HasElementForDataMixin:
 
 
 def build_input_(main_config, output_config):
-    from earthkit.data.core.order import build_remapping
 
     builder = build_input(
         main_config.input,
@@ -323,11 +325,48 @@ def build_input_(main_config, output_config):
     return builder
 
 
+def tidy_recipe(config: object):
+    """Remove potentially private information in the config"""
+    config = deepcopy(config)
+    if isinstance(config, (tuple, list)):
+        return [tidy_recipe(_) for _ in config]
+    if isinstance(config, (dict, DotDict)):
+        for k, v in config.items():
+            if k.startswith("_"):
+                config[k] = "*** REMOVED FOR SECURITY ***"
+            else:
+                config[k] = tidy_recipe(v)
+    if isinstance(config, str):
+        if config.startswith("_"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("s3://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("gs://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("http"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ftp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("file"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ssh"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("scp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("rsync"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("/"):
+            return "*** REMOVED FOR SECURITY ***"
+        if "@" in config:
+            return "*** REMOVED FOR SECURITY ***"
+    return config
+
+
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs):  # fmt: skip
         if _path_readable(path) and not overwrite:
-            raise Exception(f"{…
+            raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
 
         super().__init__(path, cache=cache)
         self.config = config
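A sketch of what the new tidy_recipe does to a made-up recipe fragment, assuming the function is importable from anemoi.datasets.create as defined above:

from anemoi.datasets.create import tidy_recipe

recipe = {
    "description": "my dataset",
    "_token": "abc123",                  # key starts with "_"
    "path": "s3://my-bucket/era5.zarr",  # cloud URL
    "contact": "user@example.com",       # contains "@"
}
print(tidy_recipe(recipe))
# {'description': 'my dataset',
#  '_token': '*** REMOVED FOR SECURITY ***',
#  'path': '*** REMOVED FOR SECURITY ***',
#  'contact': '*** REMOVED FOR SECURITY ***'}

Keys are kept so the redacted recipe stays structurally valid; only the offending values are masked.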
@@ -345,9 +384,12 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         assert isinstance(self.main_config.output.order_by, dict), self.main_config.output.order_by
         self.create_elements(self.main_config)
 
-
-
-
+        LOG.info(f"Groups: {self.groups}")
+
+        one_date = self.groups.one_date()
+        # assert False, (type(one_date), type(self.groups))
+        self.minimal_input = self.input.select(one_date)
+        LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)
 
     def run(self):
@@ -363,13 +405,15 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         LOG.info("Config loaded ok:")
         # LOG.info(self.main_config)
 
-        dates = self.groups.…
-        frequency = …
+        dates = self.groups.provider.values
+        frequency = self.groups.provider.frequency
+        missing = self.groups.provider.missing
+
         assert isinstance(frequency, datetime.timedelta), frequency
 
         LOG.info(f"Found {len(dates)} datetimes.")
         LOG.info(f"Dates: Found {len(dates)} datetimes, in {len(self.groups)} groups: ")
-        LOG.info(f"Missing dates: {len(…
+        LOG.info(f"Missing dates: {len(missing)}")
         lengths = tuple(len(g) for g in self.groups)
 
         variables = self.minimal_input.variables
@@ -404,6 +448,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         metadata.update(self.main_config.get("add_metadata", {}))
 
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
+        metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
 
         metadata["description"] = self.main_config.description
         metadata["licence"] = self.main_config["licence"]
@@ -426,7 +471,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
         metadata["frequency"] = frequency
-        metadata["missing_dates"] = [_.isoformat() for _ in …
+        metadata["missing_dates"] = [_.isoformat() for _ in missing]
 
         metadata["version"] = VERSION
 
@@ -481,17 +526,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
         assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks())
 
-        def sanity_check_config(a, b):
-            a = json.dumps(a, sort_keys=True, default=str)
-            b = json.dumps(b, sort_keys=True, default=str)
-            b = b.replace("T", " ")  # dates are expected to be different because
-            if a != b:
-                print("❌❌❌ FIXME: Config serialisation to be checked")
-                print(a)
-                print(b)
-
-        sanity_check_config(self.main_config, self.dataset.get_main_config())
-
         # Return the number of groups to process, so we can show a nice progress bar
         return len(lengths)
 
@@ -527,11 +561,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 LOG.info(f" -> Skipping {igroup} total={len(self.groups)} (already done)")
                 continue
 
-            assert isinstance(group[0], datetime.datetime), group
+            # assert isinstance(group[0], datetime.datetime), type(group[0])
            LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
 
-            result = self.input.select(…
-            assert result.…
+            result = self.input.select(group_of_dates=group)
+            assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
 
             # There are several groups.
             # There is one result to load for each group.
@@ -545,7 +579,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
     def load_result(self, result):
         # There is one cube to load for each result.
-        dates = result.…
+        dates = list(result.group_of_dates)
 
         cube = result.get_cube()
         shape = cube.extended_user_shape
@@ -555,7 +589,9 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
         def check_shape(cube, dates, dates_in_data):
             if cube.extended_user_shape[0] != len(dates):
-                print(…
+                print(
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
+                )
                 print("Requested dates", compress_dates(dates))
                 print("Cube dates", compress_dates(dates_in_data))
 
@@ -566,7 +602,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 print("Extra dates", compress_dates(b - a))
 
                 raise ValueError(
-                    f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}"
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
                 )
 
         check_shape(cube, dates, dates_in_data)
@@ -846,7 +882,7 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         )
 
         if len(ifound) < 2:
-            LOG.…
+            LOG.warning(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
             self.tmp_storage.delete()
             return
 
@@ -919,7 +955,7 @@ def multi_addition(cls):
             self.actors.append(cls(*args, delta=k, **kwargs))
 
         if not self.actors:
-            LOG.warning("No delta found in kwargs, no …
+            LOG.warning("No delta found in kwargs, no additions will be computed.")
 
     def run(self):
         for actor in self.actors:
@@ -947,7 +983,9 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         )
         start, end = np.datetime64(start), np.datetime64(end)
         dates = self.dataset.anemoi_dataset.dates
-
+
+        assert type(dates[0]) is type(start), (type(dates[0]), type(start))
+
         dates = [d for d in dates if d >= start and d <= end]
         dates = [d for i, d in enumerate(dates) if i not in self.dataset.anemoi_dataset.missing]
         variables = self.dataset.anemoi_dataset.variables
@@ -956,7 +994,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(stats)
 
         if not all(self.registry.get_flags(sync=False)):
-            raise Exception(f"❗Zarr {self.path} is not fully built, not …
+            raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
 
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
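The new assert ensures the dataset's dates and the start/end bounds are the same type before the comparisons that follow. A minimal illustration with made-up dates:

import numpy as np

start = np.datetime64("2020-01-01")
dates = [np.datetime64("2019-12-31"), np.datetime64("2020-06-01")]

assert type(dates[0]) is type(start), (type(dates[0]), type(start))
selected = [d for d in dates if d >= start]  # [numpy.datetime64('2020-06-01')]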
@@ -994,7 +1032,6 @@ def chain(tasks):
 
 def creator_factory(name, trace=None, **kwargs):
     if trace:
-        from anemoi.datasets.create.trace import enable_trace
 
         enable_trace(trace)
 
anemoi/datasets/create/check.py
CHANGED
@@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):
 
 def check_data_values(arr, *, name: str, log=[], allow_nans=False):
 
+    shape = arr.shape
+
     if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
         arr = arr[~np.isnan(arr)]
 
+    if arr.size == 0:
+        warnings.warn(f"Empty array for {name} ({shape})")
+        return
+
     assert arr.size > 0, (name, *log)
 
     min, max = arr.min(), arr.max()
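With this change, an array that becomes empty after NaN-filtering warns instead of tripping the size assertion. A sketch of the new behaviour (the variable name "2t" is illustrative):

import numpy as np
from anemoi.datasets.create.check import check_data_values

arr = np.array([np.nan, np.nan])
check_data_values(arr, name="2t", allow_nans=True)
# UserWarning: Empty array for 2t ((2,))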
anemoi/datasets/create/config.py
CHANGED
@@ -215,8 +215,9 @@ def set_to_test_mode(cfg):
     NUMBER_OF_DATES = 4
 
     dates = cfg["dates"]
-    LOG.…
+    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
     groups = Groups(**LoadersConfig(cfg).dates)
+
     dates = groups.dates
     cfg["dates"] = dict(
         start=dates[0],
@@ -234,12 +235,12 @@ def set_to_test_mode(cfg):
     if "grid" in obj:
         previous = obj["grid"]
         obj["grid"] = "20./20."
-        LOG.…
+        LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
     if "number" in obj:
         if isinstance(obj["number"], (list, tuple)):
             previous = obj["number"]
             obj["number"] = previous[0:3]
-            LOG.…
+            LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
     for k, v in obj.items():
         set_element_to_test(v)
     if "constants" in obj:
anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py
ADDED
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, rh, q="q"):
+    """Convert relative humidity on pressure levels to specific humidity"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        rh_pl = values[rh].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from rh --> q_v
+        q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
+        result.append(NewDataField(values[rh], q_pl, q))
+
+    return result
anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py
ADDED
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, q, rh="r"):
+    """Convert specific humidity on pressure levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, q)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        q_pl = values[q].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from q --> rh
+        rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
+        result.append(NewDataField(values[q], rh_pl, rh))
+
+    return result
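Both pressure-level filters delegate the actual conversion to earthkit.meteo.thermo, after converting the pressure level from hPa to Pa. A standalone sketch with made-up values (units assumed: K, %, Pa):

import numpy as np
from earthkit.meteo import thermo

t = np.array([270.0, 280.0, 290.0])  # temperature [K]
r = np.array([60.0, 70.0, 80.0])     # relative humidity [%]
p = 850 * 100                        # 850 hPa pressure level, in Pa

q = thermo.specific_humidity_from_relative_humidity(t, r, p)
r_back = thermo.relative_humidity_from_specific_humidity(t, q, p)  # ~= r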
anemoi/datasets/create/functions/filters/rename.py
CHANGED
@@ -32,7 +32,7 @@ class RenamedFieldMapping:
 
         value = self.field.metadata(key, **kwargs)
         if key == self.what:
-            return self.renaming.get(value, value)
+            return self.renaming.get(self.what, {}).get(value, value)
 
         return value
 
@@ -68,8 +68,7 @@ class RenamedFieldFormat:
 
 
 def execute(context, input, what="param", **kwargs):
-
-    if what in kwargs:
+    if what in kwargs and isinstance(kwargs[what], str):
         return FieldArray([RenamedFieldFormat(fs, kwargs[what]) for fs in input])
 
     return FieldArray([RenamedFieldMapping(fs, what, kwargs) for fs in input])
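The one-line change in RenamedFieldMapping fixes the lookup: execute passes the whole kwargs mapping as the renaming table, so it must first be indexed by the renamed key (what) before looking up the value. A minimal reproduction with illustrative names:

renaming = {"param": {"2t": "t2m", "10u": "u10"}}  # as passed by execute(...)
what, value = "param", "2t"

before = renaming.get(value, value)                # "2t"  (lookup never matched)
after = renaming.get(what, {}).get(value, value)   # "t2m" (renamed as intended)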
anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py
ADDED
@@ -0,0 +1,54 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, td, rh="d"):
+    """Convert dewpoint on single levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, td)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        td_values = values[td].to_numpy(flatten=True)
+        # actual conversion from td --> rh
+        rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
+        result.append(NewDataField(values[td], rh_values, rh))
+
+    return result
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py
ADDED
@@ -0,0 +1,59 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+EPS = 1.0e-4
+
+
+def execute(context, input, t, rh, td="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        rh_values = values[rh].to_numpy(flatten=True)
+        # Prevent 0 % Relative humidity which cannot be converted to dewpoint
+        # Seems to happen over Egypt in the CERRA dataset
+        rh_values[rh_values == 0] = EPS
+        # actual conversion from rh --> td
+        td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
+        result.append(NewDataField(values[rh], td_values, td))
+
+    return result
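The EPS clamp above is needed because computing dewpoint from relative humidity inverts the saturation vapour-pressure curve, which involves a logarithm of the humidity, so an input of exactly 0 % maps to -inf. A sketch with made-up values (units assumed: K and %):

import numpy as np
from earthkit.meteo import thermo

t = np.array([300.0, 300.0])   # temperature [K]
r = np.array([0.0, 50.0])      # relative humidity [%]; the 0 % sample cannot be inverted
r[r == 0] = 1.0e-4             # same guard as EPS in the filter
td = thermo.dewpoint_from_relative_humidity(t=t, r=r)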