anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/create.py +3 -2
- anemoi/datasets/create/__init__.py +30 -32
- anemoi/datasets/create/config.py +4 -3
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/grib.py +86 -1
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/field.py +7 -1
- anemoi/datasets/create/functions/sources/xarray/metadata.py +13 -11
- anemoi/datasets/create/input.py +39 -17
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/utils.py +3 -0
- anemoi/datasets/data/dataset.py +11 -1
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/stores.py +20 -7
- anemoi/datasets/dates/__init__.py +112 -30
- anemoi/datasets/dates/groups.py +84 -19
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +10 -19
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/RECORD +33 -24
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED
|
anemoi/datasets/commands/create.py
CHANGED
|
@@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
now = datetime.datetime.now()
|
|
22
|
-
LOG.info(f"Task {what}({args},{kwargs}) starting")
|
|
22
|
+
LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
|
|
23
23
|
|
|
24
24
|
from anemoi.datasets.create import creator_factory
|
|
25
25
|
|
|
@@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
|
|
|
28
28
|
c = creator_factory(what.replace("-", "_"), **options)
|
|
29
29
|
result = c.run()
|
|
30
30
|
|
|
31
|
-
LOG.
|
|
31
|
+
LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
|
|
32
32
|
return result
|
|
33
33
|
|
|
34
34
|
|
|
@@ -57,6 +57,7 @@ class Create(Command):
|
|
|
57
57
|
command_parser.add_argument("--trace", action="store_true")
|
|
58
58
|
|
|
59
59
|
def run(self, args):
|
|
60
|
+
|
|
60
61
|
now = time.time()
|
|
61
62
|
if args.threads + args.processes:
|
|
62
63
|
self.parallel_create(args)
|
anemoi/datasets/create/__init__.py
CHANGED
|
@@ -132,7 +132,7 @@ class Dataset:
|
|
|
132
132
|
v = v.isoformat()
|
|
133
133
|
z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
|
|
134
134
|
|
|
135
|
-
@
|
|
135
|
+
@cached_property
|
|
136
136
|
def anemoi_dataset(self):
|
|
137
137
|
return open_dataset(self.path)
|
|
138
138
|
|
|
@@ -245,9 +245,9 @@ class Actor: # TODO: rename to Creator
|
|
|
245
245
|
missing_dates = z.attrs.get("missing_dates", [])
|
|
246
246
|
missing_dates = sorted([np.datetime64(d) for d in missing_dates])
|
|
247
247
|
if missing_dates != expected:
|
|
248
|
-
LOG.
|
|
249
|
-
LOG.
|
|
250
|
-
LOG.
|
|
248
|
+
LOG.warning("Missing dates given in recipe do not match the actual missing dates in the dataset.")
|
|
249
|
+
LOG.warning(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
|
|
250
|
+
LOG.warning(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
|
|
251
251
|
raise ValueError("Missing dates given in recipe do not match the actual missing dates in the dataset.")
|
|
252
252
|
|
|
253
253
|
check_missing_dates(self.missing_dates)
|
|
@@ -327,7 +327,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
327
327
|
dataset_class = NewDataset
|
|
328
328
|
def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
|
|
329
329
|
if _path_readable(path) and not overwrite:
|
|
330
|
-
raise Exception(f"{
|
|
330
|
+
raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
|
|
331
331
|
|
|
332
332
|
super().__init__(path, cache=cache)
|
|
333
333
|
self.config = config
|
|
@@ -345,9 +345,12 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
345
345
|
assert isinstance(self.main_config.output.order_by, dict), self.main_config.output.order_by
|
|
346
346
|
self.create_elements(self.main_config)
|
|
347
347
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
348
|
+
LOG.info(f"Groups: {self.groups}")
|
|
349
|
+
|
|
350
|
+
one_date = self.groups.one_date()
|
|
351
|
+
# assert False, (type(one_date), type(self.groups))
|
|
352
|
+
self.minimal_input = self.input.select(one_date)
|
|
353
|
+
LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
|
|
351
354
|
LOG.info(self.minimal_input)
|
|
352
355
|
|
|
353
356
|
def run(self):
|
|
@@ -363,13 +366,15 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
363
366
|
LOG.info("Config loaded ok:")
|
|
364
367
|
# LOG.info(self.main_config)
|
|
365
368
|
|
|
366
|
-
dates = self.groups.
|
|
367
|
-
frequency =
|
|
369
|
+
dates = self.groups.provider.values
|
|
370
|
+
frequency = self.groups.provider.frequency
|
|
371
|
+
missing = self.groups.provider.missing
|
|
372
|
+
|
|
368
373
|
assert isinstance(frequency, datetime.timedelta), frequency
|
|
369
374
|
|
|
370
375
|
LOG.info(f"Found {len(dates)} datetimes.")
|
|
371
376
|
LOG.info(f"Dates: Found {len(dates)} datetimes, in {len(self.groups)} groups: ")
|
|
372
|
-
LOG.info(f"Missing dates: {len(
|
|
377
|
+
LOG.info(f"Missing dates: {len(missing)}")
|
|
373
378
|
lengths = tuple(len(g) for g in self.groups)
|
|
374
379
|
|
|
375
380
|
variables = self.minimal_input.variables
|
|
@@ -426,7 +431,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
426
431
|
metadata["start_date"] = dates[0].isoformat()
|
|
427
432
|
metadata["end_date"] = dates[-1].isoformat()
|
|
428
433
|
metadata["frequency"] = frequency
|
|
429
|
-
metadata["missing_dates"] = [_.isoformat() for _ in
|
|
434
|
+
metadata["missing_dates"] = [_.isoformat() for _ in missing]
|
|
430
435
|
|
|
431
436
|
metadata["version"] = VERSION
|
|
432
437
|
|
|
@@ -481,17 +486,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
481
486
|
|
|
482
487
|
assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks())
|
|
483
488
|
|
|
484
|
-
def sanity_check_config(a, b):
|
|
485
|
-
a = json.dumps(a, sort_keys=True, default=str)
|
|
486
|
-
b = json.dumps(b, sort_keys=True, default=str)
|
|
487
|
-
b = b.replace("T", " ") # dates are expected to be different because
|
|
488
|
-
if a != b:
|
|
489
|
-
print("❌❌❌ FIXME: Config serialisation to be checked")
|
|
490
|
-
print(a)
|
|
491
|
-
print(b)
|
|
492
|
-
|
|
493
|
-
sanity_check_config(self.main_config, self.dataset.get_main_config())
|
|
494
|
-
|
|
495
489
|
# Return the number of groups to process, so we can show a nice progress bar
|
|
496
490
|
return len(lengths)
|
|
497
491
|
|
|
@@ -527,11 +521,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
527
521
|
LOG.info(f" -> Skipping {igroup} total={len(self.groups)} (already done)")
|
|
528
522
|
continue
|
|
529
523
|
|
|
530
|
-
assert isinstance(group[0], datetime.datetime), group
|
|
524
|
+
# assert isinstance(group[0], datetime.datetime), type(group[0])
|
|
531
525
|
LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
|
|
532
526
|
|
|
533
527
|
result = self.input.select(dates=group)
|
|
534
|
-
assert result.
|
|
528
|
+
assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
|
|
535
529
|
|
|
536
530
|
# There are several groups.
|
|
537
531
|
# There is one result to load for each group.
|
|
@@ -545,7 +539,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
545
539
|
|
|
546
540
|
def load_result(self, result):
|
|
547
541
|
# There is one cube to load for each result.
|
|
548
|
-
dates = result.
|
|
542
|
+
dates = list(result.group_of_dates)
|
|
549
543
|
|
|
550
544
|
cube = result.get_cube()
|
|
551
545
|
shape = cube.extended_user_shape
|
|
@@ -555,7 +549,9 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
555
549
|
|
|
556
550
|
def check_shape(cube, dates, dates_in_data):
|
|
557
551
|
if cube.extended_user_shape[0] != len(dates):
|
|
558
|
-
print(
|
|
552
|
+
print(
|
|
553
|
+
f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
|
|
554
|
+
)
|
|
559
555
|
print("Requested dates", compress_dates(dates))
|
|
560
556
|
print("Cube dates", compress_dates(dates_in_data))
|
|
561
557
|
|
|
@@ -566,7 +562,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
566
562
|
print("Extra dates", compress_dates(b - a))
|
|
567
563
|
|
|
568
564
|
raise ValueError(
|
|
569
|
-
f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}"
|
|
565
|
+
f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
|
|
570
566
|
)
|
|
571
567
|
|
|
572
568
|
check_shape(cube, dates, dates_in_data)
|
|
@@ -846,7 +842,7 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
|
|
|
846
842
|
)
|
|
847
843
|
|
|
848
844
|
if len(ifound) < 2:
|
|
849
|
-
LOG.
|
|
845
|
+
LOG.warning(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
|
|
850
846
|
self.tmp_storage.delete()
|
|
851
847
|
return
|
|
852
848
|
|
|
@@ -919,7 +915,7 @@ def multi_addition(cls):
|
|
|
919
915
|
self.actors.append(cls(*args, delta=k, **kwargs))
|
|
920
916
|
|
|
921
917
|
if not self.actors:
|
|
922
|
-
LOG.warning("No delta found in kwargs, no
|
|
918
|
+
LOG.warning("No delta found in kwargs, no additions will be computed.")
|
|
923
919
|
|
|
924
920
|
def run(self):
|
|
925
921
|
for actor in self.actors:
|
|
@@ -947,7 +943,9 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
|
|
|
947
943
|
)
|
|
948
944
|
start, end = np.datetime64(start), np.datetime64(end)
|
|
949
945
|
dates = self.dataset.anemoi_dataset.dates
|
|
950
|
-
|
|
946
|
+
|
|
947
|
+
assert type(dates[0]) is type(start), (type(dates[0]), type(start))
|
|
948
|
+
|
|
951
949
|
dates = [d for d in dates if d >= start and d <= end]
|
|
952
950
|
dates = [d for i, d in enumerate(dates) if i not in self.dataset.anemoi_dataset.missing]
|
|
953
951
|
variables = self.dataset.anemoi_dataset.variables
|
|
@@ -956,7 +954,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
|
|
|
956
954
|
LOG.info(stats)
|
|
957
955
|
|
|
958
956
|
if not all(self.registry.get_flags(sync=False)):
|
|
959
|
-
raise Exception(f"❗Zarr {self.path} is not fully built, not
|
|
957
|
+
raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
|
|
960
958
|
|
|
961
959
|
for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
|
|
962
960
|
self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
|
anemoi/datasets/create/config.py
CHANGED
|
@@ -215,8 +215,9 @@ def set_to_test_mode(cfg):
|
|
|
215
215
|
NUMBER_OF_DATES = 4
|
|
216
216
|
|
|
217
217
|
dates = cfg["dates"]
|
|
218
|
-
LOG.
|
|
218
|
+
LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
|
|
219
219
|
groups = Groups(**LoadersConfig(cfg).dates)
|
|
220
|
+
|
|
220
221
|
dates = groups.dates
|
|
221
222
|
cfg["dates"] = dict(
|
|
222
223
|
start=dates[0],
|
|
@@ -234,12 +235,12 @@ def set_to_test_mode(cfg):
|
|
|
234
235
|
if "grid" in obj:
|
|
235
236
|
previous = obj["grid"]
|
|
236
237
|
obj["grid"] = "20./20."
|
|
237
|
-
LOG.
|
|
238
|
+
LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
|
|
238
239
|
if "number" in obj:
|
|
239
240
|
if isinstance(obj["number"], (list, tuple)):
|
|
240
241
|
previous = obj["number"]
|
|
241
242
|
obj["number"] = previous[0:3]
|
|
242
|
-
LOG.
|
|
243
|
+
LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
|
|
243
244
|
for k, v in obj.items():
|
|
244
245
|
set_element_to_test(v)
|
|
245
246
|
if "constants" in obj:
|
anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
13
|
+
from earthkit.meteo import thermo
|
|
14
|
+
|
|
15
|
+
from .single_level_specific_humidity_to_relative_humidity import NewDataField
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def execute(context, input, t, rh, q="q"):
|
|
19
|
+
"""Convert relative humidity on pressure levels to specific humidity"""
|
|
20
|
+
result = FieldArray()
|
|
21
|
+
|
|
22
|
+
params = (t, rh)
|
|
23
|
+
pairs = defaultdict(dict)
|
|
24
|
+
|
|
25
|
+
# Gather all necessary fields
|
|
26
|
+
for f in input:
|
|
27
|
+
key = f.metadata(namespace="mars")
|
|
28
|
+
param = key.pop("param")
|
|
29
|
+
if param in params:
|
|
30
|
+
key = tuple(key.items())
|
|
31
|
+
|
|
32
|
+
if param in pairs[key]:
|
|
33
|
+
raise ValueError(f"Duplicate field {param} for {key}")
|
|
34
|
+
|
|
35
|
+
pairs[key][param] = f
|
|
36
|
+
if param == t:
|
|
37
|
+
result.append(f)
|
|
38
|
+
# all other parameters
|
|
39
|
+
else:
|
|
40
|
+
result.append(f)
|
|
41
|
+
|
|
42
|
+
for keys, values in pairs.items():
|
|
43
|
+
# some checks
|
|
44
|
+
|
|
45
|
+
if len(values) != 2:
|
|
46
|
+
raise ValueError("Missing fields")
|
|
47
|
+
|
|
48
|
+
t_pl = values[t].to_numpy(flatten=True)
|
|
49
|
+
rh_pl = values[rh].to_numpy(flatten=True)
|
|
50
|
+
pressure = keys[4][1] * 100 # TODO: REMOVE HARDCODED INDICES
|
|
51
|
+
# print(f"Handling fields for pressure level {pressure}...")
|
|
52
|
+
|
|
53
|
+
# actual conversion from rh --> q_v
|
|
54
|
+
q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
|
|
55
|
+
result.append(NewDataField(values[rh], q_pl, q))
|
|
56
|
+
|
|
57
|
+
return result
|
anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
13
|
+
from earthkit.meteo import thermo
|
|
14
|
+
|
|
15
|
+
from .single_level_specific_humidity_to_relative_humidity import NewDataField
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def execute(context, input, t, q, rh="r"):
|
|
19
|
+
"""Convert specific humidity on pressure levels to relative humidity"""
|
|
20
|
+
result = FieldArray()
|
|
21
|
+
|
|
22
|
+
params = (t, q)
|
|
23
|
+
pairs = defaultdict(dict)
|
|
24
|
+
|
|
25
|
+
# Gather all necessary fields
|
|
26
|
+
for f in input:
|
|
27
|
+
key = f.metadata(namespace="mars")
|
|
28
|
+
param = key.pop("param")
|
|
29
|
+
if param in params:
|
|
30
|
+
key = tuple(key.items())
|
|
31
|
+
|
|
32
|
+
if param in pairs[key]:
|
|
33
|
+
raise ValueError(f"Duplicate field {param} for {key}")
|
|
34
|
+
|
|
35
|
+
pairs[key][param] = f
|
|
36
|
+
if param == t:
|
|
37
|
+
result.append(f)
|
|
38
|
+
# all other parameters
|
|
39
|
+
else:
|
|
40
|
+
result.append(f)
|
|
41
|
+
|
|
42
|
+
for keys, values in pairs.items():
|
|
43
|
+
# some checks
|
|
44
|
+
|
|
45
|
+
if len(values) != 2:
|
|
46
|
+
raise ValueError("Missing fields")
|
|
47
|
+
|
|
48
|
+
t_pl = values[t].to_numpy(flatten=True)
|
|
49
|
+
q_pl = values[q].to_numpy(flatten=True)
|
|
50
|
+
pressure = keys[4][1] * 100 # TODO: REMOVE HARDCODED INDICES
|
|
51
|
+
# print(f"Handling fields for pressure level {pressure}...")
|
|
52
|
+
|
|
53
|
+
# actual conversion from q_v --> rh
|
|
54
|
+
rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
|
|
55
|
+
result.append(NewDataField(values[q], rh_pl, rh))
|
|
56
|
+
|
|
57
|
+
return result
|
anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
13
|
+
from earthkit.meteo import thermo
|
|
14
|
+
|
|
15
|
+
from .single_level_specific_humidity_to_relative_humidity import NewDataField
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def execute(context, input, t, td, rh="d"):
|
|
19
|
+
"""Convert dewpoint on single levels to relative humidity"""
|
|
20
|
+
result = FieldArray()
|
|
21
|
+
|
|
22
|
+
params = (t, td)
|
|
23
|
+
pairs = defaultdict(dict)
|
|
24
|
+
|
|
25
|
+
# Gather all necessary fields
|
|
26
|
+
for f in input:
|
|
27
|
+
key = f.metadata(namespace="mars")
|
|
28
|
+
param = key.pop("param")
|
|
29
|
+
if param in params:
|
|
30
|
+
key = tuple(key.items())
|
|
31
|
+
|
|
32
|
+
if param in pairs[key]:
|
|
33
|
+
raise ValueError(f"Duplicate field {param} for {key}")
|
|
34
|
+
|
|
35
|
+
pairs[key][param] = f
|
|
36
|
+
if param == t:
|
|
37
|
+
result.append(f)
|
|
38
|
+
# all other parameters
|
|
39
|
+
else:
|
|
40
|
+
result.append(f)
|
|
41
|
+
|
|
42
|
+
for keys, values in pairs.items():
|
|
43
|
+
# some checks
|
|
44
|
+
|
|
45
|
+
if len(values) != 2:
|
|
46
|
+
raise ValueError("Missing fields")
|
|
47
|
+
|
|
48
|
+
t_values = values[t].to_numpy(flatten=True)
|
|
49
|
+
td_values = values[td].to_numpy(flatten=True)
|
|
50
|
+
# actual conversion from td --> rh
|
|
51
|
+
rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
|
|
52
|
+
result.append(NewDataField(values[td], rh_values, rh))
|
|
53
|
+
|
|
54
|
+
return result
|
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
13
|
+
from earthkit.meteo import thermo
|
|
14
|
+
|
|
15
|
+
from .single_level_specific_humidity_to_relative_humidity import NewDataField
|
|
16
|
+
|
|
17
|
+
EPS = 1.0e-4
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def execute(context, input, t, rh, td="d"):
|
|
21
|
+
"""Convert relative humidity on single levels to dewpoint"""
|
|
22
|
+
result = FieldArray()
|
|
23
|
+
|
|
24
|
+
params = (t, rh)
|
|
25
|
+
pairs = defaultdict(dict)
|
|
26
|
+
|
|
27
|
+
# Gather all necessary fields
|
|
28
|
+
for f in input:
|
|
29
|
+
key = f.metadata(namespace="mars")
|
|
30
|
+
param = key.pop("param")
|
|
31
|
+
if param in params:
|
|
32
|
+
key = tuple(key.items())
|
|
33
|
+
|
|
34
|
+
if param in pairs[key]:
|
|
35
|
+
raise ValueError(f"Duplicate field {param} for {key}")
|
|
36
|
+
|
|
37
|
+
pairs[key][param] = f
|
|
38
|
+
if param == t:
|
|
39
|
+
result.append(f)
|
|
40
|
+
# all other parameters
|
|
41
|
+
else:
|
|
42
|
+
result.append(f)
|
|
43
|
+
|
|
44
|
+
for keys, values in pairs.items():
|
|
45
|
+
# some checks
|
|
46
|
+
|
|
47
|
+
if len(values) != 2:
|
|
48
|
+
raise ValueError("Missing fields")
|
|
49
|
+
|
|
50
|
+
t_values = values[t].to_numpy(flatten=True)
|
|
51
|
+
rh_values = values[rh].to_numpy(flatten=True)
|
|
52
|
+
# Prevent 0 % Relative humidity which cannot be converted to dewpoint
|
|
53
|
+
# Seems to happen over Egypt in the CERRA dataset
|
|
54
|
+
rh_values[rh_values == 0] = EPS
|
|
55
|
+
# actual conversion from rh --> td
|
|
56
|
+
td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
|
|
57
|
+
result.append(NewDataField(values[rh], td_values, td))
|
|
58
|
+
|
|
59
|
+
return result
|
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
13
|
+
from earthkit.meteo import thermo
|
|
14
|
+
|
|
15
|
+
from .single_level_specific_humidity_to_relative_humidity import AutoDict
|
|
16
|
+
from .single_level_specific_humidity_to_relative_humidity import NewDataField
|
|
17
|
+
from .single_level_specific_humidity_to_relative_humidity import pressure_at_height_level
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def execute(context, input, height, t, rh, sp, new_name="2q", **kwargs):
|
|
21
|
+
"""Convert the single (height) level relative humidity to specific humidity"""
|
|
22
|
+
result = FieldArray()
|
|
23
|
+
|
|
24
|
+
MANDATORY_KEYS = ["A", "B"]
|
|
25
|
+
OPTIONAL_KEYS = ["t_ml", "q_ml"]
|
|
26
|
+
MISSING_KEYS = []
|
|
27
|
+
DEFAULTS = dict(t_ml="t", q_ml="q")
|
|
28
|
+
|
|
29
|
+
for key in OPTIONAL_KEYS:
|
|
30
|
+
if key not in kwargs:
|
|
31
|
+
print(f"key {key} not found in yaml-file, using default key: {DEFAULTS[key]}")
|
|
32
|
+
kwargs[key] = DEFAULTS[key]
|
|
33
|
+
|
|
34
|
+
for key in MANDATORY_KEYS:
|
|
35
|
+
if key not in kwargs:
|
|
36
|
+
MISSING_KEYS.append(key)
|
|
37
|
+
|
|
38
|
+
if MISSING_KEYS:
|
|
39
|
+
raise KeyError(f"Following keys are missing: {', '.join(MISSING_KEYS)}")
|
|
40
|
+
|
|
41
|
+
single_level_params = (t, rh, sp)
|
|
42
|
+
model_level_params = (kwargs["t_ml"], kwargs["q_ml"])
|
|
43
|
+
|
|
44
|
+
needed_fields = AutoDict()
|
|
45
|
+
|
|
46
|
+
# Gather all necessary fields
|
|
47
|
+
for f in input:
|
|
48
|
+
key = f.metadata(namespace="mars")
|
|
49
|
+
param = key.pop("param")
|
|
50
|
+
# check single level parameters
|
|
51
|
+
if param in single_level_params:
|
|
52
|
+
levtype = key.pop("levtype")
|
|
53
|
+
key = tuple(key.items())
|
|
54
|
+
|
|
55
|
+
if param in needed_fields[key][levtype]:
|
|
56
|
+
raise ValueError(f"Duplicate single level field {param} for {key}")
|
|
57
|
+
|
|
58
|
+
needed_fields[key][levtype][param] = f
|
|
59
|
+
if param == rh:
|
|
60
|
+
if kwargs.get("keep_rh", False):
|
|
61
|
+
result.append(f)
|
|
62
|
+
else:
|
|
63
|
+
result.append(f)
|
|
64
|
+
|
|
65
|
+
# check model level parameters
|
|
66
|
+
elif param in model_level_params:
|
|
67
|
+
levtype = key.pop("levtype")
|
|
68
|
+
levelist = key.pop("levelist")
|
|
69
|
+
key = tuple(key.items())
|
|
70
|
+
|
|
71
|
+
if param in needed_fields[key][levtype][levelist]:
|
|
72
|
+
raise ValueError(f"Duplicate model level field {param} for {key} at level {levelist}")
|
|
73
|
+
|
|
74
|
+
needed_fields[key][levtype][levelist][param] = f
|
|
75
|
+
|
|
76
|
+
# all other parameters
|
|
77
|
+
else:
|
|
78
|
+
result.append(f)
|
|
79
|
+
|
|
80
|
+
for _, values in needed_fields.items():
|
|
81
|
+
# some checks
|
|
82
|
+
if len(values["sfc"]) != 3:
|
|
83
|
+
raise ValueError("Missing surface fields")
|
|
84
|
+
|
|
85
|
+
rh_sl = values["sfc"][rh].to_numpy(flatten=True)
|
|
86
|
+
t_sl = values["sfc"][t].to_numpy(flatten=True)
|
|
87
|
+
sp_sl = values["sfc"][sp].to_numpy(flatten=True)
|
|
88
|
+
|
|
89
|
+
nlevels = len(kwargs["A"]) - 1
|
|
90
|
+
if len(values["ml"]) != nlevels:
|
|
91
|
+
raise ValueError("Missing model levels")
|
|
92
|
+
|
|
93
|
+
for key in values["ml"].keys():
|
|
94
|
+
if len(values["ml"][key]) != 2:
|
|
95
|
+
raise ValueError(f"Missing field on level {key}")
|
|
96
|
+
|
|
97
|
+
# create 3D arrays for upper air fields
|
|
98
|
+
levels = list(values["ml"].keys())
|
|
99
|
+
levels.sort()
|
|
100
|
+
t_ml = []
|
|
101
|
+
q_ml = []
|
|
102
|
+
for level in levels:
|
|
103
|
+
t_ml.append(values["ml"][level][kwargs["t_ml"]].to_numpy(flatten=True))
|
|
104
|
+
q_ml.append(values["ml"][level][kwargs["q_ml"]].to_numpy(flatten=True))
|
|
105
|
+
|
|
106
|
+
t_ml = np.stack(t_ml)
|
|
107
|
+
q_ml = np.stack(q_ml)
|
|
108
|
+
|
|
109
|
+
# actual conversion from rh --> q_v
|
|
110
|
+
p_sl = pressure_at_height_level(height, q_ml, t_ml, sp_sl, np.array(kwargs["A"]), np.array(kwargs["B"]))
|
|
111
|
+
q_sl = thermo.specific_humidity_from_relative_humidity(t_sl, rh_sl, p_sl)
|
|
112
|
+
|
|
113
|
+
result.append(NewDataField(values["sfc"][rh], q_sl, new_name))
|
|
114
|
+
|
|
115
|
+
return result
|