anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (33)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/create.py +3 -2
  3. anemoi/datasets/create/__init__.py +30 -32
  4. anemoi/datasets/create/config.py +4 -3
  5. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  6. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  7. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  8. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  9. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  10. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  11. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  12. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  13. anemoi/datasets/create/functions/sources/grib.py +86 -1
  14. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  15. anemoi/datasets/create/functions/sources/mars.py +9 -3
  16. anemoi/datasets/create/functions/sources/xarray/field.py +7 -1
  17. anemoi/datasets/create/functions/sources/xarray/metadata.py +13 -11
  18. anemoi/datasets/create/input.py +39 -17
  19. anemoi/datasets/create/persistent.py +1 -1
  20. anemoi/datasets/create/utils.py +3 -0
  21. anemoi/datasets/data/dataset.py +11 -1
  22. anemoi/datasets/data/debug.py +5 -1
  23. anemoi/datasets/data/masked.py +2 -2
  24. anemoi/datasets/data/rescale.py +147 -0
  25. anemoi/datasets/data/stores.py +20 -7
  26. anemoi/datasets/dates/__init__.py +112 -30
  27. anemoi/datasets/dates/groups.py +84 -19
  28. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +10 -19
  29. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/RECORD +33 -24
  30. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
  31. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
  32. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
  33. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.4.5'
-__version_tuple__ = version_tuple = (0, 4, 5)
+__version__ = version = '0.5.0'
+__version_tuple__ = version_tuple = (0, 5, 0)
anemoi/datasets/commands/create.py
@@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
     """
 
     now = datetime.datetime.now()
-    LOG.info(f"Task {what}({args},{kwargs}) starting")
+    LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
 
     from anemoi.datasets.create import creator_factory
 
@@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
     c = creator_factory(what.replace("-", "_"), **options)
     result = c.run()
 
-    LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
+    LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
     return result
 
 
@@ -57,6 +57,7 @@ class Create(Command):
         command_parser.add_argument("--trace", action="store_true")
 
     def run(self, args):
+
         now = time.time()
         if args.threads + args.processes:
             self.parallel_create(args)
anemoi/datasets/create/__init__.py
@@ -132,7 +132,7 @@ class Dataset:
                 v = v.isoformat()
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
 
-    @property
+    @cached_property
    def anemoi_dataset(self):
        return open_dataset(self.path)
 
@@ -245,9 +245,9 @@ class Actor: # TODO: rename to Creator
             missing_dates = z.attrs.get("missing_dates", [])
             missing_dates = sorted([np.datetime64(d) for d in missing_dates])
             if missing_dates != expected:
-                LOG.warn("Missing dates given in recipe do not match the actual missing dates in the dataset.")
-                LOG.warn(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
-                LOG.warn(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
+                LOG.warning("Missing dates given in recipe do not match the actual missing dates in the dataset.")
+                LOG.warning(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
+                LOG.warning(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
                 raise ValueError("Missing dates given in recipe do not match the actual missing dates in the dataset.")
 
         check_missing_dates(self.missing_dates)
@@ -327,7 +327,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
         if _path_readable(path) and not overwrite:
-            raise Exception(f"{self.path} already exists. Use overwrite=True to overwrite.")
+            raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
 
         super().__init__(path, cache=cache)
         self.config = config
@@ -345,9 +345,12 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         assert isinstance(self.main_config.output.order_by, dict), self.main_config.output.order_by
         self.create_elements(self.main_config)
 
-        first_date = self.groups.dates[0]
-        self.minimal_input = self.input.select([first_date])
-        LOG.info("Minimal input for 'init' step (using only the first date) :")
+        LOG.info(f"Groups: {self.groups}")
+
+        one_date = self.groups.one_date()
+        # assert False, (type(one_date), type(self.groups))
+        self.minimal_input = self.input.select(one_date)
+        LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)
 
     def run(self):
@@ -363,13 +366,15 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         LOG.info("Config loaded ok:")
         # LOG.info(self.main_config)
 
-        dates = self.groups.dates
-        frequency = dates.frequency
+        dates = self.groups.provider.values
+        frequency = self.groups.provider.frequency
+        missing = self.groups.provider.missing
+
         assert isinstance(frequency, datetime.timedelta), frequency
 
         LOG.info(f"Found {len(dates)} datetimes.")
         LOG.info(f"Dates: Found {len(dates)} datetimes, in {len(self.groups)} groups: ")
-        LOG.info(f"Missing dates: {len(dates.missing)}")
+        LOG.info(f"Missing dates: {len(missing)}")
         lengths = tuple(len(g) for g in self.groups)
 
         variables = self.minimal_input.variables
@@ -426,7 +431,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
         metadata["frequency"] = frequency
-        metadata["missing_dates"] = [_.isoformat() for _ in dates.missing]
+        metadata["missing_dates"] = [_.isoformat() for _ in missing]
 
         metadata["version"] = VERSION
 
@@ -481,17 +486,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
481
486
 
482
487
  assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks())
483
488
 
484
- def sanity_check_config(a, b):
485
- a = json.dumps(a, sort_keys=True, default=str)
486
- b = json.dumps(b, sort_keys=True, default=str)
487
- b = b.replace("T", " ") # dates are expected to be different because
488
- if a != b:
489
- print("❌❌❌ FIXME: Config serialisation to be checked")
490
- print(a)
491
- print(b)
492
-
493
- sanity_check_config(self.main_config, self.dataset.get_main_config())
494
-
495
489
  # Return the number of groups to process, so we can show a nice progress bar
496
490
  return len(lengths)
497
491
 
@@ -527,11 +521,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 LOG.info(f" -> Skipping {igroup} total={len(self.groups)} (already done)")
                 continue
 
-            assert isinstance(group[0], datetime.datetime), group
+            # assert isinstance(group[0], datetime.datetime), type(group[0])
             LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
 
             result = self.input.select(dates=group)
-            assert result.dates == group, (len(result.dates), len(group))
+            assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
 
             # There are several groups.
             # There is one result to load for each group.
@@ -545,7 +539,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
545
539
 
546
540
  def load_result(self, result):
547
541
  # There is one cube to load for each result.
548
- dates = result.dates
542
+ dates = list(result.group_of_dates)
549
543
 
550
544
  cube = result.get_cube()
551
545
  shape = cube.extended_user_shape
@@ -555,7 +549,9 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
555
549
 
556
550
  def check_shape(cube, dates, dates_in_data):
557
551
  if cube.extended_user_shape[0] != len(dates):
558
- print(f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}")
552
+ print(
553
+ f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
554
+ )
559
555
  print("Requested dates", compress_dates(dates))
560
556
  print("Cube dates", compress_dates(dates_in_data))
561
557
 
@@ -566,7 +562,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
566
562
  print("Extra dates", compress_dates(b - a))
567
563
 
568
564
  raise ValueError(
569
- f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}"
565
+ f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
570
566
  )
571
567
 
572
568
  check_shape(cube, dates, dates_in_data)
@@ -846,7 +842,7 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
846
842
  )
847
843
 
848
844
  if len(ifound) < 2:
849
- LOG.warn(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
845
+ LOG.warning(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
850
846
  self.tmp_storage.delete()
851
847
  return
852
848
 
@@ -919,7 +915,7 @@ def multi_addition(cls):
919
915
  self.actors.append(cls(*args, delta=k, **kwargs))
920
916
 
921
917
  if not self.actors:
922
- LOG.warning("No delta found in kwargs, no addtions will be computed.")
918
+ LOG.warning("No delta found in kwargs, no additions will be computed.")
923
919
 
924
920
  def run(self):
925
921
  for actor in self.actors:
@@ -947,7 +943,9 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
947
943
  )
948
944
  start, end = np.datetime64(start), np.datetime64(end)
949
945
  dates = self.dataset.anemoi_dataset.dates
950
- assert type(dates[0]) == type(start), (type(dates[0]), type(start)) # noqa
946
+
947
+ assert type(dates[0]) is type(start), (type(dates[0]), type(start))
948
+
951
949
  dates = [d for d in dates if d >= start and d <= end]
952
950
  dates = [d for i, d in enumerate(dates) if i not in self.dataset.anemoi_dataset.missing]
953
951
  variables = self.dataset.anemoi_dataset.variables
@@ -956,7 +954,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
956
954
  LOG.info(stats)
957
955
 
958
956
  if not all(self.registry.get_flags(sync=False)):
959
- raise Exception(f"❗Zarr {self.path} is not fully built, not writting statistics into dataset.")
957
+ raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
960
958
 
961
959
  for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
962
960
  self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
@@ -215,8 +215,9 @@ def set_to_test_mode(cfg):
215
215
  NUMBER_OF_DATES = 4
216
216
 
217
217
  dates = cfg["dates"]
218
- LOG.warn(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
218
+ LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
219
219
  groups = Groups(**LoadersConfig(cfg).dates)
220
+
220
221
  dates = groups.dates
221
222
  cfg["dates"] = dict(
222
223
  start=dates[0],
@@ -234,12 +235,12 @@ def set_to_test_mode(cfg):
234
235
  if "grid" in obj:
235
236
  previous = obj["grid"]
236
237
  obj["grid"] = "20./20."
237
- LOG.warn(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
238
+ LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
238
239
  if "number" in obj:
239
240
  if isinstance(obj["number"], (list, tuple)):
240
241
  previous = obj["number"]
241
242
  obj["number"] = previous[0:3]
242
- LOG.warn(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
243
+ LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
243
244
  for k, v in obj.items():
244
245
  set_element_to_test(v)
245
246
  if "constants" in obj:
anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, rh, q="q"):
+    """Convert relative humidity on pressure levels to specific humidity"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        rh_pl = values[rh].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from rh --> q_v
+        q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
+        result.append(NewDataField(values[rh], q_pl, q))
+
+    return result
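
Note: the conversion at the heart of this new filter is a single earthkit.meteo call. The sketch below is illustrative only: the sample values are invented, and the unit conventions (temperature in K, relative humidity in %, pressure in Pa, as implied by the hPa-to-Pa scaling of `keys[4][1] * 100` above) are assumptions about earthkit.meteo, not something this diff states.

    import numpy as np
    from earthkit.meteo import thermo

    # Invented sample values: temperature [K] and relative humidity [%]
    # for three grid points on the 850 hPa pressure level.
    t_pl = np.array([285.0, 290.0, 275.0])
    rh_pl = np.array([60.0, 80.0, 95.0])
    pressure = 850 * 100  # hPa -> Pa, as in the filter above

    # Same call the filter makes; returns specific humidity [kg/kg].
    q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
    print(q_pl)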
anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, q, rh="r"):
+    """Convert specific humidity on pressure levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, q)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        q_pl = values[q].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from q --> rh
+        rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
+        result.append(NewDataField(values[q], rh_pl, rh))
+
+    return result
anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py
@@ -0,0 +1,54 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, td, rh="d"):
+    """Convert dewpoint on single levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, td)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        td_values = values[td].to_numpy(flatten=True)
+        # actual conversion from td --> rh
+        rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
+        result.append(NewDataField(values[td], rh_values, rh))
+
+    return result
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py
@@ -0,0 +1,59 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+EPS = 1.0e-4
+
+
+def execute(context, input, t, rh, td="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        rh_values = values[rh].to_numpy(flatten=True)
+        # Prevent 0 % Relative humidity which cannot be converted to dewpoint
+        # Seems to happen over Egypt in the CERRA dataset
+        rh_values[rh_values == 0] = EPS
+        # actual conversion from rh --> td
+        td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
+        result.append(NewDataField(values[rh], td_values, td))
+
+    return result
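
Note: the `rh_values[rh_values == 0] = EPS` guard above exists because a relative humidity of exactly 0 % has no finite dewpoint. A minimal sketch of the guarded conversion, with invented sample values (the % unit is an assumption about earthkit.meteo):

    import numpy as np
    from earthkit.meteo import thermo

    EPS = 1.0e-4  # same floor as the filter above

    t = np.array([300.0, 310.0])  # temperature [K]
    rh = np.array([45.0, 0.0])    # 0 % occurs e.g. over Egypt in CERRA

    rh = np.where(rh == 0.0, EPS, rh)  # avoid an undefined dewpoint at 0 %
    td = thermo.dewpoint_from_relative_humidity(t=t, r=rh)  # dewpoint [K]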
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py
@@ -0,0 +1,115 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+
+import numpy as np
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import AutoDict
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+from .single_level_specific_humidity_to_relative_humidity import pressure_at_height_level
+
+
+def execute(context, input, height, t, rh, sp, new_name="2q", **kwargs):
+    """Convert the single (height) level relative humidity to specific humidity"""
+    result = FieldArray()
+
+    MANDATORY_KEYS = ["A", "B"]
+    OPTIONAL_KEYS = ["t_ml", "q_ml"]
+    MISSING_KEYS = []
+    DEFAULTS = dict(t_ml="t", q_ml="q")
+
+    for key in OPTIONAL_KEYS:
+        if key not in kwargs:
+            print(f"key {key} not found in yaml-file, using default key: {DEFAULTS[key]}")
+            kwargs[key] = DEFAULTS[key]
+
+    for key in MANDATORY_KEYS:
+        if key not in kwargs:
+            MISSING_KEYS.append(key)
+
+    if MISSING_KEYS:
+        raise KeyError(f"Following keys are missing: {', '.join(MISSING_KEYS)}")
+
+    single_level_params = (t, rh, sp)
+    model_level_params = (kwargs["t_ml"], kwargs["q_ml"])
+
+    needed_fields = AutoDict()
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        # check single level parameters
+        if param in single_level_params:
+            levtype = key.pop("levtype")
+            key = tuple(key.items())
+
+            if param in needed_fields[key][levtype]:
+                raise ValueError(f"Duplicate single level field {param} for {key}")
+
+            needed_fields[key][levtype][param] = f
+            if param == rh:
+                if kwargs.get("keep_rh", False):
+                    result.append(f)
+            else:
+                result.append(f)
+
+        # check model level parameters
+        elif param in model_level_params:
+            levtype = key.pop("levtype")
+            levelist = key.pop("levelist")
+            key = tuple(key.items())
+
+            if param in needed_fields[key][levtype][levelist]:
+                raise ValueError(f"Duplicate model level field {param} for {key} at level {levelist}")
+
+            needed_fields[key][levtype][levelist][param] = f
+
+        # all other parameters
+        else:
+            result.append(f)
+
+    for _, values in needed_fields.items():
+        # some checks
+        if len(values["sfc"]) != 3:
+            raise ValueError("Missing surface fields")
+
+        rh_sl = values["sfc"][rh].to_numpy(flatten=True)
+        t_sl = values["sfc"][t].to_numpy(flatten=True)
+        sp_sl = values["sfc"][sp].to_numpy(flatten=True)
+
+        nlevels = len(kwargs["A"]) - 1
+        if len(values["ml"]) != nlevels:
+            raise ValueError("Missing model levels")
+
+        for key in values["ml"].keys():
+            if len(values["ml"][key]) != 2:
+                raise ValueError(f"Missing field on level {key}")
+
+        # create 3D arrays for upper air fields
+        levels = list(values["ml"].keys())
+        levels.sort()
+        t_ml = []
+        q_ml = []
+        for level in levels:
+            t_ml.append(values["ml"][level][kwargs["t_ml"]].to_numpy(flatten=True))
+            q_ml.append(values["ml"][level][kwargs["q_ml"]].to_numpy(flatten=True))
+
+        t_ml = np.stack(t_ml)
+        q_ml = np.stack(q_ml)
+
+        # actual conversion from rh --> q_v
+        p_sl = pressure_at_height_level(height, q_ml, t_ml, sp_sl, np.array(kwargs["A"]), np.array(kwargs["B"]))
+        q_sl = thermo.specific_humidity_from_relative_humidity(t_sl, rh_sl, p_sl)
+
+        result.append(NewDataField(values["sfc"][rh], q_sl, new_name))
+
+    return result
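
Note: the `needed_fields[key][levtype][levelist][param]` assignments above rely on AutoDict creating intermediate nesting levels on first access. AutoDict itself is defined in single_level_specific_humidity_to_relative_humidity.py, which is not shown in this diff; the sketch below is the usual auto-vivification recipe it presumably follows, not its actual source.

    class AutoDict(dict):
        """A dict that auto-creates nested dicts on missing keys."""

        def __missing__(self, key):
            value = self[key] = type(self)()
            return value

    fields = AutoDict()
    fields["key"]["sfc"]["2t"] = "field"  # no KeyError at intermediate levels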