anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/create.py +3 -2
  3. anemoi/datasets/commands/inspect.py +1 -1
  4. anemoi/datasets/commands/publish.py +30 -0
  5. anemoi/datasets/create/__init__.py +72 -35
  6. anemoi/datasets/create/check.py +6 -0
  7. anemoi/datasets/create/config.py +4 -3
  8. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  9. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  10. anemoi/datasets/create/functions/filters/rename.py +2 -3
  11. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  12. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  13. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  14. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  15. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  16. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  17. anemoi/datasets/create/functions/sources/__init__.py +7 -1
  18. anemoi/datasets/create/functions/sources/accumulations.py +2 -0
  19. anemoi/datasets/create/functions/sources/grib.py +87 -2
  20. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  21. anemoi/datasets/create/functions/sources/mars.py +9 -3
  22. anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
  23. anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
  24. anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
  25. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
  26. anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
  27. anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
  28. anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
  29. anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
  30. anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
  31. anemoi/datasets/create/input/__init__.py +69 -0
  32. anemoi/datasets/create/input/action.py +123 -0
  33. anemoi/datasets/create/input/concat.py +92 -0
  34. anemoi/datasets/create/input/context.py +59 -0
  35. anemoi/datasets/create/input/data_sources.py +71 -0
  36. anemoi/datasets/create/input/empty.py +42 -0
  37. anemoi/datasets/create/input/filter.py +76 -0
  38. anemoi/datasets/create/input/function.py +122 -0
  39. anemoi/datasets/create/input/join.py +57 -0
  40. anemoi/datasets/create/input/misc.py +85 -0
  41. anemoi/datasets/create/input/pipe.py +33 -0
  42. anemoi/datasets/create/input/repeated_dates.py +217 -0
  43. anemoi/datasets/create/input/result.py +413 -0
  44. anemoi/datasets/create/input/step.py +99 -0
  45. anemoi/datasets/create/{template.py → input/template.py} +0 -42
  46. anemoi/datasets/create/persistent.py +1 -1
  47. anemoi/datasets/create/statistics/__init__.py +1 -1
  48. anemoi/datasets/create/utils.py +3 -0
  49. anemoi/datasets/create/zarr.py +4 -2
  50. anemoi/datasets/data/dataset.py +11 -1
  51. anemoi/datasets/data/debug.py +5 -1
  52. anemoi/datasets/data/masked.py +2 -2
  53. anemoi/datasets/data/rescale.py +147 -0
  54. anemoi/datasets/data/stores.py +20 -7
  55. anemoi/datasets/dates/__init__.py +113 -30
  56. anemoi/datasets/dates/groups.py +92 -19
  57. anemoi/datasets/fields.py +66 -0
  58. anemoi/datasets/utils/fields.py +47 -0
  59. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
  60. anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
  61. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
  62. anemoi/datasets/create/input.py +0 -1065
  63. anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
  64. /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
  65. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
  66. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
  67. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0

anemoi/datasets/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.4.5'
-__version_tuple__ = version_tuple = (0, 4, 5)
+__version__ = version = '0.5.5'
+__version_tuple__ = version_tuple = (0, 5, 5)

anemoi/datasets/commands/create.py
@@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
     """
 
     now = datetime.datetime.now()
-    LOG.info(f"Task {what}({args},{kwargs}) starting")
+    LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
 
     from anemoi.datasets.create import creator_factory
 
@@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
     c = creator_factory(what.replace("-", "_"), **options)
     result = c.run()
 
-    LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
+    LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
     return result
 
 
@@ -57,6 +57,7 @@ class Create(Command):
         command_parser.add_argument("--trace", action="store_true")
 
     def run(self, args):
+
         now = time.time()
         if args.threads + args.processes:
             self.parallel_create(args)

anemoi/datasets/commands/inspect.py
@@ -311,7 +311,7 @@ class Version:
         print(f"🕰️ Dataset initialized {when(start)}.")
         if built and latest:
             speed = (latest - start) / built
-            eta = datetime.datetime.utcnow() + speed * (total - built)
+            eta = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + speed * (total - built)
             print(f"🏁 ETA {when(eta)}.")
         else:
             if latest:
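
The inspect.py change swaps datetime.datetime.utcnow(), deprecated since Python 3.12, for an aware UTC clock with the tzinfo stripped. A minimal sketch of why the two are interchangeable here:

import datetime

# utcnow() returned a naive datetime in UTC. The replacement builds an
# aware UTC datetime, then drops tzinfo so arithmetic with the dataset's
# naive timestamps keeps working.
eta_base = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
assert eta_base.tzinfo is None  # still naive, but from a non-deprecated API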

anemoi/datasets/commands/publish.py (new file)
@@ -0,0 +1,30 @@
+import logging
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class Publish(Command):
+    """Publish a dataset."""
+
+    # This is a command that is used to publish a dataset.
+    # it is a class, inheriting from Command.
+
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, parser):
+        parser.add_argument("path", help="Path of the dataset to publish.")
+
+    def run(self, args):
+        try:
+            from anemoi.registry import publish_dataset
+        except ImportError:
+            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
+            return
+
+        publish_dataset(args.path)
+
+
+command = Publish
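
The new publish command is a thin wrapper that degrades gracefully when the optional anemoi-registry package is missing. A hypothetical programmatic equivalent (the dataset path is illustrative):

# Requires the optional anemoi-registry package, as in the command above.
from anemoi.registry import publish_dataset

publish_dataset("/path/to/dataset.zarr")  # illustrative path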

anemoi/datasets/create/__init__.py
@@ -14,6 +14,7 @@ import os
 import time
 import uuid
 import warnings
+from copy import deepcopy
 from functools import cached_property
 
 import numpy as np
@@ -24,9 +25,11 @@ from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
 from anemoi.utils.humanize import compress_dates
 from anemoi.utils.humanize import seconds_to_human
+from earthkit.data.core.order import build_remapping
 
 from anemoi.datasets import MissingDateError
 from anemoi.datasets import open_dataset
+from anemoi.datasets.create.input.trace import enable_trace
 from anemoi.datasets.create.persistent import build_storage
 from anemoi.datasets.data.misc import as_first_date
 from anemoi.datasets.data.misc import as_last_date
@@ -132,7 +135,7 @@ class Dataset:
             v = v.isoformat()
         z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
 
-    @property
+    @cached_property
    def anemoi_dataset(self):
         return open_dataset(self.path)
 
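Switching anemoi_dataset from @property to @cached_property means open_dataset(self.path) runs once per Dataset instance rather than on every attribute access. A minimal sketch of the difference, using a stand-in loader:

from functools import cached_property


class Example:
    def __init__(self):
        self.calls = 0

    @cached_property
    def data(self):
        # Stand-in for open_dataset(self.path): the body runs on first
        # access only; the result is then stored on the instance.
        self.calls += 1
        return object()


e = Example()
_ = e.data
_ = e.data
assert e.calls == 1  # a plain @property would have re-opened the dataset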
@@ -245,9 +248,9 @@ class Actor:  # TODO: rename to Creator
             missing_dates = z.attrs.get("missing_dates", [])
             missing_dates = sorted([np.datetime64(d) for d in missing_dates])
             if missing_dates != expected:
-                LOG.warn("Missing dates given in recipe do not match the actual missing dates in the dataset.")
-                LOG.warn(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
-                LOG.warn(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
+                LOG.warning("Missing dates given in recipe do not match the actual missing dates in the dataset.")
+                LOG.warning(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
+                LOG.warning(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
                 raise ValueError("Missing dates given in recipe do not match the actual missing dates in the dataset.")
 
         check_missing_dates(self.missing_dates)
@@ -308,7 +311,6 @@ class HasElementForDataMixin:
 
 
 def build_input_(main_config, output_config):
-    from earthkit.data.core.order import build_remapping
 
     builder = build_input(
         main_config.input,
@@ -323,11 +325,48 @@ def build_input_(main_config, output_config):
     return builder
 
 
+def tidy_recipe(config: object):
+    """Remove potentially private information in the config"""
+    config = deepcopy(config)
+    if isinstance(config, (tuple, list)):
+        return [tidy_recipe(_) for _ in config]
+    if isinstance(config, (dict, DotDict)):
+        for k, v in config.items():
+            if k.startswith("_"):
+                config[k] = "*** REMOVED FOR SECURITY ***"
+            else:
+                config[k] = tidy_recipe(v)
+    if isinstance(config, str):
+        if config.startswith("_"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("s3://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("gs://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("http"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ftp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("file"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ssh"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("scp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("rsync"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("/"):
+            return "*** REMOVED FOR SECURITY ***"
+        if "@" in config:
+            return "*** REMOVED FOR SECURITY ***"
+    return config
+
+
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs):  # fmt: skip
         if _path_readable(path) and not overwrite:
-            raise Exception(f"{self.path} already exists. Use overwrite=True to overwrite.")
+            raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
 
         super().__init__(path, cache=cache)
         self.config = config
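
tidy_recipe walks the recipe recursively and redacts anything that looks like a credential or a location: keys starting with an underscore, URL-like strings, absolute paths, and strings containing "@". A minimal sketch of the effect on a plain dict (assuming tidy_recipe is importable from anemoi.datasets.create, where the hunk above defines it):

from anemoi.datasets.create import tidy_recipe

recipe = {
    "description": "example",
    "_token": "secret",  # underscore key: value replaced
    "input": {"grib": {"path": "/data/x.grib"}},  # absolute path: redacted
    "mirror": "s3://bucket/prefix",  # URL-like string: redacted
}
print(tidy_recipe(recipe))
# {'description': 'example',
#  '_token': '*** REMOVED FOR SECURITY ***',
#  'input': {'grib': {'path': '*** REMOVED FOR SECURITY ***'}},
#  'mirror': '*** REMOVED FOR SECURITY ***'}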
@@ -345,9 +384,12 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         assert isinstance(self.main_config.output.order_by, dict), self.main_config.output.order_by
         self.create_elements(self.main_config)
 
-        first_date = self.groups.dates[0]
-        self.minimal_input = self.input.select([first_date])
-        LOG.info("Minimal input for 'init' step (using only the first date) :")
+        LOG.info(f"Groups: {self.groups}")
+
+        one_date = self.groups.one_date()
+        # assert False, (type(one_date), type(self.groups))
+        self.minimal_input = self.input.select(one_date)
+        LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)
 
     def run(self):
@@ -363,13 +405,15 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         LOG.info("Config loaded ok:")
         # LOG.info(self.main_config)
 
-        dates = self.groups.dates
-        frequency = dates.frequency
+        dates = self.groups.provider.values
+        frequency = self.groups.provider.frequency
+        missing = self.groups.provider.missing
+
         assert isinstance(frequency, datetime.timedelta), frequency
 
         LOG.info(f"Found {len(dates)} datetimes.")
         LOG.info(f"Dates: Found {len(dates)} datetimes, in {len(self.groups)} groups: ")
-        LOG.info(f"Missing dates: {len(dates.missing)}")
+        LOG.info(f"Missing dates: {len(missing)}")
         lengths = tuple(len(g) for g in self.groups)
 
         variables = self.minimal_input.variables
@@ -404,6 +448,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         metadata.update(self.main_config.get("add_metadata", {}))
 
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
+        metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
 
         metadata["description"] = self.main_config.description
         metadata["licence"] = self.main_config["licence"]
@@ -426,7 +471,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
         metadata["frequency"] = frequency
-        metadata["missing_dates"] = [_.isoformat() for _ in dates.missing]
+        metadata["missing_dates"] = [_.isoformat() for _ in missing]
 
         metadata["version"] = VERSION
 
@@ -481,17 +526,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
 
         assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks())
 
-        def sanity_check_config(a, b):
-            a = json.dumps(a, sort_keys=True, default=str)
-            b = json.dumps(b, sort_keys=True, default=str)
-            b = b.replace("T", " ")  # dates are expected to be different because
-            if a != b:
-                print("❌❌❌ FIXME: Config serialisation to be checked")
-                print(a)
-                print(b)
-
-        sanity_check_config(self.main_config, self.dataset.get_main_config())
-
         # Return the number of groups to process, so we can show a nice progress bar
         return len(lengths)
 
@@ -527,11 +561,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
                 LOG.info(f" -> Skipping {igroup} total={len(self.groups)} (already done)")
                 continue
 
-            assert isinstance(group[0], datetime.datetime), group
+            # assert isinstance(group[0], datetime.datetime), type(group[0])
             LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
 
-            result = self.input.select(dates=group)
-            assert result.dates == group, (len(result.dates), len(group))
+            result = self.input.select(group_of_dates=group)
+            assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
 
             # There are several groups.
             # There is one result to load for each group.
@@ -545,7 +579,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
 
     def load_result(self, result):
         # There is one cube to load for each result.
-        dates = result.dates
+        dates = list(result.group_of_dates)
 
         cube = result.get_cube()
         shape = cube.extended_user_shape
@@ -555,7 +589,9 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
 
         def check_shape(cube, dates, dates_in_data):
             if cube.extended_user_shape[0] != len(dates):
-                print(f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}")
+                print(
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
+                )
                 print("Requested dates", compress_dates(dates))
                 print("Cube dates", compress_dates(dates_in_data))
 
@@ -566,7 +602,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
                 print("Extra dates", compress_dates(b - a))
 
                 raise ValueError(
-                    f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}"
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
                 )
 
         check_shape(cube, dates, dates_in_data)
@@ -846,7 +882,7 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         )
 
         if len(ifound) < 2:
-            LOG.warn(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
+            LOG.warning(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
             self.tmp_storage.delete()
             return
 
@@ -919,7 +955,7 @@ def multi_addition(cls):
             self.actors.append(cls(*args, delta=k, **kwargs))
 
         if not self.actors:
-            LOG.warning("No delta found in kwargs, no addtions will be computed.")
+            LOG.warning("No delta found in kwargs, no additions will be computed.")
 
     def run(self):
         for actor in self.actors:
@@ -947,7 +983,9 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         )
         start, end = np.datetime64(start), np.datetime64(end)
         dates = self.dataset.anemoi_dataset.dates
-        assert type(dates[0]) == type(start), (type(dates[0]), type(start))  # noqa
+
+        assert type(dates[0]) is type(start), (type(dates[0]), type(start))
+
         dates = [d for d in dates if d >= start and d <= end]
         dates = [d for i, d in enumerate(dates) if i not in self.dataset.anemoi_dataset.missing]
         variables = self.dataset.anemoi_dataset.variables
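
The assert rewrite replaces == with is for the type comparison, the form flake8's E721 check asks for, which also lets the # noqa marker go. For example:

import numpy as np

a = np.datetime64("2024-01-01")
b = np.datetime64("2024-06-01")

# Exact-type checks compare the type objects by identity.
assert type(a) is type(b)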
@@ -956,7 +994,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(stats)
 
         if not all(self.registry.get_flags(sync=False)):
-            raise Exception(f"❗Zarr {self.path} is not fully built, not writting statistics into dataset.")
+            raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
 
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
@@ -994,7 +1032,6 @@ def chain(tasks):
 
 def creator_factory(name, trace=None, **kwargs):
     if trace:
-        from anemoi.datasets.create.trace import enable_trace
 
         enable_trace(trace)
 

anemoi/datasets/create/check.py
@@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):
 
 def check_data_values(arr, *, name: str, log=[], allow_nans=False):
 
+    shape = arr.shape
+
     if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
         arr = arr[~np.isnan(arr)]
 
+    if arr.size == 0:
+        warnings.warn(f"Empty array for {name} ({shape})")
+        return
+
     assert arr.size > 0, (name, *log)
 
     min, max = arr.min(), arr.max()
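
The new guard in check_data_values covers a field that is allowed to contain NaNs but turns out to be all NaNs: after masking, the array is empty, and the function now warns and returns instead of tripping the assertion below it. A minimal sketch of that path:

import warnings

import numpy as np

arr = np.full(10, np.nan)  # a field that is entirely NaN
shape = arr.shape

arr = arr[~np.isnan(arr)]  # the allow_nans branch strips the NaNs

if arr.size == 0:
    # Previously this fell through to `assert arr.size > 0` and failed.
    warnings.warn(f"Empty array for example_field ({shape})")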

anemoi/datasets/create/config.py
@@ -215,8 +215,9 @@ def set_to_test_mode(cfg):
     NUMBER_OF_DATES = 4
 
     dates = cfg["dates"]
-    LOG.warn(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
+    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
     groups = Groups(**LoadersConfig(cfg).dates)
+
     dates = groups.dates
     cfg["dates"] = dict(
         start=dates[0],
@@ -234,12 +235,12 @@ def set_to_test_mode(cfg):
         if "grid" in obj:
             previous = obj["grid"]
             obj["grid"] = "20./20."
-            LOG.warn(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
+            LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
         if "number" in obj:
             if isinstance(obj["number"], (list, tuple)):
                 previous = obj["number"]
                 obj["number"] = previous[0:3]
-                LOG.warn(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
+                LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
         for k, v in obj.items():
             set_element_to_test(v)
         if "constants" in obj:

anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py (new file)
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, rh, q="q"):
+    """Convert relative humidity on pressure levels to specific humidity"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        rh_pl = values[rh].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from rh --> q_v
+        q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
+        result.append(NewDataField(values[rh], q_pl, q))
+
+    return result
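
This filter and the other new humidity filters share one pairing scheme: a field's MARS metadata minus the param key identifies a slot (date, time, level, ...), and both required parameters must appear exactly once per slot. A stand-in sketch of that bookkeeping, with plain dicts in place of earthkit fields:

from collections import defaultdict

# Stand-ins for the MARS metadata of four fields: two parameters on two
# pressure levels.
fields = [
    {"param": "t", "levelist": 850, "date": "2024-01-01"},
    {"param": "r", "levelist": 850, "date": "2024-01-01"},
    {"param": "t", "levelist": 500, "date": "2024-01-01"},
    {"param": "r", "levelist": 500, "date": "2024-01-01"},
]

pairs = defaultdict(dict)
for f in fields:
    meta = dict(f)
    param = meta.pop("param")
    key = tuple(sorted(meta.items()))  # everything except param names the slot
    if param in pairs[key]:
        raise ValueError(f"Duplicate field {param} for {key}")
    pairs[key][param] = f

for key, values in pairs.items():
    if len(values) != 2:  # both parameters must be present in every slot
        raise ValueError("Missing fields")

print(len(pairs))  # 2 slots, one per pressure level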

anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py (new file)
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, q, rh="r"):
+    """Convert specific humidity on pressure levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, q)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        q_pl = values[q].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from rh --> q_v
+        rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
+        result.append(NewDataField(values[q], rh_pl, rh))
+
+    return result

anemoi/datasets/create/functions/filters/rename.py
@@ -32,7 +32,7 @@ class RenamedFieldMapping:
 
         value = self.field.metadata(key, **kwargs)
         if key == self.what:
-            return self.renaming.get(value, value)
+            return self.renaming.get(self.what, {}).get(value, value)
 
         return value
 
@@ -68,8 +68,7 @@ class RenamedFieldFormat:
 
 
 def execute(context, input, what="param", **kwargs):
-    # print('🍍🍍🍍🍍🍍🍍🍍🍍🍍🍍🍍🍍🍍 ==========', kwargs)
-    if what in kwargs:
+    if what in kwargs and isinstance(kwargs[what], str):
         return FieldArray([RenamedFieldFormat(fs, kwargs[what]) for fs in input])
 
     return FieldArray([RenamedFieldMapping(fs, what, kwargs) for fs in input])
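
The RenamedFieldMapping fix: the filter receives kwargs shaped like {what: {old: new}}, so the lookup must first descend into the mapping for what before translating the metadata value; the execute change likewise routes to the format-string path only when kwargs[what] is actually a string. A sketch of both lookups on plain dicts:

kwargs = {"param": {"2t": "t2m", "10u": "u10"}}
what = "param"
value = "2t"

# Old (broken) lookup searched the outer dict for the metadata value,
# so "2t" was never found and nothing got renamed.
old = kwargs.get(value, value)  # -> "2t"

# Fixed lookup descends into the mapping for `what` first.
new = kwargs.get(what, {}).get(value, value)  # -> "t2m"
assert (old, new) == ("2t", "t2m")

# Format-based renaming is selected only for a string value:
fmt = {"param": "{param}_{levelist}"}
assert isinstance(fmt["param"], str)  # -> RenamedFieldFormat path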

anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py (new file)
@@ -0,0 +1,54 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, td, rh="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+
+    params = (t, td)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        td_values = values[td].to_numpy(flatten=True)
+        # actual conversion from td --> rh
+        rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
+        result.append(NewDataField(values[td], rh_values, rh))
+
+    return result

anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py (new file)
@@ -0,0 +1,59 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+EPS = 1.0e-4
+
+
+def execute(context, input, t, rh, td="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        rh_values = values[rh].to_numpy(flatten=True)
+        # Prevent 0 % Relative humidity which cannot be converted to dewpoint
+        # Seems to happen over Egypt in the CERRA dataset
+        rh_values[rh_values == 0] = EPS
+        # actual conversion from rh --> td
+        td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
+        result.append(NewDataField(values[rh], td_values, td))
+
+    return result
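
The EPS clamp above exists because inverting relative humidity to dewpoint takes a logarithm of rh, which diverges as rh approaches 0. An illustrative sketch using the Magnus approximation (not necessarily the exact formulation inside earthkit.meteo; t in degrees C, rh in %):

import numpy as np

A, B = 17.625, 243.04  # Magnus coefficients


def dewpoint_magnus(t, rh):
    gamma = np.log(rh / 100.0) + A * t / (B + t)  # log(0) diverges at rh == 0
    return B * gamma / (A - gamma)


t = np.array([20.0, 20.0])
rh = np.array([0.0, 1.0e-4])  # 0 % versus the EPS-clamped value
with np.errstate(divide="ignore", invalid="ignore"):
    print(dewpoint_magnus(t, rh))  # [nan, about -100.7]: only the clamped entry is usable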