anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/create.py +3 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +72 -35
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/config.py +4 -3
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +87 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/utils.py +3 -0
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/data/dataset.py +11 -1
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/stores.py +20 -7
- anemoi/datasets/dates/__init__.py +113 -30
- anemoi/datasets/dates/groups.py +92 -19
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
- anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/input.py +0 -1065
- anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
- /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/commands/create.py
CHANGED
@@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
     """
 
     now = datetime.datetime.now()
-    LOG.info(f"Task {what}({args},{kwargs}) starting")
+    LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
 
     from anemoi.datasets.create import creator_factory
 
@@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
     c = creator_factory(what.replace("-", "_"), **options)
     result = c.run()
 
-    LOG.…
+    LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
     return result
 
 
@@ -57,6 +57,7 @@ class Create(Command):
         command_parser.add_argument("--trace", action="store_true")
 
     def run(self, args):
+
         now = time.time()
         if args.threads + args.processes:
             self.parallel_create(args)
anemoi/datasets/commands/inspect.py
CHANGED
@@ -311,7 +311,7 @@ class Version:
         print(f"🕰️ Dataset initialized {when(start)}.")
         if built and latest:
             speed = (latest - start) / built
-            eta = datetime.datetime.…
+            eta = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + speed * (total - built)
             print(f"🏁 ETA {when(eta)}.")
         else:
             if latest:
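A quick sketch of the ETA pattern used above: datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) is the recommended replacement for the deprecated naive-UTC utcnow() pattern, and yields a value that can be added to a timedelta. Values below are illustrative only.

import datetime

# naive UTC "now", equivalent to the old utcnow() behaviour
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
speed = datetime.timedelta(minutes=3)  # illustrative: time taken per group built
eta = now + speed * 10                 # 10 groups left -> ETA 30 minutes from now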
anemoi/datasets/commands/publish.py
ADDED
@@ -0,0 +1,30 @@
+import logging
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class Publish(Command):
+    """Publish a dataset."""
+
+    # This is a command that is used to publish a dataset.
+    # it is a class, inheriting from Command.
+
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, parser):
+        parser.add_argument("path", help="Path of the dataset to publish.")
+
+    def run(self, args):
+        try:
+            from anemoi.registry import publish_dataset
+        except ImportError:
+            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
+            return
+
+        publish_dataset(args.path)
+
+
+command = Publish
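The new command delegates entirely to the optional anemoi-registry package. A sketch of the equivalent call from Python (the dataset path below is made up):

# What `anemoi-datasets publish <path>` does under the hood,
# assuming anemoi-registry is installed:
from anemoi.registry import publish_dataset

publish_dataset("/path/to/dataset.zarr")  # hypothetical path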
anemoi/datasets/create/__init__.py
CHANGED
@@ -14,6 +14,7 @@ import os
 import time
 import uuid
 import warnings
+from copy import deepcopy
 from functools import cached_property
 
 import numpy as np
@@ -24,9 +25,11 @@ from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
 from anemoi.utils.humanize import compress_dates
 from anemoi.utils.humanize import seconds_to_human
+from earthkit.data.core.order import build_remapping
 
 from anemoi.datasets import MissingDateError
 from anemoi.datasets import open_dataset
+from anemoi.datasets.create.input.trace import enable_trace
 from anemoi.datasets.create.persistent import build_storage
 from anemoi.datasets.data.misc import as_first_date
 from anemoi.datasets.data.misc import as_last_date
@@ -132,7 +135,7 @@ class Dataset:
                 v = v.isoformat()
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
 
-    @…
+    @cached_property
     def anemoi_dataset(self):
         return open_dataset(self.path)
 
@@ -245,9 +248,9 @@ class Actor:  # TODO: rename to Creator
         missing_dates = z.attrs.get("missing_dates", [])
         missing_dates = sorted([np.datetime64(d) for d in missing_dates])
         if missing_dates != expected:
-            LOG.…
-            LOG.…
-            LOG.…
+            LOG.warning("Missing dates given in recipe do not match the actual missing dates in the dataset.")
+            LOG.warning(f"Missing dates in recipe: {sorted(str(x) for x in missing_dates)}")
+            LOG.warning(f"Missing dates in dataset: {sorted(str(x) for x in expected)}")
             raise ValueError("Missing dates given in recipe do not match the actual missing dates in the dataset.")
 
         check_missing_dates(self.missing_dates)
@@ -308,7 +311,6 @@ class HasElementForDataMixin:
 
 
 def build_input_(main_config, output_config):
-    from earthkit.data.core.order import build_remapping
 
     builder = build_input(
         main_config.input,
@@ -323,11 +325,48 @@ def build_input_(main_config, output_config):
     return builder
 
 
+def tidy_recipe(config: object):
+    """Remove potentially private information in the config"""
+    config = deepcopy(config)
+    if isinstance(config, (tuple, list)):
+        return [tidy_recipe(_) for _ in config]
+    if isinstance(config, (dict, DotDict)):
+        for k, v in config.items():
+            if k.startswith("_"):
+                config[k] = "*** REMOVED FOR SECURITY ***"
+            else:
+                config[k] = tidy_recipe(v)
+    if isinstance(config, str):
+        if config.startswith("_"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("s3://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("gs://"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("http"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ftp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("file"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("ssh"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("scp"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("rsync"):
+            return "*** REMOVED FOR SECURITY ***"
+        if config.startswith("/"):
+            return "*** REMOVED FOR SECURITY ***"
+        if "@" in config:
+            return "*** REMOVED FOR SECURITY ***"
+    return config
+
+
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs):  # fmt: skip
         if _path_readable(path) and not overwrite:
-            raise Exception(f"{…
+            raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
 
         super().__init__(path, cache=cache)
         self.config = config
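A sketch of what the new tidy_recipe does to a made-up recipe fragment, assuming the function is importable from anemoi.datasets.create as defined above:

from anemoi.datasets.create import tidy_recipe

recipe = {
    "description": "my dataset",
    "_token": "abc123",                  # key starts with "_"
    "path": "s3://my-bucket/era5.zarr",  # cloud URL
    "contact": "user@example.com",       # contains "@"
}
print(tidy_recipe(recipe))
# {'description': 'my dataset',
#  '_token': '*** REMOVED FOR SECURITY ***',
#  'path': '*** REMOVED FOR SECURITY ***',
#  'contact': '*** REMOVED FOR SECURITY ***'}

Keys are kept so the redacted recipe stays structurally valid; only the offending values are masked.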
@@ -345,9 +384,12 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         assert isinstance(self.main_config.output.order_by, dict), self.main_config.output.order_by
         self.create_elements(self.main_config)
 
-
-
-
+        LOG.info(f"Groups: {self.groups}")
+
+        one_date = self.groups.one_date()
+        # assert False, (type(one_date), type(self.groups))
+        self.minimal_input = self.input.select(one_date)
+        LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)
 
     def run(self):
@@ -363,13 +405,15 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         LOG.info("Config loaded ok:")
         # LOG.info(self.main_config)
 
-        dates = self.groups.…
-        frequency = …
+        dates = self.groups.provider.values
+        frequency = self.groups.provider.frequency
+        missing = self.groups.provider.missing
+
         assert isinstance(frequency, datetime.timedelta), frequency
 
         LOG.info(f"Found {len(dates)} datetimes.")
         LOG.info(f"Dates: Found {len(dates)} datetimes, in {len(self.groups)} groups: ")
-        LOG.info(f"Missing dates: {len(…
+        LOG.info(f"Missing dates: {len(missing)}")
         lengths = tuple(len(g) for g in self.groups)
 
         variables = self.minimal_input.variables
@@ -404,6 +448,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         metadata.update(self.main_config.get("add_metadata", {}))
 
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
+        metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
 
         metadata["description"] = self.main_config.description
         metadata["licence"] = self.main_config["licence"]
@@ -426,7 +471,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
         metadata["frequency"] = frequency
-        metadata["missing_dates"] = [_.isoformat() for _ in …
+        metadata["missing_dates"] = [_.isoformat() for _ in missing]
 
         metadata["version"] = VERSION
 
@@ -481,17 +526,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
         assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks())
 
-        def sanity_check_config(a, b):
-            a = json.dumps(a, sort_keys=True, default=str)
-            b = json.dumps(b, sort_keys=True, default=str)
-            b = b.replace("T", " ")  # dates are expected to be different because
-            if a != b:
-                print("❌❌❌ FIXME: Config serialisation to be checked")
-                print(a)
-                print(b)
-
-        sanity_check_config(self.main_config, self.dataset.get_main_config())
-
         # Return the number of groups to process, so we can show a nice progress bar
         return len(lengths)
 
@@ -527,11 +561,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 LOG.info(f" -> Skipping {igroup} total={len(self.groups)} (already done)")
                 continue
 
-            assert isinstance(group[0], datetime.datetime), group
+            # assert isinstance(group[0], datetime.datetime), type(group[0])
            LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
 
-            result = self.input.select(…
-            assert result.…
+            result = self.input.select(group_of_dates=group)
+            assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
 
             # There are several groups.
             # There is one result to load for each group.
@@ -545,7 +579,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
     def load_result(self, result):
         # There is one cube to load for each result.
-        dates = result.…
+        dates = list(result.group_of_dates)
 
         cube = result.get_cube()
         shape = cube.extended_user_shape
@@ -555,7 +589,9 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
 
         def check_shape(cube, dates, dates_in_data):
             if cube.extended_user_shape[0] != len(dates):
-                print(…
+                print(
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
+                )
                 print("Requested dates", compress_dates(dates))
                 print("Cube dates", compress_dates(dates_in_data))
 
@@ -566,7 +602,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 print("Extra dates", compress_dates(b - a))
 
                 raise ValueError(
-                    f"Cube shape does not match the number of dates {cube.extended_user_shape[0]}, {len(dates)}"
+                    f"Cube shape does not match the number of dates got {cube.extended_user_shape[0]}, expected {len(dates)}"
                 )
 
         check_shape(cube, dates, dates_in_data)
@@ -846,7 +882,7 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         )
 
         if len(ifound) < 2:
-            LOG.…
+            LOG.warning(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
             self.tmp_storage.delete()
             return
 
@@ -919,7 +955,7 @@ def multi_addition(cls):
             self.actors.append(cls(*args, delta=k, **kwargs))
 
         if not self.actors:
-            LOG.warning("No delta found in kwargs, no …
+            LOG.warning("No delta found in kwargs, no additions will be computed.")
 
     def run(self):
         for actor in self.actors:
@@ -947,7 +983,9 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         )
         start, end = np.datetime64(start), np.datetime64(end)
         dates = self.dataset.anemoi_dataset.dates
-
+
+        assert type(dates[0]) is type(start), (type(dates[0]), type(start))
+
         dates = [d for d in dates if d >= start and d <= end]
         dates = [d for i, d in enumerate(dates) if i not in self.dataset.anemoi_dataset.missing]
         variables = self.dataset.anemoi_dataset.variables
@@ -956,7 +994,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(stats)
 
         if not all(self.registry.get_flags(sync=False)):
-            raise Exception(f"❗Zarr {self.path} is not fully built, not …
+            raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
 
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
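The new assert ensures the dataset's dates and the start/end bounds are the same type before the comparisons that follow. A minimal illustration with made-up dates:

import numpy as np

start = np.datetime64("2020-01-01")
dates = [np.datetime64("2019-12-31"), np.datetime64("2020-06-01")]

assert type(dates[0]) is type(start), (type(dates[0]), type(start))
selected = [d for d in dates if d >= start]  # [numpy.datetime64('2020-06-01')]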
@@ -994,7 +1032,6 @@ def chain(tasks):
 
 def creator_factory(name, trace=None, **kwargs):
     if trace:
-        from anemoi.datasets.create.trace import enable_trace
 
         enable_trace(trace)
 
anemoi/datasets/create/check.py
CHANGED
@@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):
 
 def check_data_values(arr, *, name: str, log=[], allow_nans=False):
 
+    shape = arr.shape
+
     if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
         arr = arr[~np.isnan(arr)]
 
+    if arr.size == 0:
+        warnings.warn(f"Empty array for {name} ({shape})")
+        return
+
     assert arr.size > 0, (name, *log)
 
     min, max = arr.min(), arr.max()
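With this change, an array that becomes empty after NaN-filtering warns instead of tripping the size assertion. A sketch of the new behaviour (the variable name "2t" is illustrative):

import numpy as np
from anemoi.datasets.create.check import check_data_values

arr = np.array([np.nan, np.nan])
check_data_values(arr, name="2t", allow_nans=True)
# UserWarning: Empty array for 2t ((2,))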
anemoi/datasets/create/config.py
CHANGED
@@ -215,8 +215,9 @@ def set_to_test_mode(cfg):
     NUMBER_OF_DATES = 4
 
     dates = cfg["dates"]
-    LOG.…
+    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
     groups = Groups(**LoadersConfig(cfg).dates)
+
     dates = groups.dates
     cfg["dates"] = dict(
         start=dates[0],
@@ -234,12 +235,12 @@ def set_to_test_mode(cfg):
     if "grid" in obj:
         previous = obj["grid"]
         obj["grid"] = "20./20."
-        LOG.…
+        LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
     if "number" in obj:
         if isinstance(obj["number"], (list, tuple)):
             previous = obj["number"]
             obj["number"] = previous[0:3]
-            LOG.…
+            LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
     for k, v in obj.items():
         set_element_to_test(v)
     if "constants" in obj:
anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py
ADDED
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, rh, q="q"):
+    """Convert relative humidity on pressure levels to specific humidity"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        rh_pl = values[rh].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from rh --> q_v
+        q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
+        result.append(NewDataField(values[rh], q_pl, q))
+
+    return result
anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py
ADDED
@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, q, rh="r"):
+    """Convert specific humidity on pressure levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, q)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_pl = values[t].to_numpy(flatten=True)
+        q_pl = values[q].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+
+        # actual conversion from q --> rh
+        rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
+        result.append(NewDataField(values[q], rh_pl, rh))
+
+    return result
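Both pressure-level filters delegate the actual conversion to earthkit.meteo.thermo, after converting the pressure level from hPa to Pa. A standalone sketch with made-up values (units assumed: K, %, Pa):

import numpy as np
from earthkit.meteo import thermo

t = np.array([270.0, 280.0, 290.0])  # temperature [K]
r = np.array([60.0, 70.0, 80.0])     # relative humidity [%]
p = 850 * 100                        # 850 hPa pressure level, in Pa

q = thermo.specific_humidity_from_relative_humidity(t, r, p)
r_back = thermo.relative_humidity_from_specific_humidity(t, q, p)  # ~= r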
anemoi/datasets/create/functions/filters/rename.py
CHANGED
@@ -32,7 +32,7 @@ class RenamedFieldMapping:
 
         value = self.field.metadata(key, **kwargs)
         if key == self.what:
-            return self.renaming.get(value, value)
+            return self.renaming.get(self.what, {}).get(value, value)
 
         return value
 
@@ -68,8 +68,7 @@ class RenamedFieldFormat:
 
 
 def execute(context, input, what="param", **kwargs):
-
-    if what in kwargs:
+    if what in kwargs and isinstance(kwargs[what], str):
         return FieldArray([RenamedFieldFormat(fs, kwargs[what]) for fs in input])
 
     return FieldArray([RenamedFieldMapping(fs, what, kwargs) for fs in input])
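The one-line change in RenamedFieldMapping fixes the lookup: execute passes the whole kwargs mapping as the renaming table, so it must first be indexed by the renamed key (what) before looking up the value. A minimal reproduction with illustrative names:

renaming = {"param": {"2t": "t2m", "10u": "u10"}}  # as passed by execute(...)
what, value = "param", "2t"

before = renaming.get(value, value)                # "2t"  (lookup never matched)
after = renaming.get(what, {}).get(value, value)   # "t2m" (renamed as intended)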
anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py
ADDED
@@ -0,0 +1,54 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+
+def execute(context, input, t, td, rh="d"):
+    """Convert dewpoint on single levels to relative humidity"""
+    result = FieldArray()
+
+    params = (t, td)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        td_values = values[td].to_numpy(flatten=True)
+        # actual conversion from td --> rh
+        rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
+        result.append(NewDataField(values[td], rh_values, rh))
+
+    return result
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py
ADDED
@@ -0,0 +1,59 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+from collections import defaultdict
+
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+
+EPS = 1.0e-4
+
+
+def execute(context, input, t, rh, td="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+
+    params = (t, rh)
+    pairs = defaultdict(dict)
+
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+
+    for keys, values in pairs.items():
+        # some checks
+
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+
+        t_values = values[t].to_numpy(flatten=True)
+        rh_values = values[rh].to_numpy(flatten=True)
+        # Prevent 0 % Relative humidity which cannot be converted to dewpoint
+        # Seems to happen over Egypt in the CERRA dataset
+        rh_values[rh_values == 0] = EPS
+        # actual conversion from rh --> td
+        td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
+        result.append(NewDataField(values[rh], td_values, td))
+
+    return result
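The EPS clamp above is needed because computing dewpoint from relative humidity inverts the saturation vapour-pressure curve, which involves a logarithm of the humidity, so an input of exactly 0 % maps to -inf. A sketch with made-up values (units assumed: K and %):

import numpy as np
from earthkit.meteo import thermo

t = np.array([300.0, 300.0])   # temperature [K]
r = np.array([0.0, 50.0])      # relative humidity [%]; the 0 % sample cannot be inverted
r[r == 0] = 1.0e-4             # same guard as EPS in the filter
td = thermo.dewpoint_from_relative_humidity(t=t, r=r)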