PyPI - anemoi-datasets - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

anemoi/datasets/_version.py +2 -2
anemoi/datasets/commands/cleanup.py +44 -0
anemoi/datasets/commands/create.py +52 -21
anemoi/datasets/commands/finalise-additions.py +45 -0
anemoi/datasets/commands/finalise.py +39 -0
anemoi/datasets/commands/init-additions.py +45 -0
anemoi/datasets/commands/init.py +67 -0
anemoi/datasets/commands/inspect.py +1 -1
anemoi/datasets/commands/load-additions.py +47 -0
anemoi/datasets/commands/load.py +47 -0
anemoi/datasets/commands/patch.py +39 -0
anemoi/datasets/create/__init__.py +959 -146
anemoi/datasets/create/check.py +5 -3
anemoi/datasets/create/config.py +54 -2
anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
anemoi/datasets/create/functions/sources/grib.py +86 -1
anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
anemoi/datasets/create/functions/sources/mars.py +9 -3
anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
anemoi/datasets/create/input.py +62 -39
anemoi/datasets/create/persistent.py +1 -1
anemoi/datasets/create/statistics/__init__.py +39 -23
anemoi/datasets/create/utils.py +6 -2
anemoi/datasets/data/__init__.py +1 -0
anemoi/datasets/data/concat.py +46 -2
anemoi/datasets/data/dataset.py +119 -34
anemoi/datasets/data/debug.py +5 -1
anemoi/datasets/data/forwards.py +17 -8
anemoi/datasets/data/grids.py +17 -3
anemoi/datasets/data/interpolate.py +133 -0
anemoi/datasets/data/masked.py +2 -2
anemoi/datasets/data/misc.py +56 -66
anemoi/datasets/data/missing.py +240 -0
anemoi/datasets/data/rescale.py +147 -0
anemoi/datasets/data/select.py +7 -1
anemoi/datasets/data/stores.py +23 -10
anemoi/datasets/data/subset.py +47 -5
anemoi/datasets/data/unchecked.py +20 -22
anemoi/datasets/data/xy.py +125 -0
anemoi/datasets/dates/__init__.py +124 -95
anemoi/datasets/dates/groups.py +85 -20
anemoi/datasets/grids.py +66 -48
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
anemoi/datasets/create/loaders.py +0 -936
anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0

anemoi/datasets/create/check.py CHANGED Viewed

@@ -12,6 +12,7 @@ import re
 import warnings
 import numpy as np
+from anemoi.utils.dates import frequency_to_string
 LOG = logging.getLogger(__name__)
@@ -56,10 +57,11 @@ class DatasetName:
             raise ValueError(self.error_message)
     def _parse(self, name):
-        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
+        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)?$"
         match = re.match(pattern, name)
-        assert match, (name, pattern)
+        if not match:
+            raise ValueError(f"the dataset name '{name}' does not follow naming convention. Does not match {pattern}")
         parsed = {}
         if match:
@@ -105,7 +107,7 @@ class DatasetName:
     def check_frequency(self, frequency):
         if frequency is None:
             return
-        frequency_str = f"{frequency}h"
+        frequency_str = frequency_to_string(frequency)
         self._check_missing("frequency", frequency_str)
         self._check_mismatch("frequency", frequency_str)

anemoi/datasets/create/config.py CHANGED Viewed

@@ -16,6 +16,8 @@ from anemoi.utils.config import DotDict
 from anemoi.utils.config import load_any_dict_format
 from earthkit.data.core.order import normalize_order_by
+from anemoi.datasets.dates.groups import Groups
 LOG = logging.getLogger(__name__)
@@ -153,6 +155,8 @@ class LoadersConfig(Config):
             raise ValueError("statistics_end is not supported anymore. Use 'statistics:end:' instead")
         self.setdefault("statistics", Config())
+        if "allow_nans" not in self.statistics:
+            self.statistics.allow_nans = []
         check_dict_value_and_set(self.output, "flatten_grid", True)
         check_dict_value_and_set(self.output, "ensemble_dimension", 2)
@@ -207,8 +211,50 @@ def _prepare_serialisation(o):
     return str(o)
-def loader_config(config):
+def set_to_test_mode(cfg):
+    NUMBER_OF_DATES = 4
+    dates = cfg["dates"]
+    LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
+    groups = Groups(**LoadersConfig(cfg).dates)
+    dates = groups.dates
+    cfg["dates"] = dict(
+        start=dates[0],
+        end=dates[NUMBER_OF_DATES - 1],
+        frequency=dates.frequency,
+        group_by=NUMBER_OF_DATES,
+    )
+    def set_element_to_test(obj):
+        if isinstance(obj, (list, tuple)):
+            for v in obj:
+                set_element_to_test(v)
+            return
+        if isinstance(obj, (dict, DotDict)):
+            if "grid" in obj:
+                previous = obj["grid"]
+                obj["grid"] = "20./20."
+                LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
+            if "number" in obj:
+                if isinstance(obj["number"], (list, tuple)):
+                    previous = obj["number"]
+                    obj["number"] = previous[0:3]
+                    LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
+            for k, v in obj.items():
+                set_element_to_test(v)
+            if "constants" in obj:
+                constants = obj["constants"]
+                if "param" in constants and isinstance(constants["param"], list):
+                    constants["param"] = ["cos_latitude"]
+    set_element_to_test(cfg)
+def loader_config(config, is_test=False):
     config = Config(config)
+    if is_test:
+        set_to_test_mode(config)
     obj = LoadersConfig(config)
     # yaml round trip to check that serialisation works as expected
@@ -216,7 +262,13 @@ def loader_config(config):
     copy = yaml.load(yaml.dump(copy), Loader=yaml.SafeLoader)
     copy = Config(copy)
     copy = LoadersConfig(config)
-    assert yaml.dump(obj) == yaml.dump(copy), (obj, copy)
+    a = yaml.dump(obj)
+    b = yaml.dump(copy)
+    if a != b:
+        print(a)
+        print(b)
+        raise ValueError("Serialisation failed")
     return copy

anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py ADDED Viewed

@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+def execute(context, input, t, rh, q="q"):
+    """Convert relative humidity on pressure levels to specific humidity"""
+    result = FieldArray()
+    params = (t, rh)
+    pairs = defaultdict(dict)
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+    for keys, values in pairs.items():
+        # some checks
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+        t_pl = values[t].to_numpy(flatten=True)
+        rh_pl = values[rh].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+        # actual conversion from rh --> q_v
+        q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
+        result.append(NewDataField(values[rh], q_pl, q))
+    return result

anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py ADDED Viewed

@@ -0,0 +1,57 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+def execute(context, input, t, q, rh="r"):
+    """Convert specific humidity on pressure levels to relative humidity"""
+    result = FieldArray()
+    params = (t, q)
+    pairs = defaultdict(dict)
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+    for keys, values in pairs.items():
+        # some checks
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+        t_pl = values[t].to_numpy(flatten=True)
+        q_pl = values[q].to_numpy(flatten=True)
+        pressure = keys[4][1] * 100  # TODO: REMOVE HARDCODED INDICES
+        # print(f"Handling fields for pressure level {pressure}...")
+        # actual conversion from rh --> q_v
+        rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
+        result.append(NewDataField(values[q], rh_pl, rh))
+    return result

anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py ADDED Viewed

@@ -0,0 +1,54 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+def execute(context, input, t, td, rh="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+    params = (t, td)
+    pairs = defaultdict(dict)
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+    for keys, values in pairs.items():
+        # some checks
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+        t_values = values[t].to_numpy(flatten=True)
+        td_values = values[td].to_numpy(flatten=True)
+        # actual conversion from td --> rh
+        rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
+        result.append(NewDataField(values[td], rh_values, rh))
+    return result

anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py ADDED Viewed

@@ -0,0 +1,59 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+EPS = 1.0e-4
+def execute(context, input, t, rh, td="d"):
+    """Convert relative humidity on single levels to dewpoint"""
+    result = FieldArray()
+    params = (t, rh)
+    pairs = defaultdict(dict)
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param in params:
+            key = tuple(key.items())
+            if param in pairs[key]:
+                raise ValueError(f"Duplicate field {param} for {key}")
+            pairs[key][param] = f
+            if param == t:
+                result.append(f)
+        # all other parameters
+        else:
+            result.append(f)
+    for keys, values in pairs.items():
+        # some checks
+        if len(values) != 2:
+            raise ValueError("Missing fields")
+        t_values = values[t].to_numpy(flatten=True)
+        rh_values = values[rh].to_numpy(flatten=True)
+        # Prevent 0 % Relative humidity which cannot be converted to dewpoint
+        # Seems to happen over Egypt in the CERRA dataset
+        rh_values[rh_values == 0] = EPS
+        # actual conversion from rh --> td
+        td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
+        result.append(NewDataField(values[rh], td_values, td))
+    return result

anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py ADDED Viewed

@@ -0,0 +1,115 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+import numpy as np
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import thermo
+from .single_level_specific_humidity_to_relative_humidity import AutoDict
+from .single_level_specific_humidity_to_relative_humidity import NewDataField
+from .single_level_specific_humidity_to_relative_humidity import pressure_at_height_level
+def execute(context, input, height, t, rh, sp, new_name="2q", **kwargs):
+    """Convert the single (height) level relative humidity to specific humidity"""
+    result = FieldArray()
+    MANDATORY_KEYS = ["A", "B"]
+    OPTIONAL_KEYS = ["t_ml", "q_ml"]
+    MISSING_KEYS = []
+    DEFAULTS = dict(t_ml="t", q_ml="q")
+    for key in OPTIONAL_KEYS:
+        if key not in kwargs:
+            print(f"key {key} not found in yaml-file, using default key: {DEFAULTS[key]}")
+            kwargs[key] = DEFAULTS[key]
+    for key in MANDATORY_KEYS:
+        if key not in kwargs:
+            MISSING_KEYS.append(key)
+    if MISSING_KEYS:
+        raise KeyError(f"Following keys are missing: {', '.join(MISSING_KEYS)}")
+    single_level_params = (t, rh, sp)
+    model_level_params = (kwargs["t_ml"], kwargs["q_ml"])
+    needed_fields = AutoDict()
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        # check single level parameters
+        if param in single_level_params:
+            levtype = key.pop("levtype")
+            key = tuple(key.items())
+            if param in needed_fields[key][levtype]:
+                raise ValueError(f"Duplicate single level field {param} for {key}")
+            needed_fields[key][levtype][param] = f
+            if param == rh:
+                if kwargs.get("keep_rh", False):
+                    result.append(f)
+            else:
+                result.append(f)
+        # check model level parameters
+        elif param in model_level_params:
+            levtype = key.pop("levtype")
+            levelist = key.pop("levelist")
+            key = tuple(key.items())
+            if param in needed_fields[key][levtype][levelist]:
+                raise ValueError(f"Duplicate model level field {param} for {key} at level {levelist}")
+            needed_fields[key][levtype][levelist][param] = f
+        # all other parameters
+        else:
+            result.append(f)
+    for _, values in needed_fields.items():
+        # some checks
+        if len(values["sfc"]) != 3:
+            raise ValueError("Missing surface fields")
+        rh_sl = values["sfc"][rh].to_numpy(flatten=True)
+        t_sl = values["sfc"][t].to_numpy(flatten=True)
+        sp_sl = values["sfc"][sp].to_numpy(flatten=True)
+        nlevels = len(kwargs["A"]) - 1
+        if len(values["ml"]) != nlevels:
+            raise ValueError("Missing model levels")
+        for key in values["ml"].keys():
+            if len(values["ml"][key]) != 2:
+                raise ValueError(f"Missing field on level {key}")
+        # create 3D arrays for upper air fields
+        levels = list(values["ml"].keys())
+        levels.sort()
+        t_ml = []
+        q_ml = []
+        for level in levels:
+            t_ml.append(values["ml"][level][kwargs["t_ml"]].to_numpy(flatten=True))
+            q_ml.append(values["ml"][level][kwargs["q_ml"]].to_numpy(flatten=True))
+        t_ml = np.stack(t_ml)
+        q_ml = np.stack(q_ml)
+        # actual conversion from rh --> q_v
+        p_sl = pressure_at_height_level(height, q_ml, t_ml, sp_sl, np.array(kwargs["A"]), np.array(kwargs["B"]))
+        q_sl = thermo.specific_humidity_from_relative_humidity(t_sl, rh_sl, p_sl)
+        result.append(NewDataField(values["sfc"][rh], q_sl, new_name))
+    return result

anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl