PyPI - anemoi-datasets - Versions diffs - 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

anemoi-datasets 0.4.5py3-none-any.whl → 0.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py ADDED Viewed

@@ -0,0 +1,390 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+import numpy as np
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo import constants
+from earthkit.meteo import thermo
+# Alternative proposed by Baudouin Raoult
+class AutoDict(dict):
+    def __missing__(self, key):
+        value = self[key] = type(self)()
+        return value
+class NewDataField:
+    def __init__(self, field, data, new_name):
+        self.field = field
+        self.data = data
+        self.new_name = new_name
+    def to_numpy(self, *args, **kwargs):
+        return self.data
+    def metadata(self, key=None, **kwargs):
+        if key is None:
+            return self.field.metadata(**kwargs)
+        value = self.field.metadata(key, **kwargs)
+        if key == "param":
+            return self.new_name
+        return value
+    def __getattr__(self, name):
+        return getattr(self.field, name)
+def model_level_pressure(A, B, surface_pressure):
+    """Calculates:
+     - pressure at the model full- and half-levels
+     - delta: depth of log(pressure) at full levels
+     - alpha: alpha term #TODO: more descriptive information
+    Parameters
+    ----------
+    A : ndarray
+        A-coefficients defining the model levels
+    B : ndarray
+        B-coefficients defining the model levels
+    surface_pressure: number or ndarray
+        surface pressure (Pa)
+    Returns
+    -------
+    ndarray
+        pressure at model full-levels
+    ndarray
+        pressure at model half-levels
+    ndarray
+        delta at full-levels
+    ndarray
+        alpha at full levels
+    """
+    # constants
+    PRESSURE_TOA = 0.1  # safety when highest pressure level = 0.0
+    # make the calculation agnostic to the number of dimensions
+    ndim = surface_pressure.ndim
+    new_shape_half = (A.shape[0],) + (1,) * ndim
+    A_reshaped = A.reshape(new_shape_half)
+    B_reshaped = B.reshape(new_shape_half)
+    # calculate pressure on model half-levels
+    p_half_level = A_reshaped + B_reshaped * surface_pressure[np.newaxis, ...]
+    # calculate delta
+    new_shape_full = (A.shape[0] - 1,) + surface_pressure.shape
+    delta = np.zeros(new_shape_full)
+    delta[1:, ...] = np.log(p_half_level[2:, ...] / p_half_level[1:-1, ...])
+    # pressure at highest half level<= 0.1
+    if np.any(p_half_level[0, ...] <= PRESSURE_TOA):
+        delta[0, ...] = np.log(p_half_level[1, ...] / PRESSURE_TOA)
+    # pressure at highest half level > 0.1
+    else:
+        delta[0, ...] = np.log(p_half_level[1, ...] / p_half_level[0, ...])
+    # calculate alpha
+    alpha = np.zeros(new_shape_full)
+    alpha[1:, ...] = 1.0 - p_half_level[1:-1, ...] / (p_half_level[2:, ...] - p_half_level[1:-1, ...]) * delta[1:, ...]
+    # pressure at highest half level <= 0.1
+    if np.any(p_half_level[0, ...] <= PRESSURE_TOA):
+        alpha[0, ...] = 1.0  # ARPEGE choice, ECMWF IFS uses log(2)
+    # pressure at highest half level > 0.1
+    else:
+        alpha[0, ...] = 1.0 - p_half_level[0, ...] / (p_half_level[1, ...] - p_half_level[0, ...]) * delta[0, ...]
+    # calculate pressure on model full levels
+    # TODO: is there a faster way to calculate the averages?
+    # TODO: introduce option to calculate full levels in more complicated way
+    p_full_level = np.apply_along_axis(lambda m: np.convolve(m, np.ones(2) / 2, mode="valid"), axis=0, arr=p_half_level)
+    return p_full_level, p_half_level, delta, alpha
+def calc_specific_gas_constant(q):
+    """Calculates the specific gas constant of moist air
+    (specific content of cloud particles and hydrometeors are neglected)
+    Parameters
+    ----------
+    q : number or ndarray
+        specific humidity
+    Returns
+    -------
+    number or ndarray
+        specific gas constant of moist air
+    """
+    R = constants.Rd + (constants.Rv - constants.Rd) * q
+    return R
+def relative_geopotential_thickness(alpha, q, T):
+    """Calculates the geopotential thickness w.r.t the surface on model full-levels
+    Parameters
+    ----------
+    alpha : ndarray
+        alpha term of pressure calculations
+    q : ndarray
+        specific humidity (in kg/kg) on model full-levels
+    T : ndarray
+        temperature (in Kelvin) on model full-levels
+    Returns
+    -------
+    ndarray
+        geopotential thickness of model full-levels w.r.t. the surface
+    """
+    R = calc_specific_gas_constant(q)
+    dphi = np.cumsum(np.flip(alpha * R * T, axis=0), axis=0)
+    dphi = np.flip(dphi, axis=0)
+    return dphi
+def pressure_at_height_level(height, q, T, sp, A, B):
+    """Calculates the pressure at a height level given in meters above surface.
+    This is done by finding the model level above and below the specified height
+    and interpolating the pressure
+    Parameters
+    ----------
+    height : number
+        height (in meters) above the surface for which the pressure is wanted
+    q : ndarray
+        specific humidity (kg/kg) at model full-levels
+    T : ndarray
+        temperature (K) at model full-levels
+    sp : ndarray
+        surface pressure (Pa)
+    A : ndarray
+        A-coefficients defining the model levels
+    B : ndarray
+        B-coefficients defining the model levels
+    Returns
+    -------
+    number or ndarray
+        pressure (Pa) at the given height level
+    """
+    # geopotential thickness of the height level
+    tdphi = height * constants.g
+    # pressure(-related) variables
+    p_full, p_half, _, alpha = model_level_pressure(A, B, sp)
+    # relative geopot. thickness of full levels
+    dphi = relative_geopotential_thickness(alpha, q, T)
+    # find the model full level right above the height level
+    i_phi = (tdphi > dphi).sum(0)
+    # initialize the output array
+    p_height = np.zeros_like(i_phi, dtype=np.float64)
+    # define mask: requested height is below the lowest model full-level
+    mask = i_phi == 0
+    # CASE 1: requested height is below the lowest model full-level
+    # --> interpolation between surface pressure and lowest model full-level
+    p_height[mask] = (p_half[-1, ...] + tdphi / dphi[-1, ...] * (p_full[-1, ...] - p_half[-1, ...]))[mask]
+    # CASE 2: requested height is above the lowest model full-level
+    # --> interpolation between between model full-level above and below
+    # define some indices for masking and readability
+    i_lev = alpha.shape[0] - i_phi - 1  # convert phi index to model level index
+    indices = np.indices(i_lev.shape)
+    masked_indices = tuple(dim[~mask] for dim in indices)
+    above = (i_lev[~mask],) + masked_indices
+    below = (i_lev[~mask] + 1,) + masked_indices
+    dphi_above = dphi[above]
+    dphi_below = dphi[below]
+    factor = (tdphi - dphi_above) / (dphi_below - dphi_above)
+    p_height[~mask] = p_full[above] + factor * (p_full[below] - p_full[above])
+    return p_height
+def execute(context, input, height, t, q, sp, new_name="2r", **kwargs):
+    """Convert the single (height) level specific humidity to relative humidity"""
+    result = FieldArray()
+    MANDATORY_KEYS = ["A", "B"]
+    OPTIONAL_KEYS = ["t_ml", "q_ml"]
+    MISSING_KEYS = []
+    DEFAULTS = dict(t_ml="t", q_ml="q")
+    for key in OPTIONAL_KEYS:
+        if key not in kwargs:
+            print(f"key {key} not found in yaml-file, using default key: {DEFAULTS[key]}")
+            kwargs[key] = DEFAULTS[key]
+    for key in MANDATORY_KEYS:
+        if key not in kwargs:
+            MISSING_KEYS.append(key)
+    if MISSING_KEYS:
+        raise KeyError(f"Following keys are missing: {', '.join(MISSING_KEYS)}")
+    single_level_params = (t, q, sp)
+    model_level_params = (kwargs["t_ml"], kwargs["q_ml"])
+    needed_fields = AutoDict()
+    # Gather all necessary fields
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        # check single level parameters
+        if param in single_level_params:
+            levtype = key.pop("levtype")
+            key = tuple(sorted(key.items()))
+            if param in needed_fields[key][levtype]:
+                raise ValueError(f"Duplicate single level field {param} for {key}")
+            needed_fields[key][levtype][param] = f
+            if param == q:
+                if kwargs.get("keep_q", False):
+                    result.append(f)
+            else:
+                result.append(f)
+        # check model level parameters
+        elif param in model_level_params:
+            levtype = key.pop("levtype")
+            levelist = key.pop("levelist")
+            key = tuple(sorted(key.items()))
+            if param in needed_fields[key][levtype][levelist]:
+                raise ValueError(f"Duplicate model level field {param} for {key} at level {levelist}")
+            needed_fields[key][levtype][levelist][param] = f
+        # all other parameters
+        else:
+            result.append(f)
+    for _, values in needed_fields.items():
+        # some checks
+        if len(values["sfc"]) != 3:
+            raise ValueError("Missing surface fields")
+        q_sl = values["sfc"][q].to_numpy(flatten=True)
+        t_sl = values["sfc"][t].to_numpy(flatten=True)
+        sp_sl = values["sfc"][sp].to_numpy(flatten=True)
+        nlevels = len(kwargs["A"]) - 1
+        if len(values["ml"]) != nlevels:
+            raise ValueError("Missing model levels")
+        for key in values["ml"].keys():
+            if len(values["ml"][key]) != 2:
+                raise ValueError(f"Missing field on level {key}")
+        # create 3D arrays for upper air fields
+        levels = list(values["ml"].keys())
+        levels.sort()
+        t_ml = []
+        q_ml = []
+        for level in levels:
+            t_ml.append(values["ml"][level][kwargs["t_ml"]].to_numpy(flatten=True))
+            q_ml.append(values["ml"][level][kwargs["q_ml"]].to_numpy(flatten=True))
+        t_ml = np.stack(t_ml)
+        q_ml = np.stack(q_ml)
+        # actual conversion from qv --> rh
+        # FIXME:
+        # For now We need to go from qv --> td --> rh to take into account
+        # the mixed / ice phase when T ~ 0C / T < 0C
+        # See https://github.com/ecmwf/earthkit-meteo/issues/15
+        p_sl = pressure_at_height_level(height, q_ml, t_ml, sp_sl, np.array(kwargs["A"]), np.array(kwargs["B"]))
+        td_sl = thermo.dewpoint_from_specific_humidity(q=q_sl, p=p_sl)
+        rh_sl = thermo.relative_humidity_from_dewpoint(t=t_sl, td=td_sl)
+        result.append(NewDataField(values["sfc"][q], rh_sl, new_name))
+    return result
+def test():
+    """Manual check of ``execute`` against data retrieved from MARS.
+    Retrieves single-level and model-level forecast fields, runs the filter
+    and prints the mean, median and maximum absolute difference between the
+    retrieved dewpoint and the dewpoint recomputed from the derived relative
+    humidity. Needs access to the "mars" source; nothing is asserted.
+    """
+    from earthkit.data import from_source
+    from earthkit.data.readers.grib.index import GribFieldList
+    # IFS forecasts have both specific humidity and dewpoint
+    sl = from_source(
+        "mars",
+        {
+            "date": "2022-01-01",
+            "class": "od",
+            "expver": "1",
+            "stream": "oper",
+            "levtype": "sfc",
+            "param": "96.174/134.128/167.128/168.128",
+            "time": "00:00:00",
+            "type": "fc",
+            "step": "2",
+            "grid": "O640",
+        },
+    )
+    ml = from_source(
+        "mars",
+        {
+            "date": "2022-01-01",
+            "class": "od",
+            "expver": "1",
+            "stream": "oper",
+            "levtype": "ml",
+            "levelist": "130/131/132/133/134/135/136/137",
+            "param": "130/133",
+            "time": "00:00:00",
+            "type": "fc",
+            "step": "2",
+            "grid": "O640",
+        },
+    )
+    source = GribFieldList.merge([sl, ml])
+    # IFS A and B coefficients for level 137 - 129
+    kwargs = {
+        "A": [424.414063, 302.476563, 202.484375, 122.101563, 62.781250, 22.835938, 3.757813, 0.0, 0.0],
+        "B": [0.969513, 0.975078, 0.980072, 0.984542, 0.988500, 0.991984, 0.995003, 0.997630, 1.000000],
+    }
+    source = execute(None, source, 2, "2t", "2sh", "sp", "2r", **kwargs)
+    # NOTE(review): the positional indices below assume a specific order and
+    # number of fields in the result of execute() - confirm against a real run.
+    temperature = source[2].to_numpy(flatten=True)
+    dewpoint = source[3].to_numpy(flatten=True)
+    relhum = source[4].to_numpy()
+    newdew = thermo.dewpoint_from_relative_humidity(temperature, relhum)
+    # NOTE(review): ":02f" means minimum width 2 with the default precision;
+    # ":.2f" (two decimals) may have been intended - confirm.
+    print(f"Mean difference in dewpoint temperature: {np.abs(newdew - dewpoint).mean():02f} degC")
+    print(f"Median difference in dewpoint temperature: {np.median(np.abs(newdew - dewpoint)):02f} degC")
+    print(f"Maximum difference in dewpoint temperature: {np.abs(newdew - dewpoint).max():02f} degC")
+    # source.save("source.grib")
+# Run the manual MARS comparison only when this module is executed as a script.
+if __name__ == "__main__":
+    test()

anemoi/datasets/create/functions/filters/speeddir_to_uv.py ADDED Viewed

@@ -0,0 +1,77 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+import numpy as np
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo.wind.array import polar_to_xy
+class NewDataField:
+    def __init__(self, field, data, new_name):
+        self.field = field
+        self.data = data
+        self.new_name = new_name
+    def to_numpy(self, *args, **kwargs):
+        return self.data
+    def metadata(self, key=None, **kwargs):
+        if key is None:
+            return self.field.metadata(**kwargs)
+        value = self.field.metadata(key, **kwargs)
+        if key == "param":
+            return self.new_name
+        return value
+    def __getattr__(self, name):
+        return getattr(self.field, name)
+def execute(context, input, wind_speed, wind_dir, u_component="u", v_component="v", in_radians=False):
+    result = FieldArray()
+    wind_params = (wind_speed, wind_dir)
+    wind_pairs = defaultdict(dict)
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param not in wind_params:
+            result.append(f)
+            continue
+        key = tuple(key.items())
+        if param in wind_pairs[key]:
+            raise ValueError(f"Duplicate wind component {param} for {key}")
+        wind_pairs[key][param] = f
+    for _, pairs in wind_pairs.items():
+        if len(pairs) != 2:
+            raise ValueError("Missing wind component")
+        magnitude = pairs[wind_speed]
+        direction = pairs[wind_dir]
+        # assert speed.grid_mapping == dir.grid_mapping
+        if in_radians:
+            direction = np.rad2deg(direction)
+        u, v = polar_to_xy(magnitude.to_numpy(flatten=True), direction.to_numpy(flatten=True))
+        result.append(NewDataField(magnitude, u, u_component))
+        result.append(NewDataField(direction, v, v_component))
+    return result

anemoi/datasets/create/functions/filters/uv_to_speeddir.py ADDED Viewed

@@ -0,0 +1,55 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+from collections import defaultdict
+import numpy as np
+from earthkit.data.indexing.fieldlist import FieldArray
+from earthkit.meteo.wind.array import xy_to_polar
+from anemoi.datasets.create.functions.filters.speeddir_to_uv import NewDataField
+def execute(context, input, u_component, v_component, wind_speed, wind_dir, in_radians=False):
+    result = FieldArray()
+    wind_params = (u_component, v_component)
+    wind_pairs = defaultdict(dict)
+    for f in input:
+        key = f.metadata(namespace="mars")
+        param = key.pop("param")
+        if param not in wind_params:
+            result.append(f)
+            continue
+        key = tuple(key.items())
+        if param in wind_pairs[key]:
+            raise ValueError(f"Duplicate wind component {param} for {key}")
+        wind_pairs[key][param] = f
+    for _, pairs in wind_pairs.items():
+        if len(pairs) != 2:
+            raise ValueError("Missing wind component")
+        u = pairs[u_component]
+        v = pairs[v_component]
+        # assert speed.grid_mapping == dir.grid_mapping
+        magnitude, direction = xy_to_polar(u.to_numpy(flatten=True), v.to_numpy(flatten=True))
+        if in_radians:
+            direction = np.deg2rad(direction)
+        result.append(NewDataField(u, magnitude, wind_speed))
+        result.append(NewDataField(v, direction, wind_dir))
+    return result

anemoi/datasets/create/functions/sources/grib.py CHANGED Viewed

@@ -11,9 +11,87 @@
 import glob
 from earthkit.data import from_source
+from earthkit.data.indexing.fieldlist import FieldArray
 from earthkit.data.utils.patterns import Pattern
+def _load(context, name, record):
+    ds = None
+    param = record["param"]
+    if "path" in record:
+        context.info(f"Using {name} from {record['path']} (param={param})")
+        ds = from_source("file", record["path"])
+    if "url" in record:
+        context.info(f"Using {name} from {record['url']} (param={param})")
+        ds = from_source("url", record["url"])
+    ds = ds.sel(param=param)
+    assert len(ds) == 1, f"{name} {param}, expected one field, got {len(ds)}"
+    ds = ds[0]
+    return ds.to_numpy(flatten=True), ds.metadata("uuidOfHGrid")
+class Geography:
+    """This class retrieve the latitudes and longitudes of unstructured grids,
+    and checks if the fields are compatible with the grid.
+    """
+    def __init__(self, context, latitudes, longitudes):
+        latitudes, uuidOfHGrid_lat = _load(context, "latitudes", latitudes)
+        longitudes, uuidOfHGrid_lon = _load(context, "longitudes", longitudes)
+        assert (
+            uuidOfHGrid_lat == uuidOfHGrid_lon
+        ), f"uuidOfHGrid mismatch: lat={uuidOfHGrid_lat} != lon={uuidOfHGrid_lon}"
+        context.info(f"Latitudes: {len(latitudes)}, Longitudes: {len(longitudes)}")
+        assert len(latitudes) == len(longitudes)
+        self.uuidOfHGrid = uuidOfHGrid_lat
+        self.latitudes = latitudes
+        self.longitudes = longitudes
+        self.first = True
+    def check(self, field):
+        if self.first:
+            # We only check the first field, for performance reasons
+            assert (
+                field.metadata("uuidOfHGrid") == self.uuidOfHGrid
+            ), f"uuidOfHGrid mismatch: {field.metadata('uuidOfHGrid')} != {self.uuidOfHGrid}"
+            self.first = False
+class AddGrid:
+    """An earth-kit.data.Field wrapper that adds grid information."""
+    def __init__(self, field, geography):
+        self._field = field
+        geography.check(field)
+        self._latitudes = geography.latitudes
+        self._longitudes = geography.longitudes
+    def __getattr__(self, name):
+        return getattr(self._field, name)
+    def __repr__(self) -> str:
+        return repr(self._field)
+    def grid_points(self):
+        return self._latitudes, self._longitudes
+    @property
+    def resolution(self):
+        return "unknown"
 def check(ds, paths, **kwargs):
     count = 1
     for k, v in kwargs.items():
@@ -34,9 +112,13 @@ def _expand(paths):
             yield path
-def execute(context, dates, path, *args, **kwargs):
+def execute(context, dates, path, latitudes=None, longitudes=None, *args, **kwargs):
     given_paths = path if isinstance(path, list) else [path]
+    geography = None
+    if latitudes is not None and longitudes is not None:
+        geography = Geography(context, latitudes, longitudes)
     ds = from_source("empty")
     dates = [d.isoformat() for d in dates]
@@ -56,4 +138,7 @@ def execute(context, dates, path, *args, **kwargs):
     if kwargs:
         check(ds, given_paths, valid_datetime=dates, **kwargs)
+    if geography is not None:
+        ds = FieldArray([AddGrid(_, geography) for _ in ds])
     return ds

anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

anemoi-datasets 0.4.5py3-none-any.whl → 0.5.0py3-none-any.whl