PyPI - anemoi-datasets - Versions diffs - 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl - Mend

anemoi-datasets 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

anemoi/datasets/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.5.17'
-__version_tuple__ = version_tuple = (0, 5, 17)
+__version__ = version = '0.5.19'
+__version_tuple__ = version_tuple = (0, 5, 19)

anemoi/datasets/create/__init__.py CHANGED Viewed

@@ -294,7 +294,14 @@ class Dataset:
         import zarr
         z = zarr.open(self.path, mode="r")
-        return loader_config(z.attrs.get("_create_yaml_config"))
+        config = loader_config(z.attrs.get("_create_yaml_config"))
+        if "env" in config:
+            for k, v in config["env"].items():
+                LOG.info(f"Setting env variable {k}={v}")
+                os.environ[k] = str(v)
+        return config
 class WritableDataset(Dataset):

anemoi/datasets/create/config.py CHANGED Viewed

@@ -420,6 +420,11 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
         print(b)
         raise ValueError("Serialisation failed")
+    if "env" in copy:
+        for k, v in copy["env"].items():
+            LOG.info(f"Setting env variable {k}={v}")
+            os.environ[k] = str(v)
     return copy

anemoi/datasets/create/filters/transform.py CHANGED Viewed

@@ -33,15 +33,13 @@ class TransformFilter(Filter):
         from anemoi.transform.filters import create_filter
         self.name = name
-        self.transform_filter = create_filter(self, config)
+        self.transform_filter = create_filter(context, config)
-    def execute(self, context: Any, input: ekd.FieldList) -> ekd.FieldList:
+    def execute(self, input: ekd.FieldList) -> ekd.FieldList:
         """Execute the transformation filter.
         Parameters
         ----------
-        context : Any
-            The context in which the execution occurs.
         input : ekd.FieldList
             The input data to be transformed.

anemoi/datasets/create/input/action.py CHANGED Viewed

@@ -17,6 +17,7 @@ from earthkit.data.core.order import build_remapping
 from ...dates.groups import GroupOfDates
 from .context import Context
+from .template import substitute
 LOG = logging.getLogger(__name__)
@@ -248,7 +249,7 @@ def action_factory(config: Dict[str, Any], context: ActionContext, action_path:
     if cls is None:
         from ..sources import create_source
-        source = create_source(None, config)
+        source = create_source(None, substitute(context, config))
         return FunctionAction(context, action_path + [key], key, source)
     return cls(context, action_path + [key], *args, **kwargs)

anemoi/datasets/create/input/function.py CHANGED Viewed

@@ -20,7 +20,6 @@ from .misc import _tidy
 from .misc import assert_fieldlist
 from .result import Result
 from .template import notify_result
-from .template import resolve
 from .template import substitute
 from .trace import trace
 from .trace import trace_datasource
@@ -79,6 +78,9 @@ class FunctionContext:
         """Returns whether partial results are acceptable."""
         return self.owner.group_of_dates.partial_ok
+    def get_result(self, *args, **kwargs) -> Any:
+        return self.owner.context.get_result(*args, **kwargs)
 class FunctionAction(Action):
     """Represents an action that executes a function.
@@ -203,14 +205,12 @@ class FunctionResult(Result):
     @trace_datasource
     def datasource(self) -> FieldList:
         """Returns the datasource for the function result."""
-        args, kwargs = resolve(self.context, (self.args, self.kwargs))
+        # args, kwargs = resolve(self.context, (self.args, self.kwargs))
         self.action.source.context = FunctionContext(self)
         return _tidy(
             self.action.source.execute(
-                self.group_of_dates,  # Will provide a list of datetime objects
-                *args,
-                **kwargs,
+                list(self.group_of_dates),  # Will provide a list of datetime objects
             )
         )

anemoi/datasets/create/sources/accumulations.py CHANGED Viewed

@@ -636,6 +636,19 @@ def accumulations(
     Any
         The computed accumulations.
     """
+    if (
+        request.get("class") == "ea"
+        and request.get("stream", "oper") == "oper"
+        and request.get("accumulation_period") == 24
+    ):
+        from .accumulations2 import accumulations as accumulations2
+        LOG.warning(
+            "🧪️ Experimental features: Using accumulations2, because class=ea stream=oper and accumulation_period=24"
+        )
+        return accumulations2(context, dates, **request)
     _to_list(request["param"])
     class_ = request.get("class", "od")
     stream = request.get("stream", "oper")

anemoi/datasets/create/sources/accumulations2.py ADDED Viewed

@@ -0,0 +1,652 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import datetime
+import logging
+import warnings
+from abc import abstractmethod
+from copy import deepcopy
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Tuple
+from typing import Union
+import earthkit.data as ekd
+import numpy as np
+from earthkit.data.core.temporary import temp_file
+from earthkit.data.readers.grib.output import new_grib_output
+from anemoi.datasets.create.sources.mars import mars
+from anemoi.datasets.create.utils import to_datetime_list
+from .legacy import legacy_source
+LOG = logging.getLogger(__name__)
+xprint = print
+def _member(field: Any) -> int:
+    """Retrieves the member number from the field metadata.
+    Parameters
+    ----------
+    field : Any
+        The field from which to retrieve the member number.
+    Returns
+    -------
+    int
+        The member number.
+    """
+    # Bug in eccodes has number=0 randomly
+    number = field.metadata("number", default=0)
+    if number is None:
+        number = 0
+    return number
+class Period:
+    value = None
+    def __init__(self, start_datetime, end_datetime, base_datetime):
+        assert isinstance(start_datetime, datetime.datetime)
+        assert isinstance(end_datetime, datetime.datetime)
+        assert isinstance(base_datetime, datetime.datetime)
+        self.start_datetime = start_datetime
+        self.end_datetime = end_datetime
+        self.base_datetime = base_datetime
+    @property
+    def time_request(self):
+        date = int(self.base_datetime.strftime("%Y%m%d"))
+        time = int(self.base_datetime.strftime("%H%M"))
+        end_step = self.end_datetime - self.base_datetime
+        assert end_step.total_seconds() % 3600 == 0, end_step  # only full hours supported
+        end_step = int(end_step.total_seconds() // 3600)
+        return (("date", date), ("time", time), ("step", end_step))
+    def field_to_key(self, field):
+        return (
+            ("date", field.metadata("date")),
+            ("time", field.metadata("time")),
+            ("step", field.metadata("step")),
+        )
+    def check(self, field):
+        stepType = field.metadata("stepType")
+        startStep = field.metadata("startStep")
+        endStep = field.metadata("endStep")
+        date = field.metadata("date")
+        time = field.metadata("time")
+        assert stepType == "accum", stepType
+        base_datetime = datetime.datetime.strptime(str(date) + str(time).zfill(4), "%Y%m%d%H%M")
+        start = base_datetime + datetime.timedelta(hours=startStep)
+        assert start == self.start_datetime, (start, self.start_datetime)
+        end = base_datetime + datetime.timedelta(hours=endStep)
+        assert end == self.end_datetime, (end, self.end_datetime)
+    def is_matching_field(self, field):
+        return self.field_to_key(field) == self.time_request
+    def __repr__(self):
+        return f"Period({self.start_datetime} to {self.end_datetime} -> {self.time_request})"
+    def length(self):
+        return self.end_datetime - self.start_datetime
+    def apply(self, accumulated, values):
+        if accumulated is None:
+            accumulated = np.zeros_like(values)
+        assert accumulated.shape == values.shape, (accumulated.shape, values.shape)
+        if not np.all(values >= 0):
+            warnings.warn(f"Negative values for {values}: {np.amin(values)} {np.amax(values)}")
+        return accumulated + self.sign * values
+class TodoList:
+    def __init__(self, keys):
+        self._todo = set(keys)
+        self._len = len(keys)
+        self._done = set()
+        assert self._len == len(self._todo), (self._len, len(self._todo))
+    def is_todo(self, key):
+        return key in self._todo
+    def is_done(self, key):
+        return key in self._done
+    def set_done(self, key):
+        self._done.add(key)
+        self._todo.remove(key)
+    def all_done(self):
+        if not self._todo:
+            assert len(self._done) == self._len, (len(self._done), self._len)
+            return True
+        return False
+class Periods:
+    _todo = None
+    def __init__(self, valid_date, accumulation_period, **kwargs):
+        # one Periods object for each accumulated field in the output
+        assert isinstance(valid_date, datetime.datetime), (valid_date, type(valid_date))
+        assert isinstance(accumulation_period, datetime.timedelta), (accumulation_period, type(accumulation_period))
+        self.valid_date = valid_date
+        self.accumulation_period = accumulation_period
+        self.kwargs = kwargs
+        self._periods = self.build_periods()
+        self.check_merged_interval()
+    def check_merged_interval(self):
+        global_start = self.valid_date - self.accumulation_period
+        global_end = self.valid_date
+        resolution = datetime.timedelta(hours=1)
+        timeline = np.arange(
+            np.datetime64(global_start, "s"), np.datetime64(global_end, "s"), np.timedelta64(resolution)
+        )
+        flags = np.zeros_like(timeline, dtype=int)
+        for p in self._periods:
+            segment = np.where((timeline >= p.start_datetime) & (timeline < p.end_datetime))
+            xprint(segment)
+            flags[segment] += p.sign
+        assert np.all(flags == 1), flags
+    def find_matching_period(self, field):
+        # Find a period that matches the field, or return None
+        found = [p for p in self._periods if p.is_matching_field(field)]
+        if len(found) == 1:
+            return found[0]
+        if len(found) > 1:
+            raise ValueError(f"Found more than one period for {field}")
+        return None
+    @property
+    def todo(self):
+        if self._todo is None:
+            self._todo = TodoList([p.time_request for p in self._periods])
+        return self._todo
+    def is_todo(self, period):
+        return self.todo.is_todo(period.time_request)
+    def is_done(self, period):
+        return self.todo.is_done(period.time_request)
+    def set_done(self, period):
+        self.todo.set_done(period.time_request)
+    def all_done(self):
+        return self.todo.all_done()
+    def __iter__(self):
+        return iter(self._periods)
+    @abstractmethod
+    def build_periods(self):
+        pass
+class EraPeriods(Periods):
+    def search_periods(self, start, end, debug=False):
+        # find candidate periods that can be used to accumulate the data
+        # to get the accumulation between the two dates 'start' and 'end'
+        found = []
+        if not end - start == datetime.timedelta(hours=1):
+            raise NotImplementedError("Only 1 hour period is supported")
+        for base_time, steps in self.available_steps(start, end).items():
+            for step1, step2 in steps:
+                if debug:
+                    xprint(f"❌ tring: {base_time=} {step1=} {step2=}")
+                if ((base_time + step1) % 24) != start.hour:
+                    continue
+                if ((base_time + step2) % 24) != end.hour:
+                    continue
+                base_datetime = start - datetime.timedelta(hours=step1)
+                period = Period(start, end, base_datetime)
+                found.append(period)
+                assert base_datetime.hour == base_time, (base_datetime, base_time)
+                assert period.start_datetime - period.base_datetime == datetime.timedelta(hours=step1), (
+                    period.start_datetime,
+                    period.base_datetime,
+                    step1,
+                )
+                assert period.end_datetime - period.base_datetime == datetime.timedelta(hours=step2), (
+                    period.end_datetime,
+                    period.base_datetime,
+                    step2,
+                )
+        return found
+    def build_periods(self):
+        # build the list of periods to accumulate the data
+        hours = self.accumulation_period.total_seconds() / 3600
+        assert int(hours) == hours, f"Only full hours accumulation is supported {hours}"
+        hours = int(hours)
+        lst = []
+        for wanted in [[i, i + 1] for i in range(0, hours, 1)]:
+            start = self.valid_date - datetime.timedelta(hours=wanted[1])
+            end = self.valid_date - datetime.timedelta(hours=wanted[0])
+            found = self.search_periods(start, end)
+            if not found:
+                xprint(f"❌❌❌ Cannot find accumulation for {start} {end}")
+                self.search_periods(start, end, debug=True)
+                raise ValueError(f"Cannot find accumulation for {start} {end}")
+            found = sorted(found, key=lambda x: x.base_datetime, reverse=True)
+            chosen = found[0]
+            if len(found) > 1:
+                xprint(f"  Found more than one period for {start} {end}")
+                for f in found:
+                    xprint(f"    {f}")
+                xprint(f"    Chosing {chosen}")
+            chosen.sign = 1
+            lst.append(chosen)
+        return lst
+class EaOperPeriods(EraPeriods):
+    def available_steps(self, start, end):
+        return {
+            6: [[i, i + 1] for i in range(0, 18, 1)],
+            18: [[i, i + 1] for i in range(0, 18, 1)],
+        }
+class L5OperPeriods(EraPeriods):
+    def available_steps(self, start, end):
+        print("❌❌❌ untested")
+        x = 24  # need to check if 24 is the right value
+        return {
+            0: [[i, i + 1] for i in range(0, x, 1)],
+        }
+class EaEndaPeriods(EraPeriods):
+    def available_steps(self, start, end):
+        print("❌❌❌ untested")
+        return {
+            6: [[i, i + 3] for i in range(0, 18, 1)],
+            18: [[i, i + 3] for i in range(0, 18, 1)],
+        }
+class RrOperPeriods(Periods):
+    def available_steps(self, start, end):
+        raise NotImplementedError("need to implement diff")
+        x = 24  # todo: check if 24 is the right value
+        return {
+            0: [[0, i] for i in range(0, x, 1)],
+            3: [[0, i] for i in range(0, x, 1)],
+            6: [[0, i] for i in range(0, x, 1)],
+            9: [[0, i] for i in range(0, x, 1)],
+            12: [[0, i] for i in range(0, x, 1)],
+            15: [[0, i] for i in range(0, x, 1)],
+            18: [[0, i] for i in range(0, x, 1)],
+            21: [[0, i] for i in range(0, x, 1)],
+        }
+class OdEldaPeriods(EraPeriods):
+    def available_steps(self, start, end):
+        print("❌❌❌ untested")
+        x = 24  # need to check if 24 is the right value
+        return {
+            6: [[i, i + 1] for i in range(0, x, 1)],
+            18: [[i, i + 1] for i in range(0, x, 1)],
+        }
+class DiffPeriods(Periods):
+    pass
+class OdOperPeriods(DiffPeriods):
+    def available_steps(self, start, end):
+        raise NotImplementedError("need to implement diff and _scda patch")
+class OdEnfoPeriods(DiffPeriods):
+    def available_steps(self, start, end):
+        raise NotImplementedError("need to implement diff")
+def find_accumulator_class(class_: str, stream: str) -> Periods:
+    return {
+        ("ea", "oper"): EaOperPeriods,  # runs ok
+        ("ea", "enda"): EaEndaPeriods,
+        ("rr", "oper"): RrOperPeriods,
+        ("l5", "oper"): L5OperPeriods,
+        ("od", "oper"): OdOperPeriods,
+        ("od", "enfo"): OdEnfoPeriods,
+        ("od", "elda"): OdEldaPeriods,
+    }[class_, stream]
+class Accumulator:
+    values = None
+    def __init__(self, period_class, out, valid_date, user_accumulation_period, **kwargs):
+        self.valid_date = valid_date
+        # keep the reference to the output file to be able to write the result using an input field as template
+        self.out = out
+        # key contains the mars request parameters except the one related to the time
+        # A mars request is a dictionary with three categories of keys:
+        #   - the ones related to the time (date, time, step)
+        #   - the ones related to the data (param, stream, levtype, expver, number, ...)
+        #   - the ones related to the processing to be done (grid, area, ...)
+        self.kwargs = kwargs
+        for k in ["date", "time", "step"]:
+            if k in kwargs:
+                raise ValueError(f"Cannot use {k} in kwargs for accumulations")
+        self.key = {k: v for k, v in kwargs.items() if k in ["param", "level", "levelist", "number"]}
+        self.periods = period_class(self.valid_date, user_accumulation_period, **kwargs)
+    @property
+    def requests(self):
+        for period in self.periods:
+            # build the full data requests, merging the time requests with the key
+            yield {**self.kwargs.copy(), **dict(period.time_request)}
+    def is_field_needed(self, field):
+        for k, v in self.key.items():
+            if field.metadata(k) != v:
+                LOG.debug(f"{self} does not need field {field} because of {k}={field.metadata(k)} not {v}")
+                return False
+        return True
+    def compute(self, field, values):
+        if not self.is_field_needed(field):
+            return
+        period = self.periods.find_matching_period(field)
+        if not period:
+            return
+        assert self.periods.is_todo(period), (self.periods, period)
+        assert not self.periods.is_done(period), f"Field {field} for period {period} already done"
+        period.check(field)
+        xprint(f"{self}  field ✅ ({period.sign}){field} for {period}")
+        self.values = period.apply(self.values, values)
+        self.periods.set_done(period)
+        if self.periods.all_done():
+            self.write(field)
+            xprint("accumulator", self, " : data written ✅ ")
+    def check(self, field: Any) -> None:
+        if self._check is None:
+            self._check = field.metadata(namespace="mars")
+            assert self.param == field.metadata("param"), (self.param, field.metadata("param"))
+            assert self.date == field.metadata("date"), (self.date, field.metadata("date"))
+            assert self.time == field.metadata("time"), (self.time, field.metadata("time"))
+            assert self.step == field.metadata("step"), (self.step, field.metadata("step"))
+            assert self.number == _member(field), (self.number, _member(field))
+            return
+        mars = field.metadata(namespace="mars")
+        keys1 = sorted(self._check.keys())
+        keys2 = sorted(mars.keys())
+        assert keys1 == keys2, (keys1, keys2)
+        for k in keys1:
+            if k not in ("step",):
+                assert self._check[k] == mars[k], (k, self._check[k], mars[k])
+    def write(self, template: Any) -> None:
+        assert self.periods.all_done(), self.periods
+        if np.all(self.values < 0):
+            LOG.warning(
+                f"Negative values when computing accumutation for {self}): min={np.amin(self.values)} max={np.amax(self.values)}"
+            )
+        startStep = 0
+        endStep = self.periods.accumulation_period.total_seconds() // 3600
+        assert int(endStep) == endStep, "only full hours accumulation is supported"
+        endStep = int(endStep)
+        fake_base_date = self.valid_date - self.periods.accumulation_period
+        date = int(fake_base_date.strftime("%Y%m%d"))
+        time = int(fake_base_date.strftime("%H%M"))
+        self.out.write(
+            self.values,
+            template=template,
+            stepType="accum",
+            startStep=startStep,
+            endStep=endStep,
+            date=date,
+            time=time,
+            check_nans=True,
+        )
+        self.values = None
+    def __repr__(self):
+        key = ", ".join(f"{k}={v}" for k, v in self.key.items())
+        return f"{self.__class__.__name__}({self.valid_date}, {key})"
+def _compute_accumulations(
+    context: Any,
+    dates: List[datetime.datetime],
+    request: Dict[str, Any],
+    user_accumulation_period: datetime.timedelta,
+    # data_accumulation_period: Optional[int] = None,
+    # patch: Any = _identity,
+) -> Any:
+    request = deepcopy(request)
+    param = request.pop("param")
+    assert isinstance(param, (list, tuple))
+    number = request.pop("number", [0])
+    if not isinstance(number, (list, tuple)):
+        number = [number]
+    assert isinstance(number, (list, tuple))
+    request["stream"] = request.get("stream", "oper")
+    type_ = request.get("type", "an")
+    if type_ == "an":
+        type_ = "fc"
+    request["type"] = type_
+    request["levtype"] = request.get("levtype", "sfc")
+    if request["levtype"] != "sfc":
+        # LOG.warning("'type' should be 'sfc', found %s", request['type'])
+        raise NotImplementedError("Only sfc leveltype is supported")
+    period_class = find_accumulator_class(request["class"], request["stream"])
+    tmp = temp_file()
+    path = tmp.path
+    out = new_grib_output(path)
+    # build one accumulator per output field
+    accumulators = []
+    for valid_date in dates:
+        for p in param:
+            for n in number:
+                accumulators.append(
+                    Accumulator(
+                        period_class,
+                        out,
+                        valid_date,
+                        user_accumulation_period=user_accumulation_period,
+                        param=p,
+                        number=n,
+                        **request,
+                    )
+                )
+    xprint("accumulators", len(accumulators))
+    # get all needed data requests (mars)
+    requests = []
+    for a in accumulators:
+        xprint("accumulator", a)
+        for r in a.requests:
+            xprint(" ", r)
+            requests.append(r)
+    # get the data (this will pack the requests to avoid duplicates and make a minimal number of requests)
+    ds = mars(context, dates, request_already_using_valid_datetime=True, *requests)
+    # send each field to the each accumulator, the accumulatore will use the field to the accumulation
+    # if the accumulator has requested it
+    for field in ds:
+        values = field.values  # optimisation
+        for a in accumulators:
+            a.compute(field, values)
+    out.close()
+    ds = ekd.from_source("file", path)
+    assert len(ds) / len(param) / len(number) == len(dates), (
+        len(ds),
+        len(param),
+        len(dates),
+    )
+    # keep a reference to the tmp file, or it gets deleted when the function returns
+    ds._tmp = tmp
+    return ds
+def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
+    """Converts the input to a list if it is not already a list or tuple.
+    Parameters
+    ----------
+    x : Union[List[Any], Tuple[Any], Any]
+        Input value.
+    Returns
+    -------
+    List[Any]
+        The input value as a list.
+    """
+    if isinstance(x, (list, tuple)):
+        return x
+    return [x]
+def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
+    """Modifies the request stream based on the time.
+    Parameters
+    ----------
+    request : Dict[str, Any]
+        Request parameters.
+    Returns
+    -------
+    Dict[str, Any]
+        The modified request parameters.
+    """
+    if request["time"] in (6, 18, 600, 1800):
+        request["stream"] = "scda"
+    else:
+        request["stream"] = "oper"
+    return request
+@legacy_source(__file__)
+def accumulations(context, dates, **request):
+    _to_list(request["param"])
+    user_accumulation_period = request.pop("accumulation_period", 6)
+    user_accumulation_period = datetime.timedelta(hours=user_accumulation_period)
+    context.trace("🌧️", f"accumulations {request} {user_accumulation_period}")
+    return _compute_accumulations(
+        context,
+        dates,
+        request,
+        user_accumulation_period=user_accumulation_period,
+    )
+execute = accumulations
+if __name__ == "__main__":
+    import yaml
+    config = yaml.safe_load(
+        """
+      class: ea
+      expver: '0001'
+      grid: 20./20.
+      levtype: sfc
+#      number: [0, 1]
+#      stream: enda
+      param: [cp, tp]
+#      accumulation_period: 6h
+      accumulation_period: 2
+    """
+    )
+    dates = yaml.safe_load("[2022-12-31 00:00, 2022-12-31 06:00]")
+    # dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
+    dates = to_datetime_list(dates)
+    class Context:
+        use_grib_paramid = True
+        def trace(self, *args):
+            print(*args)
+    for f in accumulations(Context, dates, **config):
+        print(f, f.to_numpy().mean())

anemoi/datasets/create/sources/legacy.py CHANGED Viewed

@@ -14,6 +14,8 @@ import os
 from typing import Any
 from typing import Callable
+from anemoi.datasets.create.input.template import resolve
 from ..source import Source
 from . import source_registry
@@ -71,12 +73,15 @@ class legacy_source:
         def execute_wrapper(self, dates) -> Any:
             """Wrapper method to call the execute function."""
+            args, kwargs = resolve(self.context, (self.args, self.kwargs))
             try:
-                return execute(self.context, dates, *self.args, **self.kwargs)
+                return execute(self.context, dates, *args, **kwargs)
             except TypeError:
                 LOG.error(f"Error executing source {this.name} from {source}")
                 LOG.error(f"Function signature is: {inspect.signature(execute)}")
-                LOG.error(f"Arguments are: {self.args=}, {self.kwargs=}")
+                LOG.error(f"Arguments are: {args=}, {kwargs=}")
                 raise
         klass = type(

anemoi/datasets/create/sources/xarray_support/grid.py CHANGED Viewed

@@ -61,6 +61,7 @@ class LatLonGrid(Grid):
         super().__init__()
         self.lat = lat
         self.lon = lon
+        self.variable_dims = variable_dims
 class XYGrid(Grid):
@@ -86,10 +87,20 @@ class MeshedGrid(LatLonGrid):
     @cached_property
     def grid_points(self) -> Tuple[Any, Any]:
         """Get the grid points for the meshed grid."""
-        lat, lon = np.meshgrid(
-            self.lat.variable.values,
-            self.lon.variable.values,
-        )
+        if self.variable_dims == (self.lon.variable.name, self.lat.variable.name):
+            lat, lon = np.meshgrid(
+                self.lat.variable.values,
+                self.lon.variable.values,
+            )
+        elif self.variable_dims == (self.lat.variable.name, self.lon.variable.name):
+            lon, lat = np.meshgrid(
+                self.lon.variable.values,
+                self.lat.variable.values,
+            )
+        else:
+            raise NotImplementedError(f"MeshedGrid.grid_points: unrecognized variable_dims {self.variable_dims}")
         return lat.flatten(), lon.flatten()

anemoi/datasets/data/dataset.py CHANGED Viewed

@@ -310,7 +310,12 @@ class Dataset(ABC, Sized):
         """
         requested_frequency = frequency_to_seconds(frequency)
         dataset_frequency = frequency_to_seconds(self.frequency)
-        assert requested_frequency % dataset_frequency == 0
+        if requested_frequency % dataset_frequency != 0:
+            raise ValueError(
+                f"Requested frequency {frequency} is not a multiple of the dataset frequency {self.frequency}. Did you mean to use `interpolate_frequency`?"
+            )
         # Question: where do we start? first date, or first date that is a multiple of the frequency?
         step = requested_frequency // dataset_frequency

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: anemoi-datasets
-Version: 0.5.17
+Version: 0.5.19
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License:                                  Apache License

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 anemoi/datasets/__init__.py,sha256=i_wsAT3ezEYF7o5dpqGrpoG4wmLS-QIBug18uJbSYMs,1065
 anemoi/datasets/__main__.py,sha256=ErwAqE3rBc7OaNO2JRsEOhWpB8ldjAt7BFSuRhbnlqQ,936
-anemoi/datasets/_version.py,sha256=G3jgIvqAOb3RgYTFryPuF7LF2dSXviBKmqCnS1RQzaU,513
+anemoi/datasets/_version.py,sha256=c9HpQ99YdGTjwGSu9DP-RLCHGpdS8jfmCirVxrPm0i4,513
 anemoi/datasets/grids.py,sha256=ALvRRMvu0GaDCnNlOO-cRCfbpywA-1w_wzSylqpqgNY,17795
 anemoi/datasets/testing.py,sha256=fy_JzavUwLlK_2rtXAT-UGUyo5gjyQW2y826zf334Wg,2645
 anemoi/datasets/commands/__init__.py,sha256=O5W3yHZywRoAqmRUioAr3zMCh0hGVV18wZYGvc00ioM,698
@@ -20,10 +20,10 @@ anemoi/datasets/commands/publish.py,sha256=7YusLCWYdVLuexZzvyh8ztYoBOBzVmve3uJs-
 anemoi/datasets/commands/scan.py,sha256=e5t_oxSi-II38TVQiMlWMJ8AZhDEBk5PcPD22DDbHfU,4008
 anemoi/datasets/compute/__init__.py,sha256=hCW0QcLHJmE-C1r38P27_ZOvCLNewex5iQEtZqx2ckI,393
 anemoi/datasets/compute/recentre.py,sha256=kwxDB8qpgOCFZSQJvjAmVcpH5zWsfk5FSoIureqNHd4,5915
-anemoi/datasets/create/__init__.py,sha256=1Y7IcMPXjCZkNbezhSDiurG-qvGFGgPqvgOMpQH_e0k,50501
+anemoi/datasets/create/__init__.py,sha256=D0a5Q-xv5_mtBTzEO_6IaWHCQUbZyH0NjHwEtixMCXs,50699
 anemoi/datasets/create/check.py,sha256=FrgyZP3Xyx4qXHl8_ZfM31fgNhcxMqxlE5oLweMDGU0,10003
 anemoi/datasets/create/chunks.py,sha256=kZV3dWoCuv3Bttc0wysJB7OPbXsD99exKyrrj4HGFwQ,4025
-anemoi/datasets/create/config.py,sha256=ZF7tEPT6U4ILYVekryFd612tQeMDQK6riaTYtSJrUcM,13295
+anemoi/datasets/create/config.py,sha256=xrSlaY2p5zssfLIt8A1CP9WwJReSXVWBMQM7bT1aFbU,13448
 anemoi/datasets/create/filter.py,sha256=Hu4o3Z2omIdcu5ycJqmBkY_ZSKTG5JkjbIuxXM8ADfs,1254
 anemoi/datasets/create/patch.py,sha256=u4CeIuo3Ncrbhu9CTyaUbcmaJfBfMrrFVpgEikM9pE4,5398
 anemoi/datasets/create/persistent.py,sha256=XkEBjymXrR-y9KPVLtz9xdd0IB14wSEhcANUhUUzGVw,7832
@@ -49,18 +49,18 @@ anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidi
 anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py,sha256=bXgm5nKgBZaP1E4tcjSLqJsEl6BlJaNLr3MsR8V9sJ4,14682
 anemoi/datasets/create/filters/speeddir_to_uv.py,sha256=8NXsus1LaYOzAAr7XCHKCh8HAz8BI0A1ZZz_RNDB0-w,2762
 anemoi/datasets/create/filters/sum.py,sha256=aGT6JkdHJ3i2SKzklqiyJ4ZFV3bVMYhHOSoxkdYuzp8,2151
-anemoi/datasets/create/filters/transform.py,sha256=C8tizuYtO1Bp28dTB9mEHeADAO8zHlDFXh8XR1IO1Os,1506
+anemoi/datasets/create/filters/transform.py,sha256=gIDLvaJlnn3Nc6P29aPOvNYM6yBWcIGrR2e_1bM6_Nw,1418
 anemoi/datasets/create/filters/unrotate_winds.py,sha256=3AJf0crnVVySLlXLIdfEUxRRlQeKgheUuD-UCrSrgo8,2798
 anemoi/datasets/create/filters/uv_to_speeddir.py,sha256=Zdc34AG5Bsz-Z7JGuznyRJr6F-BnWKXPiI3mjmOpbek,2883
 anemoi/datasets/create/filters/wz_to_w.py,sha256=42AQvTHk-ISyHlwvXfU3yiTGiDkfrs5kiKNkyqqtQpg,2725
 anemoi/datasets/create/input/__init__.py,sha256=XeURpmbReQvpELltGFKzg3oZFXWRdUxW9SK3K662SBQ,3364
-anemoi/datasets/create/input/action.py,sha256=0P1aSutrzdDDtUU78YMLfdsUEOeJcLvLiH2KDR5kOxM,7565
+anemoi/datasets/create/input/action.py,sha256=xXLqVsoygxyaROiXc7TW9DCEOzVh1YgPDAqUpcOb9fs,7619
 anemoi/datasets/create/input/concat.py,sha256=bU8SWfBVfK8bRAmmN4UO9zpIGxwQvRUk9_vwrKPOTE4,5355
 anemoi/datasets/create/input/context.py,sha256=qrLccxMe9UkyQxsNuR6JSK7oLzZq21dt38AxZ9kYzsY,2714
 anemoi/datasets/create/input/data_sources.py,sha256=4xUUShM0pCXIZVPJW_cSNMUwCO_wLx996MLFpTLChm0,4385
 anemoi/datasets/create/input/empty.py,sha256=tOxe3LykoGkEAFuf4yggMpAcvFzMw3E6hCz5pyeQ8Q0,1534
 anemoi/datasets/create/input/filter.py,sha256=R19IUwTdWBueeTKAMxyYKiP-JXOFJQu2vUoEiPYK0rA,3313
-anemoi/datasets/create/input/function.py,sha256=FJ2W5DJBLmpkQ6QFo0-yfUE9iIZyimBn_cZ1b2nRu-Q,6874
+anemoi/datasets/create/input/function.py,sha256=Q15IVNJqHm_9Pf0pWnDedyJcRoz0fxbKt8d1f2IMqQA,6916
 anemoi/datasets/create/input/join.py,sha256=RAdgE4lVcC71_J47dNa1weJuWdTXSQIvo06G2J6dfdg,4016
 anemoi/datasets/create/input/misc.py,sha256=FVaH_ym52RZI_fnLSMM_dKTQmWTrInucP780E3gGqvw,3357
 anemoi/datasets/create/input/pipe.py,sha256=-tCz161IwXoI8pl1hilA9T_j5eHSr-sgbijFLp9HHNc,2083
@@ -70,14 +70,15 @@ anemoi/datasets/create/input/step.py,sha256=WcR9NgRvUKF60Fo5veLvRCAQMrOd55x1gOEA
 anemoi/datasets/create/input/template.py,sha256=Iycw9VmfA0WEIDP_Of8bp-8HsV0EUfwbnm0WjxiO4GA,4092
 anemoi/datasets/create/input/trace.py,sha256=dakPYMmwKq6s17Scww1CN-xYBD3btJTGeDknOhAcnEM,3320
 anemoi/datasets/create/sources/__init__.py,sha256=XNiiGaC6NbxnGfl6glPw-gTJASi3vsGKwVlfkMqYGk4,950
-anemoi/datasets/create/sources/accumulations.py,sha256=Fh4LJi7XptsOZ9CBv4Nxw8CPJpp_-ugRAWg3mtNcmKU,19855
+anemoi/datasets/create/sources/accumulations.py,sha256=ZA8F8RJPMHok5RpIHH4x-txwiSll8zuWwqJ3rn95JHk,20295
+anemoi/datasets/create/sources/accumulations2.py,sha256=iBORRrH0N7r3gMWm3mCkJ6XmB-dO_lEckHPwvmk9fu0,20673
 anemoi/datasets/create/sources/constants.py,sha256=5O6d9tEuAmVjl5vNkNfmkaAjKXFlw1UjeueTsF1GZCI,1528
 anemoi/datasets/create/sources/eccc_fstd.py,sha256=8HK38f444HcWMvBhooP0XqTfMXYoCbN_8G9RI_Ne5rc,659
 anemoi/datasets/create/sources/empty.py,sha256=5mVIVRUwnBfE3zp-bvNA_imXCSpyds-4mewcI8HXAiY,1020
 anemoi/datasets/create/sources/forcings.py,sha256=877OZoXUoJncQ2_AAGSijwWqM-4kJJdxdIa6SFvZBUw,1216
 anemoi/datasets/create/sources/grib.py,sha256=zFBFWNFDVPCMSDRheNuaLZ7EaInjDt9OTJwVOPj9j-U,8371
 anemoi/datasets/create/sources/hindcasts.py,sha256=_4880rgd4AsRxlDXVi6dkh8mlKXrz2i27btVlmlMFjY,2611
-anemoi/datasets/create/sources/legacy.py,sha256=O6sTbI4QBlUiuGwaUwO2kpmfJYCAs_gTid0YOnkm37I,2536
+anemoi/datasets/create/sources/legacy.py,sha256=RJce-9TwmUUCFbgC8A3Dp61nSBfB8_lWti8WNoOMIcU,2652
 anemoi/datasets/create/sources/mars.py,sha256=tesQz7Ne6SLBChE_cNJU6Sxr6e0LXFlUKQ8gCdRiCMw,13155
 anemoi/datasets/create/sources/netcdf.py,sha256=UnehMwEMJquqaOeU33zNyFUYfzqQx4Rg-GRmUcgMcbE,1222
 anemoi/datasets/create/sources/opendap.py,sha256=sTm0wXE_BHk9q8vaNNE_Y6BhTOmhxPweS8RTjP4HYjU,1254
@@ -95,7 +96,7 @@ anemoi/datasets/create/sources/xarray_support/coordinates.py,sha256=rPEuijS77mQ9
 anemoi/datasets/create/sources/xarray_support/field.py,sha256=YRxx6kh1qO2qQ6I_VyR51h3dwNiiFM7CNwQNfpp-p-E,6375
 anemoi/datasets/create/sources/xarray_support/fieldlist.py,sha256=CG8ecTXCr37pNiykoxR6Sb21Xxsz6AS5K5-KE4qMEmo,8243
 anemoi/datasets/create/sources/xarray_support/flavour.py,sha256=GYodfpKfTBBWiyXytRrin6NK07ltlyz0UF7x4gQ3Fok,31836
-anemoi/datasets/create/sources/xarray_support/grid.py,sha256=P-NPDYU0eZg_mWcEbeNL9ZhtoJHGNw0eWaC1jxYfK5o,5690
+anemoi/datasets/create/sources/xarray_support/grid.py,sha256=lsE8bQwBH9pflzvsJ89Z6ExYPdHJd54xorMNzL2gTd0,6181
 anemoi/datasets/create/sources/xarray_support/metadata.py,sha256=WRO86l-ZB7iJ7pG5Vz9kVv5h1MokfF0fuy0bNSNBRIc,10687
 anemoi/datasets/create/sources/xarray_support/patch.py,sha256=Snk8bz7gp0HrG0MrY5hrXu7VC0tKgtoiWXByi2sBYJc,2037
 anemoi/datasets/create/sources/xarray_support/time.py,sha256=Y_lZTUOXWJH4jcSgyL4WTDwrtPXi7MUiumaXfRoqqAY,12486
@@ -105,7 +106,7 @@ anemoi/datasets/create/statistics/summary.py,sha256=JdtChTmsr1Y958_nka36HltTbeZk
 anemoi/datasets/data/__init__.py,sha256=dLzKYFX0eCi7urHA9t530SqZ_GYxTUyQeEcXYV8lZho,2521
 anemoi/datasets/data/complement.py,sha256=C55ZyWO8uM-bGbZkpuh80z95XtQjIr_NBnsxiKDWWtE,9643
 anemoi/datasets/data/concat.py,sha256=eY5rujcdal00BJCv00mKSlxp0FKVvPQd7uqrBnL9fj4,8996
-anemoi/datasets/data/dataset.py,sha256=Z1P1bkscPChGNcjjkxonbw9XylixJoM0UIUjqDDvxl8,30494
+anemoi/datasets/data/dataset.py,sha256=Dz74L_RihBzHJyHqlCKcXHBa0J_PkW3YYFofhv-Rh-4,30694
 anemoi/datasets/data/debug.css,sha256=z2X_ZDSnZ9C3pyZPWnQiEyAxuMxUaxJxET4oaCImTAQ,211
 anemoi/datasets/data/debug.py,sha256=hVa1jAQ-TK7CoKJNyyUC0eZPobFG-FpkVXEaO_3B-MA,10796
 anemoi/datasets/data/ensemble.py,sha256=-36kMjuT2y5jUeSnjCRTCyE4um6DLAADBVSKSTkHZZg,5352
@@ -129,9 +130,9 @@ anemoi/datasets/data/xy.py,sha256=-jWzYismrK3eI3YCKIBpU1BCmraRncmVn0_2IUY--lk,75
 anemoi/datasets/dates/__init__.py,sha256=pEArHDQ7w5E0WC8Vvf9ypyKSdm6gnhoN9TmooITB7C4,13617
 anemoi/datasets/dates/groups.py,sha256=IOveL6IyTXZwEdXZEnRAnpu9pINY95VN7LzcpLfJ09E,10105
 anemoi/datasets/utils/__init__.py,sha256=hCW0QcLHJmE-C1r38P27_ZOvCLNewex5iQEtZqx2ckI,393
-anemoi_datasets-0.5.17.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
-anemoi_datasets-0.5.17.dist-info/METADATA,sha256=jIFWwwr0VWMKPo-lTpC82vLpvagt5n2HD99Otf7CKW4,15727
-anemoi_datasets-0.5.17.dist-info/WHEEL,sha256=L0N565qmK-3nM2eBoMNFszYJ_MTx03_tQ0CQu1bHLYo,91
-anemoi_datasets-0.5.17.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
-anemoi_datasets-0.5.17.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
-anemoi_datasets-0.5.17.dist-info/RECORD,,
+anemoi_datasets-0.5.19.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
+anemoi_datasets-0.5.19.dist-info/METADATA,sha256=-iNWmeuYT_FuUpsKNhTMb79nXFJhh7sERHF2fW1XJGM,15727
+anemoi_datasets-0.5.19.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+anemoi_datasets-0.5.19.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
+anemoi_datasets-0.5.19.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
+anemoi_datasets-0.5.19.dist-info/RECORD,,

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.0.1)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{anemoi_datasets-0.5.17.dist-info → anemoi_datasets-0.5.19.dist-info}/top_level.txt RENAMED Viewed

File without changes

anemoi-datasets 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl

anemoi-datasets 0.5.17py3-none-any.whl → 0.5.19py3-none-any.whl