anemoi-datasets 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +3 -3
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +5 -26
- anemoi/datasets/create/functions/filters/rename.py +9 -1
- anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +11 -41
- anemoi/datasets/create/functions/sources/constants.py +3 -0
- anemoi/datasets/create/functions/sources/grib.py +4 -0
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -377
- anemoi/datasets/create/functions/sources/mars.py +53 -22
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +290 -172
- anemoi/datasets/create/loaders.py +120 -71
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +49 -16
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +0 -48
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +29 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +18 -3
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.4.0.dist-info/RECORD +0 -73
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/statistics/__init__.py
CHANGED

@@ -89,20 +89,23 @@ def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squares):
             continue
         print("---")
         print(f"❗ Negative variance for {name=}, variance={y}")
-        print(f"
+        print(f"    min={minimum[i]} max={maximum[i]} mean={mean[i]} count={count[i]} sums={sums[i]} squares={squares[i]}")
         print(f"  -> sums: min={np.min(sums[i])}, max={np.max(sums[i])}, argmin={np.argmin(sums[i])}")
         print(f"  -> squares: min={np.min(squares[i])}, max={np.max(squares[i])}, argmin={np.argmin(squares[i])}")
         print(f"  -> count: min={np.min(count[i])}, max={np.max(count[i])}, argmin={np.argmin(count[i])}")
+        print(
+            f"  squares / count - mean * mean = {squares[i] / count[i]} - {mean[i] * mean[i]} = {squares[i] / count[i] - mean[i] * mean[i]}"
+        )

     raise ValueError("Negative variance")


-def compute_statistics(array, check_variables_names=None,
+def compute_statistics(array, check_variables_names=None, allow_nans=False):
     """Compute statistics for a given array, provides minimum, maximum, sum, squares, count and has_nans as a dictionary."""

     nvars = array.shape[1]

-    LOG.
+    LOG.debug(f"Stats {nvars}, {array.shape}, {check_variables_names}")
     if check_variables_names:
         assert nvars == len(check_variables_names), (nvars, check_variables_names)
     stats_shape = (array.shape[0], nvars)

@@ -118,7 +121,7 @@ def compute_statistics(array, check_variables_names=None, allow_nans=False):
         values = chunk.reshape((nvars, -1))

         for j, name in enumerate(check_variables_names):
-            check_data_values(values[j, :], name=name,
+            check_data_values(values[j, :], name=name, allow_nans=allow_nans)
             if np.isnan(values[j, :]).all():
                 # LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
                 raise ValueError(f"All NaN values for {name} ({j}) for date {i}")
@@ -179,12 +182,12 @@ class TmpStatistics:
             pickle.dump((key, dates, data), f)
         shutil.move(tmp_path, path)

-        LOG.
+        LOG.debug(f"Written statistics data for {len(dates)} dates in {path} ({dates})")

     def _gather_data(self):
         # use glob to read all pickles
         files = glob.glob(self.dirname + "/*.npz")
-        LOG.
+        LOG.debug(f"Reading stats data, found {len(files)} files in {self.dirname}")
         assert len(files) > 0, f"No files found in {self.dirname}"
         for f in files:
             with open(f, "rb") as f:
@@ -211,17 +214,17 @@ def normalise_dates(dates):
 class StatAggregator:
     NAMES = ["minimum", "maximum", "sums", "squares", "count", "has_nans"]

-    def __init__(self, owner, dates, variables_names,
+    def __init__(self, owner, dates, variables_names, allow_nans):
         dates = sorted(dates)
         dates = to_datetimes(dates)
         assert dates, "No dates selected"
         self.owner = owner
         self.dates = dates
         self.variables_names = variables_names
-        self.
+        self.allow_nans = allow_nans

         self.shape = (len(self.dates), len(self.variables_names))
-        LOG.
+        LOG.debug(f"Aggregating statistics on shape={self.shape}. Variables : {self.variables_names}")

         self.minimum = np.full(self.shape, np.nan, dtype=np.float64)
         self.maximum = np.full(self.shape, np.nan, dtype=np.float64)
@@ -284,7 +287,7 @@ class StatAggregator:
             assert d in found, f"Statistics for date {d} not precomputed."
         assert len(self.dates) == len(found), "Not all dates found in precomputed statistics"
         assert len(self.dates) == offset, "Not all dates found in precomputed statistics."
-        LOG.
+        LOG.debug(f"Statistics for {len(found)} dates found.")

     def aggregate(self):
         minimum = np.nanmin(self.minimum, axis=0)
@@ -298,13 +301,43 @@ class StatAggregator:
         assert sums.shape == count.shape == squares.shape == mean.shape == minimum.shape == maximum.shape

         x = squares / count - mean * mean
-        # remove negative variance due to numerical errors
-        # x[- 1e-15 < (x / (np.sqrt(squares / count) + np.abs(mean))) < 0] = 0
-        check_variance(x, self.variables_names, minimum, maximum, mean, count, sums, squares)
-        stdev = np.sqrt(x)

-
-
+        # def fix_variance(x, name, minimum, maximum, mean, count, sums, squares):
+        #     assert x.shape == minimum.shape == maximum.shape == mean.shape == count.shape == sums.shape == squares.shape
+        #     assert x.shape == (1,)
+        #     x, minimum, maximum, mean, count, sums, squares = x[0], minimum[0], maximum[0], mean[0], count[0], sums[0], squares[0]
+        #     if x >= 0:
+        #         return x
+        #
+        #     order = np.sqrt((squares / count + mean * mean) / 2)
+        #     range = maximum - minimum
+        #     LOG.warning(f"Negative variance for {name=}, variance={x}")
+        #     LOG.warning(f"square / count - mean * mean = {squares / count} - {mean * mean} = {squares / count - mean * mean}")
+        #     LOG.warning(f"Variable order of magnitude is {order}.")
+        #     LOG.warning(f"Range is {range} ({maximum=} - {minimum=}).")
+        #     LOG.warning(f"Count is {count}.")
+        #     if abs(x) < order * 1e-6 and abs(x) < range * 1e-6:
+        #         LOG.warning(f"Variance is negative but very small, setting to 0.")
+        #         return x * 0
+        #     return x
+
+        for i, name in enumerate(self.variables_names):
+            # remove negative variance due to numerical errors
+            # Not needed for now, fix_variance is disabled
+            # x[i] = fix_variance(x[i:i+1], name, minimum[i:i+1], maximum[i:i+1], mean[i:i+1], count[i:i+1], sums[i:i+1], squares[i:i+1])
+            check_variance(
+                x[i : i + 1],
+                [name],
+                minimum[i : i + 1],
+                maximum[i : i + 1],
+                mean[i : i + 1],
+                count[i : i + 1],
+                sums[i : i + 1],
+                squares[i : i + 1],
+            )
+            check_data_values(np.array([mean[i]]), name=name, allow_nans=False)
+
+        stdev = np.sqrt(x)

         return Summary(
             minimum=minimum,
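The hunk above replaces the single whole-array check_variance call with a per-variable loop, so a failure now names exactly which variable produced the negative variance. A standalone sketch (illustrative, not package code) of why the one-pass formula squares / count - mean * mean can come out negative in the first place:

    import numpy as np

    def variance_from_moments(sums, squares, count):
        # One-pass variance: E[x^2] - E[x]^2. When the variance is tiny
        # relative to the mean, the two terms are nearly equal and float64
        # cancellation can push the result slightly below zero.
        mean = sums / count
        return squares / count - mean * mean

    rng = np.random.default_rng(0)
    x = 1e8 + rng.normal(0.0, 1e-4, 10_000)  # huge mean, tiny spread
    print(variance_from_moments(x.sum(), (x * x).sum(), x.size))
    # true variance is ~1e-8; the computed value can be negative
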
anemoi/datasets/create/input.py
CHANGED

@@ -8,72 +8,16 @@
 #

 import logging
-import os
 import re
 import textwrap
 from functools import wraps

-
-
-TRACE_INDENT = 0
-
-
-def step(action_path):
-    return f"[{'.'.join(action_path)}]"
+from anemoi.utils.humanize import plural

+from .trace import step
+from .trace import trace

-def trace(emoji, *args):
-    if os.environ.get("ANEMOI_DATASET_TRACE_CREATE") is None:
-        return
-    print(emoji, " " * TRACE_INDENT, *args)
-
-
-def trace_datasource(method):
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        global TRACE_INDENT
-        trace(
-            "🌍",
-            "=>",
-            step(self.action_path),
-            self._trace_datasource(*args, **kwargs),
-        )
-        TRACE_INDENT += 1
-        result = method(self, *args, **kwargs)
-        TRACE_INDENT -= 1
-        trace(
-            "🍎",
-            "<=",
-            step(self.action_path),
-            textwrap.shorten(repr(result), 256),
-        )
-        return result
-
-    return wrapper
-
-
-def trace_select(method):
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        global TRACE_INDENT
-        trace(
-            "👓",
-            "=>",
-            ".".join(self.action_path),
-            self._trace_select(*args, **kwargs),
-        )
-        TRACE_INDENT += 1
-        result = method(self, *args, **kwargs)
-        TRACE_INDENT -= 1
-        trace(
-            "🍍",
-            "<=",
-            ".".join(self.action_path),
-            textwrap.shorten(repr(result), 256),
-        )
-        return result
-
-    return wrapper
+LOG = logging.getLogger(__name__)


 def notify_result(method):
@@ -99,7 +43,13 @@ class Context:
         self.used_references.add(key)

     def notify_result(self, key, result):
-        trace(
+        trace(
+            "🎯",
+            step(key),
+            "notify result",
+            textwrap.shorten(repr(result).replace(",", ", "), width=40),
+            plural(len(result), "field"),
+        )
         assert isinstance(key, (list, tuple)), key
         key = tuple(key)
         if key in self.used_references:
anemoi/datasets/create/trace.py
ADDED

@@ -0,0 +1,91 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import logging
+import textwrap
+import threading
+from functools import wraps
+
+LOG = logging.getLogger(__name__)
+
+
+thread_local = threading.local()
+TRACE = 0
+
+
+def enable_trace(on_off):
+    global TRACE
+    TRACE = on_off
+
+
+def step(action_path):
+    return f"[{'.'.join(action_path)}]"
+
+
+def trace(emoji, *args):
+
+    if not TRACE:
+        return
+
+    if not hasattr(thread_local, "TRACE_INDENT"):
+        thread_local.TRACE_INDENT = 0
+
+    print(emoji, " " * thread_local.TRACE_INDENT, *args)
+
+
+def trace_datasource(method):
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+
+        if not hasattr(thread_local, "TRACE_INDENT"):
+            thread_local.TRACE_INDENT = 0
+
+        trace(
+            "🌍",
+            "=>",
+            step(self.action_path),
+            self._trace_datasource(*args, **kwargs),
+        )
+        thread_local.TRACE_INDENT += 1
+        result = method(self, *args, **kwargs)
+        thread_local.TRACE_INDENT -= 1
+        trace(
+            "🍎",
+            "<=",
+            step(self.action_path),
+            textwrap.shorten(repr(result), 256),
+        )
+        return result
+
+    return wrapper
+
+
+def trace_select(method):
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        if not hasattr(thread_local, "TRACE_INDENT"):
+            thread_local.TRACE_INDENT = 0
+        trace(
+            "👓",
+            "=>",
+            ".".join(self.action_path),
+            self._trace_select(*args, **kwargs),
+        )
+        thread_local.TRACE_INDENT += 1
+        result = method(self, *args, **kwargs)
+        thread_local.TRACE_INDENT -= 1
+        trace(
+            "🍍",
+            "<=",
+            ".".join(self.action_path),
+            textwrap.shorten(repr(result), 256),
+        )
+        return result
+
+    return wrapper
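The new trace.py keeps one indent counter per thread (thread_local.TRACE_INDENT) and gates all output behind enable_trace(), replacing the old module-global TRACE_INDENT and the ANEMOI_DATASET_TRACE_CREATE environment check removed from input.py above. A hypothetical usage sketch; the Action class and its methods are stand-ins, not real package classes:

    from anemoi.datasets.create.trace import enable_trace, trace_datasource

    class Action:
        # The decorator expects an action_path and a _trace_datasource()
        # helper on the decorated object.
        action_path = ["input", "mars"]

        def _trace_datasource(self, *args, **kwargs):
            return "datasource()"

        @trace_datasource
        def datasource(self):
            return ["field_1", "field_2"]

    enable_trace(1)
    Action().datasource()
    # prints an indented call tree, one indent level per nesting depth:
    # 🌍  => [input.mars] datasource()
    # 🍎  <= [input.mars] ['field_1', 'field_2']
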
anemoi/datasets/create/utils.py
CHANGED

@@ -7,15 +7,11 @@
 # nor does it submit to any jurisdiction.
 #

-import json
 import os
 from contextlib import contextmanager

 import numpy as np
-import yaml
 from earthkit.data import settings
-from earthkit.data.utils.humanize import seconds  # noqa: F401
-from tqdm.auto import tqdm


 def cache_context(dirname):

@@ -31,26 +27,6 @@ def cache_context(dirname):
     return settings.temporary({"cache-policy": "user", "user-cache-directory": dirname})


-def bytes(n):
-    """>>> bytes(4096)
-    '4 KiB'
-    >>> bytes(4000)
-    '3.9 KiB'
-    """
-    if n < 0:
-        sign = "-"
-        n -= 0
-    else:
-        sign = ""
-
-    u = ["", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"]
-    i = 0
-    while n >= 1024:
-        n /= 1024.0
-        i += 1
-    return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i])
-
-
 def to_datetime_list(*args, **kwargs):
     from earthkit.data.utils.dates import to_datetime_list as to_datetime_list_

@@ -63,15 +39,6 @@ def to_datetime(*args, **kwargs):
     return to_datetime_(*args, **kwargs)


-def load_json_or_yaml(path):
-    with open(path, "r") as f:
-        if path.endswith(".json"):
-            return json.load(f)
-        if path.endswith(".yaml") or path.endswith(".yml"):
-            return yaml.safe_load(f)
-    raise ValueError(f"Cannot read file {path}. Need json or yaml with appropriate extension.")
-
-
 def make_list_int(value):
     if isinstance(value, str):
         if "/" not in value:

@@ -118,18 +85,3 @@ def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]"):
         assert d1 == d2, (i, d1, d2)

     return dates_
-
-
-def progress_bar(*, iterable=None, total=None, initial=0, desc=None):
-    return tqdm(
-        iterable=iterable,
-        total=total,
-        initial=initial,
-        unit_scale=True,
-        unit_divisor=1024,
-        unit="B",
-        disable=False,
-        leave=False,
-        desc=desc,
-        # dynamic_ncols=True, # make this the default?
-    )
anemoi/datasets/create/zarr.py
CHANGED

@@ -24,8 +24,12 @@ def add_zarr_dataset(
     shape=None,
     array=None,
     overwrite=True,
+    dimensions=None,
     **kwargs,
 ):
+    assert dimensions is not None, "Please pass dimensions to add_zarr_dataset."
+    assert isinstance(dimensions, (tuple, list))
+
     if dtype is None:
         assert array is not None, (name, shape, array, dtype, zarr_root)
         dtype = array.dtype

@@ -44,6 +48,7 @@ def add_zarr_dataset(
             **kwargs,
         )
        a[...] = array
+        a.attrs["_ARRAY_DIMENSIONS"] = dimensions
         return a

     if "fill_value" not in kwargs:

@@ -69,6 +74,7 @@ def add_zarr_dataset(
         overwrite=overwrite,
         **kwargs,
     )
+    a.attrs["_ARRAY_DIMENSIONS"] = dimensions
    return a

@@ -79,22 +85,27 @@ class ZarrBuiltRegistry:
     flags = None
     z = None

-    def __init__(self, path, synchronizer_path=None):
+    def __init__(self, path, synchronizer_path=None, use_threads=False):
         import zarr

         assert isinstance(path, str), path
         self.zarr_path = path

-        if
-
-
-
+        if use_threads:
+            self.synchronizer = zarr.ThreadSynchronizer()
+            self.synchronizer_path = None
+        else:
+            if synchronizer_path is None:
+                synchronizer_path = self.zarr_path + ".sync"
+            self.synchronizer_path = synchronizer_path
+            self.synchronizer = zarr.ProcessSynchronizer(self.synchronizer_path)

     def clean(self):
-
-
-
+        if self.synchronizer_path is not None:
+            try:
+                shutil.rmtree(self.synchronizer_path)
+            except FileNotFoundError:
+                pass

     def _open_write(self):
         import zarr

@@ -112,7 +123,7 @@ class ZarrBuiltRegistry:
     def new_dataset(self, *args, **kwargs):
         z = self._open_write()
         zarr_root = z["_build"]
-        add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, **kwargs)
+        add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, dimensions=("tmp",), **kwargs)

     def add_to_history(self, action, **kwargs):
         new = dict(

@@ -143,6 +154,9 @@ class ZarrBuiltRegistry:
         z.attrs["latest_write_timestamp"] = datetime.datetime.utcnow().isoformat()
         z["_build"][self.name_flags][i] = value

+    def ready(self):
+        return all(self.get_flags())
+
     def create(self, lengths, overwrite=False):
         self.new_dataset(name=self.name_lengths, array=np.array(lengths, dtype="i4"))
         self.new_dataset(name=self.name_flags, array=np.array([False] * len(lengths), dtype=bool))
anemoi/datasets/data/misc.py
CHANGED

@@ -8,63 +8,35 @@
 import calendar
 import datetime
 import logging
-import os
 import re
 from pathlib import PurePath

 import numpy as np
 import zarr
+from anemoi.utils.config import load_config as load_settings

 from .dataset import Dataset

 LOG = logging.getLogger(__name__)

-CONFIG = None

-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib
+def load_config():
+    return load_settings(defaults={"datasets": {"named": {}, "path": []}})


 def add_named_dataset(name, path, **kwargs):
-    load_config()
-    if name
+    config = load_config()
+    if name in config["datasets"]["named"]:
         raise ValueError(f"Dataset {name} already exists")

-
+    config["datasets"]["named"][name] = path


 def add_dataset_path(path):
-    load_config()
-
-    if path not in CONFIG["datasets"]["path"]:
-        CONFIG["datasets"]["path"].append(path)
-
-    # save_config()
-
-
-def load_config():
-    global CONFIG
-    if CONFIG is not None:
-        return CONFIG
-
-    conf = os.path.expanduser("~/.config/anemoi/settings.toml")
-    if not os.path.exists(conf):
-        conf = os.path.expanduser("~/.anemoi.toml")
-
-    if os.path.exists(conf):
-
-        with open(conf, "rb") as f:
-            CONFIG = tomllib.load(f)
-    else:
-        CONFIG = {}
-
-    CONFIG.setdefault("datasets", {})
-    CONFIG["datasets"].setdefault("path", [])
-    CONFIG["datasets"].setdefault("named", {})
+    config = load_config()

-
+    if path not in config["datasets"]["path"]:
+        config["datasets"]["path"].append(path)


 def _frequency_to_hours(frequency):
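The hand-rolled TOML loading is gone; configuration now comes from anemoi-utils, which merges user settings over the defaults passed in. A short sketch of the call as used above (the appended path is hypothetical):

    from anemoi.utils.config import load_config as load_settings

    config = load_settings(defaults={"datasets": {"named": {}, "path": []}})
    # The defaults guarantee both keys exist even without a user settings file:
    config["datasets"]["path"].append("/data/my-datasets")
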
anemoi/datasets/data/stores.py
CHANGED

@@ -9,6 +9,7 @@ import logging
 import os
 import warnings
 from functools import cached_property
+from urllib.parse import urlparse

 import numpy as np
 import zarr

@@ -40,7 +41,9 @@ class ReadOnlyStore(zarr.storage.BaseStore):


 class HTTPStore(ReadOnlyStore):
-    """We write our own HTTPStore because the one used by zarr (
+    """We write our own HTTPStore because the one used by zarr (s3fs)
+    does not play well with fork() and multiprocessing.
+    """

     def __init__(self, url):
         self.url = url

@@ -58,17 +61,16 @@ class HTTPStore(ReadOnlyStore):


 class S3Store(ReadOnlyStore):
-    """We write our own S3Store because the one used by zarr (
-    does not play well with fork()
-
+    """We write our own S3Store because the one used by zarr (s3fs)
+    does not play well with fork(). We also get to control the s3 client
+    options using the anemoi configs.
     """

-    def __init__(self, url):
+    def __init__(self, url, region=None):
         from anemoi.utils.s3 import s3_client

         _, _, self.bucket, self.key = url.split("/", 3)
-
-        self.s3 = s3_client(self.bucket)
+        self.s3 = s3_client(self.bucket, region=region)

     def __getitem__(self, key):
         try:

@@ -101,15 +103,27 @@ class DebugStore(ReadOnlyStore):
         return key in self.store


-def
-
-
+def name_to_zarr_store(path_or_url):
+    store = path_or_url
+
+    if store.startswith("s3://"):
+        store = S3Store(store)

-
+    elif store.startswith("http://") or store.startswith("https://"):
+        parsed = urlparse(store)
+        bits = parsed.netloc.split(".")
+        if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
+            s3_url = f"s3://{bits[0]}{parsed.path}"
+            store = S3Store(s3_url, region=bits[2])
+        else:
             store = HTTPStore(store)

-
-
+    return store
+
+
+def open_zarr(path, dont_fail=False, cache=None):
+    try:
+        store = name_to_zarr_store(path)

     if DEBUG_ZARR_LOADING:
         if isinstance(store, str):

@@ -117,7 +131,8 @@ def open_zarr(path, dont_fail=False, cache=None):

         if not os.path.isdir(store):
             raise NotImplementedError(
-                "DEBUG_ZARR_LOADING is only implemented for DirectoryStore.
+                "DEBUG_ZARR_LOADING is only implemented for DirectoryStore. "
+                "Please disable it for other backends."
             )
         store = zarr.storage.DirectoryStore(store)
         store = DebugStore(store)
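The new name_to_zarr_store recognises virtual-hosted-style S3 URLs of the form https://<bucket>.s3.<region>.amazonaws.com/<key> and rewrites them to s3:// so the dedicated S3Store (with the correct region) is used instead of plain HTTP. A standalone sketch of just that URL classification, without the store classes; the bucket and key are illustrative:

    from urllib.parse import urlparse

    def classify(url):
        # Mirrors the detection logic added above.
        parsed = urlparse(url)
        bits = parsed.netloc.split(".")
        if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
            return ("s3", f"s3://{bits[0]}{parsed.path}", bits[2])
        return ("http", url, None)

    print(classify("https://my-bucket.s3.eu-west-1.amazonaws.com/datasets/x.zarr"))
    # -> ('s3', 's3://my-bucket/datasets/x.zarr', 'eu-west-1')
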
anemoi/datasets/dates/__init__.py
CHANGED

@@ -96,7 +96,7 @@ class ValuesDates(Dates):


 class StartEndDates(Dates):
-    def __init__(self, start, end, frequency=1, **kwargs):
+    def __init__(self, start, end, frequency=1, months=None, **kwargs):
         frequency = frequency_to_hours(frequency)

         def _(x):

@@ -128,6 +128,12 @@ class StartEndDates(Dates):
         date = start
         self.values = []
         while date <= end:
+
+            if months is not None:
+                if date.month not in months:
+                    date += increment
+                    continue
+
             self.values.append(date)
             date += increment
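StartEndDates now accepts a months filter that skips dates outside the listed months while still stepping by the regular increment. A standalone sketch of the same loop (standard library only; the 6-hourly frequency and JJA selection are illustrative):

    import datetime

    start, end = datetime.datetime(2020, 1, 1), datetime.datetime(2020, 12, 31)
    increment = datetime.timedelta(hours=6)
    months = {6, 7, 8}  # keep June, July, August only

    values, date = [], start
    while date <= end:
        if date.month not in months:
            date += increment
            continue
        values.append(date)
        date += increment

    print(values[0], values[-1], len(values))
    # 2020-06-01 00:00:00 2020-08-31 18:00:00 368
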