anemoi-datasets 0.3.10__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +9 -9
- anemoi/datasets/commands/scan.py +4 -4
- anemoi/datasets/compute/recentre.py +14 -9
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +6 -27
- anemoi/datasets/create/functions/__init__.py +3 -3
- anemoi/datasets/create/functions/filters/empty.py +4 -4
- anemoi/datasets/create/functions/filters/rename.py +14 -6
- anemoi/datasets/create/functions/filters/rotate_winds.py +16 -60
- anemoi/datasets/create/functions/filters/unrotate_winds.py +14 -64
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +38 -56
- anemoi/datasets/create/functions/sources/constants.py +11 -4
- anemoi/datasets/create/functions/sources/empty.py +2 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -3
- anemoi/datasets/create/functions/sources/grib.py +8 -4
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -364
- anemoi/datasets/create/functions/sources/mars.py +57 -26
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -3
- anemoi/datasets/create/functions/sources/tendencies.py +7 -7
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +309 -191
- anemoi/datasets/create/loaders.py +155 -77
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +51 -17
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +5 -52
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/dataset.py +4 -4
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +37 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +24 -8
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.3.10.dist-info/RECORD +0 -73
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/size.py
CHANGED
@@ -10,9 +10,8 @@
 import logging
 import os
 
-
-
-from anemoi.datasets.create.utils import progress_bar
+import tqdm
+from anemoi.utils.humanize import bytes_to_human
 
 LOG = logging.getLogger(__name__)
 
@@ -22,14 +21,14 @@ def compute_directory_sizes(path):
         return None
 
     size, n = 0, 0
-    bar =
+    bar = tqdm.tqdm(iterable=os.walk(path), desc=f"Computing size of {path}")
     for dirpath, _, filenames in bar:
         for filename in filenames:
             file_path = os.path.join(dirpath, filename)
             size += os.path.getsize(file_path)
             n += 1
 
-    LOG.info(f"Total size: {
+    LOG.info(f"Total size: {bytes_to_human(size)}")
     LOG.info(f"Total number of files: {n}")
 
     return dict(total_size=size, total_number_of_files=n)
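Note: the rewrite drops the in-house `progress_bar` wrapper (removed from utils.py below) in favour of calling `tqdm` directly, and delegates size formatting to anemoi-utils. A minimal sketch of the resulting behaviour; the helper name `directory_size` is illustrative, not part of the package:

import os

import tqdm
from anemoi.utils.humanize import bytes_to_human


def directory_size(path):
    # Wrap the os.walk iterator in tqdm so each visited directory advances the bar.
    size = 0
    for dirpath, _, filenames in tqdm.tqdm(iterable=os.walk(path), desc=f"Computing size of {path}"):
        for filename in filenames:
            size += os.path.getsize(os.path.join(dirpath, filename))
    return bytes_to_human(size)  # e.g. "1.2 GiB"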
anemoi/datasets/create/statistics/__init__.py
CHANGED

@@ -71,7 +71,7 @@ def to_datetime(date):
     if isinstance(date, str):
         return np.datetime64(date)
     if isinstance(date, datetime.datetime):
-        return np.datetime64(date)
+        return np.datetime64(date, "s")
     return date
 
 
@@ -89,20 +89,23 @@ def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squa
            continue
        print("---")
        print(f"❗ Negative variance for {name=}, variance={y}")
-        print(f"
+        print(f" min={minimum[i]} max={maximum[i]} mean={mean[i]} count={count[i]} sums={sums[i]} squares={squares[i]}")
        print(f" -> sums: min={np.min(sums[i])}, max={np.max(sums[i])}, argmin={np.argmin(sums[i])}")
        print(f" -> squares: min={np.min(squares[i])}, max={np.max(squares[i])}, argmin={np.argmin(squares[i])}")
        print(f" -> count: min={np.min(count[i])}, max={np.max(count[i])}, argmin={np.argmin(count[i])}")
+        print(
+            f" squares / count - mean * mean = {squares[i] / count[i]} - {mean[i] * mean[i]} = {squares[i] / count[i] - mean[i] * mean[i]}"
+        )
 
     raise ValueError("Negative variance")
 
 
-def compute_statistics(array, check_variables_names=None,
+def compute_statistics(array, check_variables_names=None, allow_nans=False):
     """Compute statistics for a given array, provides minimum, maximum, sum, squares, count and has_nans as a dictionary."""
 
     nvars = array.shape[1]
 
-    LOG.
+    LOG.debug(f"Stats {nvars}, {array.shape}, {check_variables_names}")
     if check_variables_names:
         assert nvars == len(check_variables_names), (nvars, check_variables_names)
     stats_shape = (array.shape[0], nvars)
@@ -118,7 +121,7 @@ def compute_statistics(array, check_variables_names=None, allow_nan=False):
         values = chunk.reshape((nvars, -1))
 
         for j, name in enumerate(check_variables_names):
-            check_data_values(values[j, :], name=name,
+            check_data_values(values[j, :], name=name, allow_nans=allow_nans)
             if np.isnan(values[j, :]).all():
                 # LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
                 raise ValueError(f"All NaN values for {name} ({j}) for date {i}")
@@ -179,12 +182,12 @@ class TmpStatistics:
            pickle.dump((key, dates, data), f)
        shutil.move(tmp_path, path)
 
-        LOG.
+        LOG.debug(f"Written statistics data for {len(dates)} dates in {path} ({dates})")
 
    def _gather_data(self):
        # use glob to read all pickles
        files = glob.glob(self.dirname + "/*.npz")
-        LOG.
+        LOG.debug(f"Reading stats data, found {len(files)} files in {self.dirname}")
        assert len(files) > 0, f"No files found in {self.dirname}"
        for f in files:
            with open(f, "rb") as f:
@@ -211,17 +214,17 @@ def normalise_dates(dates):
 class StatAggregator:
     NAMES = ["minimum", "maximum", "sums", "squares", "count", "has_nans"]
 
-    def __init__(self, owner, dates, variables_names,
+    def __init__(self, owner, dates, variables_names, allow_nans):
         dates = sorted(dates)
         dates = to_datetimes(dates)
         assert dates, "No dates selected"
         self.owner = owner
         self.dates = dates
         self.variables_names = variables_names
-        self.
+        self.allow_nans = allow_nans
 
         self.shape = (len(self.dates), len(self.variables_names))
-        LOG.
+        LOG.debug(f"Aggregating statistics on shape={self.shape}. Variables : {self.variables_names}")
 
         self.minimum = np.full(self.shape, np.nan, dtype=np.float64)
         self.maximum = np.full(self.shape, np.nan, dtype=np.float64)
@@ -242,6 +245,7 @@ class StatAggregator:
 
         found = set()
         offset = 0
+
         for _, _dates, stats in self.owner._gather_data():
             assert isinstance(stats, dict), stats
             assert stats["minimum"].shape[0] == len(_dates), (stats["minimum"].shape, len(_dates))
@@ -283,7 +287,7 @@ class StatAggregator:
            assert d in found, f"Statistics for date {d} not precomputed."
        assert len(self.dates) == len(found), "Not all dates found in precomputed statistics"
        assert len(self.dates) == offset, "Not all dates found in precomputed statistics."
-        LOG.
+        LOG.debug(f"Statistics for {len(found)} dates found.")
 
    def aggregate(self):
        minimum = np.nanmin(self.minimum, axis=0)
@@ -297,13 +301,43 @@ class StatAggregator:
        assert sums.shape == count.shape == squares.shape == mean.shape == minimum.shape == maximum.shape
 
        x = squares / count - mean * mean
-        # remove negative variance due to numerical errors
-        # x[- 1e-15 < (x / (np.sqrt(squares / count) + np.abs(mean))) < 0] = 0
-        check_variance(x, self.variables_names, minimum, maximum, mean, count, sums, squares)
-        stdev = np.sqrt(x)
 
-
-
+        # def fix_variance(x, name, minimum, maximum, mean, count, sums, squares):
+        #     assert x.shape == minimum.shape == maximum.shape == mean.shape == count.shape == sums.shape == squares.shape
+        #     assert x.shape == (1,)
+        #     x, minimum, maximum, mean, count, sums, squares = x[0], minimum[0], maximum[0], mean[0], count[0], sums[0], squares[0]
+        #     if x >= 0:
+        #         return x
+        #
+        #     order = np.sqrt((squares / count + mean * mean)/2)
+        #     range = maximum - minimum
+        #     LOG.warning(f"Negative variance for {name=}, variance={x}")
+        #     LOG.warning(f"square / count - mean * mean = {squares / count} - {mean * mean} = {squares / count - mean * mean}")
+        #     LOG.warning(f"Variable order of magnitude is {order}.")
+        #     LOG.warning(f"Range is {range} ({maximum=} - {minimum=}).")
+        #     LOG.warning(f"Count is {count}.")
+        #     if abs(x) < order * 1e-6 and abs(x) < range * 1e-6:
+        #         LOG.warning(f"Variance is negative but very small, setting to 0.")
+        #         return x*0
+        #     return x
+
+        for i, name in enumerate(self.variables_names):
+            # remove negative variance due to numerical errors
+            # Not needed for now, fix_variance is disabled
+            # x[i] = fix_variance(x[i:i+1], name, minimum[i:i+1], maximum[i:i+1], mean[i:i+1], count[i:i+1], sums[i:i+1], squares[i:i+1])
+            check_variance(
+                x[i : i + 1],
+                [name],
+                minimum[i : i + 1],
+                maximum[i : i + 1],
+                mean[i : i + 1],
+                count[i : i + 1],
+                sums[i : i + 1],
+                squares[i : i + 1],
+            )
+            check_data_values(np.array([mean[i]]), name=name, allow_nans=False)
+
+        stdev = np.sqrt(x)
 
        return Summary(
            minimum=minimum,
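The move to per-variable `check_variance` calls matters because the one-pass formula `variance = squares / count - mean * mean` is numerically fragile: for a variable with a large mean and a small spread the two terms nearly cancel, and float64 rounding can push the result below zero. An illustrative reproduction (not from the package):

import numpy as np

rng = np.random.default_rng(0)
# Large mean (~1e8), tiny spread (~1e-3): the true variance is ~1e-6,
# but squares/count and mean*mean are both ~1e16, so their float64
# difference is dominated by rounding error of order 1.
values = 1e8 + rng.standard_normal(1_000_000) * 1e-3
count = values.size
mean = values.sum() / count
squares = (values * values).sum()
print(squares / count - mean * mean)  # may come out slightly negative
print(values.var())                   # the stable two-pass answer, ~1e-6

Checking one slice at a time (`x[i : i + 1]` with `[name]`) also means a failure now names the offending variable instead of aborting on the whole array.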
anemoi/datasets/create/template.py
CHANGED

@@ -8,72 +8,16 @@
 #
 
 import logging
-import os
 import re
 import textwrap
 from functools import wraps
 
-
-
-TRACE_INDENT = 0
-
-
-def step(action_path):
-    return f"[{'.'.join(action_path)}]"
+from anemoi.utils.humanize import plural
 
+from .trace import step
+from .trace import trace
 
-def trace(emoji, *args):
-    if os.environ.get("ANEMOI_DATASET_TRACE_CREATE") is None:
-        return
-    print(emoji, " " * TRACE_INDENT, *args)
-
-
-def trace_datasource(method):
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        global TRACE_INDENT
-        trace(
-            "🌍",
-            "=>",
-            step(self.action_path),
-            self._trace_datasource(*args, **kwargs),
-        )
-        TRACE_INDENT += 1
-        result = method(self, *args, **kwargs)
-        TRACE_INDENT -= 1
-        trace(
-            "🍎",
-            "<=",
-            step(self.action_path),
-            textwrap.shorten(repr(result), 256),
-        )
-        return result
-
-    return wrapper
-
-
-def trace_select(method):
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        global TRACE_INDENT
-        trace(
-            "👓",
-            "=>",
-            ".".join(self.action_path),
-            self._trace_select(*args, **kwargs),
-        )
-        TRACE_INDENT += 1
-        result = method(self, *args, **kwargs)
-        TRACE_INDENT -= 1
-        trace(
-            "🍍",
-            "<=",
-            ".".join(self.action_path),
-            textwrap.shorten(repr(result), 256),
-        )
-        return result
-
-    return wrapper
+LOG = logging.getLogger(__name__)
 
 
 def notify_result(method):

@@ -99,7 +43,13 @@ class Context:
         self.used_references.add(key)
 
     def notify_result(self, key, result):
-        trace(
+        trace(
+            "🎯",
+            step(key),
+            "notify result",
+            textwrap.shorten(repr(result).replace(",", ", "), width=40),
+            plural(len(result), "field"),
+        )
         assert isinstance(key, (list, tuple)), key
         key = tuple(key)
         if key in self.used_references:
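The 🎯 message leans on `anemoi.utils.humanize.plural`, which, judging from this call site, renders a count together with a correctly pluralised noun, along the lines of:

from anemoi.utils.humanize import plural

# Presumed behaviour, inferred from the usage above:
print(plural(1, "field"))  # 1 field
print(plural(3, "field"))  # 3 fields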
anemoi/datasets/create/trace.py
ADDED

@@ -0,0 +1,91 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import logging
+import textwrap
+import threading
+from functools import wraps
+
+LOG = logging.getLogger(__name__)
+
+
+thread_local = threading.local()
+TRACE = 0
+
+
+def enable_trace(on_off):
+    global TRACE
+    TRACE = on_off
+
+
+def step(action_path):
+    return f"[{'.'.join(action_path)}]"
+
+
+def trace(emoji, *args):
+
+    if not TRACE:
+        return
+
+    if not hasattr(thread_local, "TRACE_INDENT"):
+        thread_local.TRACE_INDENT = 0
+
+    print(emoji, " " * thread_local.TRACE_INDENT, *args)
+
+
+def trace_datasource(method):
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+
+        if not hasattr(thread_local, "TRACE_INDENT"):
+            thread_local.TRACE_INDENT = 0
+
+        trace(
+            "🌍",
+            "=>",
+            step(self.action_path),
+            self._trace_datasource(*args, **kwargs),
+        )
+        thread_local.TRACE_INDENT += 1
+        result = method(self, *args, **kwargs)
+        thread_local.TRACE_INDENT -= 1
+        trace(
+            "🍎",
+            "<=",
+            step(self.action_path),
+            textwrap.shorten(repr(result), 256),
+        )
+        return result
+
+    return wrapper
+
+
+def trace_select(method):
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        if not hasattr(thread_local, "TRACE_INDENT"):
+            thread_local.TRACE_INDENT = 0
+        trace(
+            "👓",
+            "=>",
+            ".".join(self.action_path),
+            self._trace_select(*args, **kwargs),
+        )
+        thread_local.TRACE_INDENT += 1
+        result = method(self, *args, **kwargs)
+        thread_local.TRACE_INDENT -= 1
+        trace(
+            "🍍",
+            "<=",
+            ".".join(self.action_path),
+            textwrap.shorten(repr(result), 256),
+        )
+        return result
+
+    return wrapper
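Two design points in the new module: tracing is now switched on explicitly with `enable_trace()` rather than through the old `ANEMOI_DATASET_TRACE_CREATE` environment variable, and the indent counter lives in `threading.local()`, so builders running in parallel threads each keep a consistent indentation instead of corrupting a shared module-level counter. A hypothetical usage sketch; the `Demo` class is illustrative, but the decorator does require `action_path` and `_trace_datasource` on the wrapped object:

from anemoi.datasets.create.trace import enable_trace, trace_datasource


class Demo:
    action_path = ["input", "mars"]

    def _trace_datasource(self, *args, **kwargs):
        # Shown next to the 🌍 arrow when the call is entered.
        return "datasource(...)"

    @trace_datasource
    def datasource(self, request):
        return ["field_1", "field_2"]


enable_trace(1)
Demo().datasource({"param": "2t"})
# 🌍  => [input.mars] datasource(...)
# 🍎  <= [input.mars] ['field_1', 'field_2']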
anemoi/datasets/create/utils.py
CHANGED
@@ -7,15 +7,11 @@
 # nor does it submit to any jurisdiction.
 #
 
-import json
 import os
 from contextlib import contextmanager
 
 import numpy as np
-import
-from climetlab import settings
-from climetlab.utils.humanize import seconds  # noqa: F401
-from tqdm.auto import tqdm
+from earthkit.data import settings
 
 
 def cache_context(dirname):
@@ -27,50 +23,22 @@ def cache_context(dirname):
        return no_cache_context()
 
    os.makedirs(dirname, exist_ok=True)
-    return settings.temporary("cache-directory", dirname)
-
-
-def bytes(n):
-    """>>> bytes(4096)
-    '4 KiB'
-    >>> bytes(4000)
-    '3.9 KiB'
-    """
-    if n < 0:
-        sign = "-"
-        n -= 0
-    else:
-        sign = ""
-
-    u = ["", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"]
-    i = 0
-    while n >= 1024:
-        n /= 1024.0
-        i += 1
-    return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i])
+    # return settings.temporary("cache-directory", dirname)
+    return settings.temporary({"cache-policy": "user", "user-cache-directory": dirname})
 
 
 def to_datetime_list(*args, **kwargs):
-    from
+    from earthkit.data.utils.dates import to_datetime_list as to_datetime_list_
 
     return to_datetime_list_(*args, **kwargs)
 
 
 def to_datetime(*args, **kwargs):
-    from
+    from earthkit.data.utils.dates import to_datetime as to_datetime_
 
     return to_datetime_(*args, **kwargs)
 
 
-def load_json_or_yaml(path):
-    with open(path, "r") as f:
-        if path.endswith(".json"):
-            return json.load(f)
-        if path.endswith(".yaml") or path.endswith(".yml"):
-            return yaml.safe_load(f)
-    raise ValueError(f"Cannot read file {path}. Need json or yaml with appropriate extension.")
-
-
 def make_list_int(value):
     if isinstance(value, str):
         if "/" not in value:
@@ -117,18 +85,3 @@ def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]
        assert d1 == d2, (i, d1, d2)
 
    return dates_
-
-
-def progress_bar(*, iterable=None, total=None, initial=0, desc=None):
-    return tqdm(
-        iterable=iterable,
-        total=total,
-        initial=initial,
-        unit_scale=True,
-        unit_divisor=1024,
-        unit="B",
-        disable=False,
-        leave=False,
-        desc=desc,
-        # dynamic_ncols=True, # make this the default?
-    )
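Cache handling moves from climetlab to earthkit-data: `settings.temporary(...)` returns a context manager that applies the given settings inside a `with` block and restores the previous values on exit, which is how `cache_context` is meant to be consumed. A sketch of the intended use, grounded only in the call shown above:

from anemoi.datasets.create.utils import cache_context

# Inside the block, earthkit-data caches remote data under the given
# directory ("user" cache policy); the previous policy is restored on exit.
with cache_context("/tmp/anemoi-cache"):
    ...  # fetch and build from source data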
anemoi/datasets/create/zarr.py
CHANGED
@@ -24,8 +24,12 @@ def add_zarr_dataset(
     shape=None,
     array=None,
     overwrite=True,
+    dimensions=None,
     **kwargs,
 ):
+    assert dimensions is not None, "Please pass dimensions to add_zarr_dataset."
+    assert isinstance(dimensions, (tuple, list))
+
     if dtype is None:
         assert array is not None, (name, shape, array, dtype, zarr_root)
         dtype = array.dtype
@@ -44,6 +48,7 @@ def add_zarr_dataset(
            **kwargs,
        )
        a[...] = array
+        a.attrs["_ARRAY_DIMENSIONS"] = dimensions
        return a
 
    if "fill_value" not in kwargs:
@@ -69,6 +74,7 @@ def add_zarr_dataset(
        overwrite=overwrite,
        **kwargs,
    )
+    a.attrs["_ARRAY_DIMENSIONS"] = dimensions
    return a
 
 
@@ -79,22 +85,27 @@ class ZarrBuiltRegistry:
     flags = None
     z = None
 
-    def __init__(self, path, synchronizer_path=None):
+    def __init__(self, path, synchronizer_path=None, use_threads=False):
         import zarr
 
         assert isinstance(path, str), path
         self.zarr_path = path
 
-        if
-
-
-
+        if use_threads:
+            self.synchronizer = zarr.ThreadSynchronizer()
+            self.synchronizer_path = None
+        else:
+            if synchronizer_path is None:
+                synchronizer_path = self.zarr_path + ".sync"
+            self.synchronizer_path = synchronizer_path
+            self.synchronizer = zarr.ProcessSynchronizer(self.synchronizer_path)
 
     def clean(self):
-
-
-
-
+        if self.synchronizer_path is not None:
+            try:
+                shutil.rmtree(self.synchronizer_path)
+            except FileNotFoundError:
+                pass
 
     def _open_write(self):
         import zarr
@@ -112,7 +123,7 @@ class ZarrBuiltRegistry:
     def new_dataset(self, *args, **kwargs):
         z = self._open_write()
         zarr_root = z["_build"]
-        add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, **kwargs)
+        add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, dimensions=("tmp",), **kwargs)
 
     def add_to_history(self, action, **kwargs):
         new = dict(
@@ -143,6 +154,9 @@ class ZarrBuiltRegistry:
        z.attrs["latest_write_timestamp"] = datetime.datetime.utcnow().isoformat()
        z["_build"][self.name_flags][i] = value
 
+    def ready(self):
+        return all(self.get_flags())
+
    def create(self, lengths, overwrite=False):
        self.new_dataset(name=self.name_lengths, array=np.array(lengths, dtype="i4"))
        self.new_dataset(name=self.name_flags, array=np.array([False] * len(lengths), dtype=bool))
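`_ARRAY_DIMENSIONS` is the attribute the xarray zarr convention uses to record dimension names on each array; writing it everywhere (even the placeholder `("tmp",)` for build-registry arrays) makes the store consumable by generic zarr tooling. A sketch, assuming an anemoi store on disk at an illustrative path:

import xarray as xr

# xarray refuses to open a zarr v2 store whose arrays lack
# _ARRAY_DIMENSIONS; with the attribute present, each axis gets a name.
ds = xr.open_zarr("dataset.zarr")
print(ds.sizes)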
anemoi/datasets/data/dataset.py
CHANGED
@@ -187,8 +187,8 @@ class Dataset:
                specific=self.metadata_specific(),
                frequency=self.frequency,
                variables=self.variables,
-                start_date=self.dates[0],
-                end_date=self.dates[-1],
+                start_date=self.dates[0].astype(str),
+                end_date=self.dates[-1].astype(str),
            )
        )
 
@@ -200,8 +200,8 @@ class Dataset:
            variables=self.variables,
            shape=self.shape,
            frequency=self.frequency,
-            start_date=self.dates[0],
-            end_date=self.dates[-1],
+            start_date=self.dates[0].astype(str),
+            end_date=self.dates[-1].astype(str),
            **kwargs,
        )
 
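`self.dates` holds `numpy.datetime64` values, which the standard `json` encoder rejects; converting with `.astype(str)` yields ISO 8601 strings that serialise cleanly. For example:

import json

import numpy as np

d = np.datetime64("2020-01-01T00:00:00")
# json.dumps(d) raises TypeError: Object of type datetime64 is not JSON serializable
print(json.dumps(d.astype(str)))  # "2020-01-01T00:00:00"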
anemoi/datasets/data/misc.py
CHANGED
@@ -8,63 +8,35 @@
 import calendar
 import datetime
 import logging
-import os
 import re
 from pathlib import PurePath
 
 import numpy as np
 import zarr
+from anemoi.utils.config import load_config as load_settings
 
 from .dataset import Dataset
 
 LOG = logging.getLogger(__name__)
 
-CONFIG = None
 
-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib
+def load_config():
+    return load_settings(defaults={"datasets": {"named": {}, "path": []}})
 
 
 def add_named_dataset(name, path, **kwargs):
-    load_config()
-    if name
+    config = load_config()
+    if name in config["datasets"]["named"]:
         raise ValueError(f"Dataset {name} already exists")
 
-
+    config["datasets"]["named"][name] = path
 
 
 def add_dataset_path(path):
-    load_config()
-
-    if path not in CONFIG["datasets"]["path"]:
-        CONFIG["datasets"]["path"].append(path)
-
-    # save_config()
-
-
-def load_config():
-    global CONFIG
-    if CONFIG is not None:
-        return CONFIG
-
-    conf = os.path.expanduser("~/.config/anemoi/settings.toml")
-    if not os.path.exists(conf):
-        conf = os.path.expanduser("~/.anemoi.toml")
-
-    if os.path.exists(conf):
-
-        with open(conf, "rb") as f:
-            CONFIG = tomllib.load(f)
-    else:
-        CONFIG = {}
-
-    CONFIG.setdefault("datasets", {})
-    CONFIG["datasets"].setdefault("path", [])
-    CONFIG["datasets"].setdefault("named", {})
+    config = load_config()
 
-
+    if path not in config["datasets"]["path"]:
+        config["datasets"]["path"].append(path)
 
 
 def _frequency_to_hours(frequency):
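With the hand-rolled TOML loader gone, name and path registration both run through `anemoi.utils.config.load_config`, and the `defaults=` argument guarantees the `datasets.named` / `datasets.path` keys exist even when no settings file is present. A small usage sketch, grounded only in the functions above:

from anemoi.datasets.data.misc import add_dataset_path, load_config

# Register a directory to search when a dataset is referred to by name;
# the defaults make config["datasets"]["path"] safe to read immediately.
add_dataset_path("/data/anemoi/datasets")
print(load_config()["datasets"]["path"])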
|