anemoi-datasets 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff compares two versions of the package that were publicly released to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
Files changed (52)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +50 -20
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/compute/recentre.py +1 -1
  13. anemoi/datasets/create/__init__.py +961 -146
  14. anemoi/datasets/create/check.py +5 -3
  15. anemoi/datasets/create/config.py +53 -2
  16. anemoi/datasets/create/functions/sources/accumulations.py +6 -22
  17. anemoi/datasets/create/functions/sources/hindcasts.py +27 -12
  18. anemoi/datasets/create/functions/sources/tendencies.py +1 -1
  19. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  20. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  21. anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
  22. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  23. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  24. anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
  25. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  26. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  27. anemoi/datasets/create/input.py +62 -25
  28. anemoi/datasets/create/statistics/__init__.py +39 -23
  29. anemoi/datasets/create/utils.py +3 -2
  30. anemoi/datasets/data/__init__.py +1 -0
  31. anemoi/datasets/data/concat.py +46 -2
  32. anemoi/datasets/data/dataset.py +109 -34
  33. anemoi/datasets/data/forwards.py +17 -8
  34. anemoi/datasets/data/grids.py +17 -3
  35. anemoi/datasets/data/interpolate.py +133 -0
  36. anemoi/datasets/data/misc.py +56 -66
  37. anemoi/datasets/data/missing.py +240 -0
  38. anemoi/datasets/data/select.py +7 -1
  39. anemoi/datasets/data/stores.py +3 -3
  40. anemoi/datasets/data/subset.py +47 -5
  41. anemoi/datasets/data/unchecked.py +20 -22
  42. anemoi/datasets/data/xy.py +125 -0
  43. anemoi/datasets/dates/__init__.py +33 -20
  44. anemoi/datasets/dates/groups.py +2 -2
  45. anemoi/datasets/grids.py +66 -48
  46. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
  47. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +51 -41
  48. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
  49. anemoi/datasets/create/loaders.py +0 -924
  50. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
  51. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
  52. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0
--- a/anemoi/datasets/create/functions/sources/xarray/variable.py
+++ b/anemoi/datasets/create/functions/sources/xarray/variable.py
@@ -14,34 +14,32 @@ from functools import cached_property
 import numpy as np
 from earthkit.data.utils.array import ensure_backend
 
-from anemoi.datasets.create.functions.sources.xarray.metadata import MDMapping
-
 from .field import XArrayField
 
 LOG = logging.getLogger(__name__)
 
 
 class Variable:
-    def __init__(self, *, ds, var, coordinates, grid, time, metadata, mapping=None, array_backend=None):
+    def __init__(
+        self,
+        *,
+        ds,
+        var,
+        coordinates,
+        grid,
+        time,
+        metadata,
+        array_backend=None,
+    ):
         self.ds = ds
         self.var = var
 
         self.grid = grid
         self.coordinates = coordinates
 
-        # print("Variable", var.name)
-        # for c in coordinates:
-        #     print(" ", c)
-
         self._metadata = metadata.copy()
-        # self._metadata.update(var.attrs)
         self._metadata.update({"variable": var.name})
 
-        # self._metadata.setdefault("level", None)
-        # self._metadata.setdefault("number", 0)
-        # self._metadata.setdefault("levtype", "sfc")
-        self._mapping = mapping
-
         self.time = time
 
         self.shape = tuple(len(c.variable) for c in coordinates if c.is_dim and not c.scalar and not c.is_grid)
@@ -51,23 +49,6 @@ class Variable:
         self.length = math.prod(self.shape)
         self.array_backend = ensure_backend(array_backend)
 
-    def update_metadata_mapping(self, kwargs):
-
-        result = {}
-
-        for k, v in kwargs.items():
-            if k == "param":
-                result[k] = "variable"
-                continue
-
-            for c in self.coordinates:
-                if k in c.mars_names:
-                    for v in c.mars_names:
-                        result[v] = c.variable.name
-                    break
-
-        self._mapping = MDMapping(result)
-
     @property
     def name(self):
         return self.var.name
@@ -111,17 +92,11 @@ class Variable:
         kwargs = {k: v for k, v in zip(self.names, coords)}
         return XArrayField(self, self.var.isel(kwargs))
 
-    @property
-    def mapping(self):
-        return self._mapping
-
     def sel(self, missing, **kwargs):
 
         if not kwargs:
             return self
 
-        kwargs = self._mapping.from_user(kwargs)
-
         k, v = kwargs.popitem()
 
         c = self.by_name.get(k)
@@ -147,13 +122,15 @@ class Variable:
             grid=self.grid,
             time=self.time,
             metadata=metadata,
-            mapping=self.mapping,
         )
 
         return variable.sel(missing, **kwargs)
 
     def match(self, **kwargs):
-        kwargs = self._mapping.from_user(kwargs)
+
+        if "param" in kwargs:
+            assert "variable" not in kwargs
+            kwargs["variable"] = kwargs.pop("param")
 
         if "variable" in kwargs:
             name = kwargs.pop("variable")
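
Note: the removed MDMapping indirection is replaced by a direct alias, where "param" is rewritten to "variable" before matching. A minimal standalone sketch of the new behaviour (names simplified, field values illustrative):

    def match(variable_name, **kwargs):
        # "param" is now accepted as an alias for "variable"
        if "param" in kwargs:
            assert "variable" not in kwargs
            kwargs["variable"] = kwargs.pop("param")
        if "variable" in kwargs:
            return kwargs.pop("variable") == variable_name
        return True

    assert match("2t", param="2t")          # same result as variable="2t"
    assert not match("2t", variable="msl")
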
--- a/anemoi/datasets/create/input.py
+++ b/anemoi/datasets/create/input.py
@@ -106,30 +106,32 @@ def _data_request(data):
     area = grid = None
 
     for field in data:
-        if not hasattr(field, "as_mars"):
-            continue
-
-        if date is None:
-            date = field.datetime()["valid_time"]
-
-        if field.datetime()["valid_time"] != date:
-            continue
+        try:
+            if date is None:
+                date = field.datetime()["valid_time"]
 
-        as_mars = field.metadata(namespace="mars")
-        step = as_mars.get("step")
-        levtype = as_mars.get("levtype", "sfc")
-        param = as_mars["param"]
-        levelist = as_mars.get("levelist", None)
-        area = field.mars_area
-        grid = field.mars_grid
+            if field.datetime()["valid_time"] != date:
+                continue
 
-        if levelist is None:
-            params_levels[levtype].add(param)
-        else:
-            params_levels[levtype].add((param, levelist))
+            as_mars = field.metadata(namespace="mars")
+            if not as_mars:
+                continue
+            step = as_mars.get("step")
+            levtype = as_mars.get("levtype", "sfc")
+            param = as_mars["param"]
+            levelist = as_mars.get("levelist", None)
+            area = field.mars_area
+            grid = field.mars_grid
+
+            if levelist is None:
+                params_levels[levtype].add(param)
+            else:
+                params_levels[levtype].add((param, levelist))
 
-        if step:
-            params_steps[levtype].add((param, step))
+            if step:
+                params_steps[levtype].add((param, step))
+        except Exception:
+            LOG.error(f"Error in retrieving metadata (cannot build data request info) for {field}", exc_info=True)
 
     def sort(old_dic):
         new_dic = {}
@@ -277,6 +279,9 @@ class Result:
         if len(args) == 1 and isinstance(args[0], (list, tuple)):
             args = args[0]
 
+        # print("Executing", self.action_path)
+        # print("Dates:", compress_dates(self.dates))
+
         names = []
         for a in args:
             if isinstance(a, str):
@@ -285,14 +290,13 @@ class Result:
                 names += list(a.keys())
 
         print(f"Building a {len(names)}D hypercube using", names)
-
         ds = ds.order_by(*args, remapping=remapping, patches=patches)
-        user_coords = ds.unique_values(*names, remapping=remapping, patches=patches)
+        user_coords = ds.unique_values(*names, remapping=remapping, patches=patches, progress_bar=False)
 
         print()
         print("Number of unique values found for each coordinate:")
         for k, v in user_coords.items():
-            print(f"  {k:20}:", len(v))
+            print(f"  {k:20}:", len(v), shorten_list(v, max_length=10))
         print()
         user_shape = tuple(len(v) for k, v in user_coords.items())
         print("Shape of the hypercube :", user_shape)
@@ -305,13 +309,18 @@ class Result:
 
         remapping = build_remapping(remapping, patches)
         expected = set(itertools.product(*user_coords.values()))
+        extra = set()
 
         if math.prod(user_shape) > len(ds):
             print(f"This means that all the fields in the datasets do not exists for all combinations of {names}.")
 
             for f in ds:
                 metadata = remapping(f.metadata)
-                expected.remove(tuple(metadata(n) for n in names))
+                key = tuple(metadata(n, default=None) for n in names)
+                if key in expected:
+                    expected.remove(key)
+                else:
+                    extra.add(key)
 
             print("Missing fields:")
             print()
@@ -321,7 +330,35 @@ class Result:
                     print("...", len(expected) - i - 1, "more")
                     break
 
+            print("Extra fields:")
+            print()
+            for i, f in enumerate(sorted(extra)):
+                print(" ", f)
+                if i >= 9 and len(extra) > 10:
+                    print("...", len(extra) - i - 1, "more")
+                    break
+
             print()
+            print("Missing values:")
+            per_name = defaultdict(set)
+            for e in expected:
+                for n, v in zip(names, e):
+                    per_name[n].add(v)
+
+            for n, v in per_name.items():
+                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
+            print()
+
+            print("Extra values:")
+            per_name = defaultdict(set)
+            for e in extra:
+                for n, v in zip(names, e):
+                    per_name[n].add(v)
+
+            for n, v in per_name.items():
+                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
+            print()
+
             print("To solve this issue, you can:")
             print(
                 "  - Provide a better selection, like 'step: 0' or 'level: 1000' to "
--- a/anemoi/datasets/create/statistics/__init__.py
+++ b/anemoi/datasets/create/statistics/__init__.py
@@ -79,6 +79,37 @@ def to_datetimes(dates):
     return [to_datetime(d) for d in dates]
 
 
+def fix_variance(x, name, count, sums, squares):
+    assert count.shape == sums.shape == squares.shape
+    assert isinstance(x, float)
+
+    mean = sums / count
+    assert mean.shape == count.shape
+
+    if x >= 0:
+        return x
+
+    LOG.warning(f"Negative variance for {name=}, variance={x}")
+    magnitude = np.sqrt((squares / count + mean * mean) / 2)
+    LOG.warning(f"square / count - mean * mean = {squares/count} - {mean*mean} = {squares/count - mean*mean}")
+    LOG.warning(f"Variable span order of magnitude is {magnitude}.")
+    LOG.warning(f"Count is {count}.")
+
+    variances = squares / count - mean * mean
+    assert variances.shape == squares.shape == mean.shape
+    if all(variances >= 0):
+        LOG.warning(f"All individual variances for {name} are positive, setting variance to 0.")
+        return 0
+
+    # if abs(x) < magnitude * 1e-6 and abs(x) < range * 1e-6:
+    #     LOG.warning("Variance is negative but very small.")
+    #     variances = squares / count - mean * mean
+    #     return 0
+
+    LOG.warning(f"ERROR at least one individual variance is negative ({np.nanmin(variances)}).")
+    return x
+
+
 def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squares):
     if (x >= 0).all():
         return
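
Note: the negative variances that fix_variance guards against come from catastrophic cancellation in the E[x²] − E[x]² formula when the true variance is tiny relative to the mean. A minimal reproduction (values illustrative):

    import numpy as np

    # Constant field with a large mean: the true variance is exactly 0,
    # but the sum-of-squares formula accumulates rounding error.
    x = np.full(1_000_000, 101_325.0, dtype=np.float32)  # surface pressure, Pa
    count = x.size
    sums = x.sum()
    squares = (x * x).sum()

    mean = sums / count
    variance = squares / count - mean * mean
    print(variance)  # small, possibly negative, instead of exactly 0
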
@@ -292,39 +323,24 @@ class StatAggregator:
     def aggregate(self):
         minimum = np.nanmin(self.minimum, axis=0)
         maximum = np.nanmax(self.maximum, axis=0)
+
         sums = np.nansum(self.sums, axis=0)
         squares = np.nansum(self.squares, axis=0)
         count = np.nansum(self.count, axis=0)
         has_nans = np.any(self.has_nans, axis=0)
-        mean = sums / count
+        assert sums.shape == count.shape == squares.shape == minimum.shape == maximum.shape
 
-        assert sums.shape == count.shape == squares.shape == mean.shape == minimum.shape == maximum.shape
+        mean = sums / count
+        assert mean.shape == minimum.shape
 
         x = squares / count - mean * mean
-
-        # def fix_variance(x, name, minimum, maximum, mean, count, sums, squares):
-        #     assert x.shape == minimum.shape == maximum.shape == mean.shape == count.shape == sums.shape == squares.shape
-        #     assert x.shape == (1,)
-        #     x, minimum, maximum, mean, count, sums, squares = x[0], minimum[0], maximum[0], mean[0], count[0], sums[0], squares[0]
-        #     if x >= 0:
-        #         return x
-        #
-        #     order = np.sqrt((squares / count + mean * mean)/2)
-        #     range = maximum - minimum
-        #     LOG.warning(f"Negative variance for {name=}, variance={x}")
-        #     LOG.warning(f"square / count - mean * mean = {squares / count} - {mean * mean} = {squares / count - mean * mean}")
-        #     LOG.warning(f"Variable order of magnitude is {order}.")
-        #     LOG.warning(f"Range is {range} ({maximum=} - {minimum=}).")
-        #     LOG.warning(f"Count is {count}.")
-        #     if abs(x) < order * 1e-6 and abs(x) < range * 1e-6:
-        #         LOG.warning(f"Variance is negative but very small, setting to 0.")
-        #         return x*0
-        #     return x
+        assert x.shape == minimum.shape
 
         for i, name in enumerate(self.variables_names):
             # remove negative variance due to numerical errors
-            # Not needed for now, fix_variance is disabled
-            # x[i] = fix_variance(x[i:i+1], name, minimum[i:i+1], maximum[i:i+1], mean[i:i+1], count[i:i+1], sums[i:i+1], squares[i:i+1])
+            x[i] = fix_variance(x[i], name, self.count[i : i + 1], self.sums[i : i + 1], self.squares[i : i + 1])
+
+        for i, name in enumerate(self.variables_names):
             check_variance(
                 x[i : i + 1],
                 [name],
--- a/anemoi/datasets/create/utils.py
+++ b/anemoi/datasets/create/utils.py
@@ -7,6 +7,7 @@
 # nor does it submit to any jurisdiction.
 #
 
+import datetime
 import os
 from contextlib import contextmanager
@@ -61,10 +62,10 @@ def make_list_int(value):
 
 
 def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]"):
-    assert isinstance(frequency, int), frequency
+    assert isinstance(frequency, datetime.timedelta), frequency
     start = np.datetime64(start)
     end = np.datetime64(end)
-    delta = np.timedelta64(frequency, "h")
+    delta = np.timedelta64(frequency)
 
     res = []
     while start <= end:
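
Note: the frequency is now a datetime.timedelta rather than an integer number of hours, which np.timedelta64 accepts directly (and no longer restricts to whole hours). For example:

    import datetime
    import numpy as np

    frequency = datetime.timedelta(hours=6)
    delta = np.timedelta64(frequency)  # numpy.timedelta64(21600000000,'us')

    start = np.datetime64("2020-01-01T00:00:00")
    print(start + delta)  # 2020-01-01T06:00:00.000000
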
--- a/anemoi/datasets/data/__init__.py
+++ b/anemoi/datasets/data/__init__.py
@@ -27,6 +27,7 @@ class MissingDateError(Exception):
 
 def open_dataset(*args, **kwargs):
     ds = _open_dataset(*args, **kwargs)
+    ds = ds.mutate()
     ds.arguments = {"args": args, "kwargs": kwargs}
     ds._check()
     return ds
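
Note: mutate() is a hook that lets a freshly built dataset replace itself with another object before being returned to the caller; the base implementation (see the dataset.py hunk below) is the identity. A sketch of the pattern, with a made-up Placeholder class:

    class Dataset:
        def mutate(self):
            # Default: nothing to swap, return self
            return self

    class Placeholder(Dataset):
        def __init__(self, resolved):
            self.resolved = resolved

        def mutate(self):
            # A lazily built wrapper can swap itself for the real dataset
            return self.resolved

    def finish(ds):
        # Mirrors what open_dataset now does before returning
        return ds.mutate()

    real = Dataset()
    assert finish(Placeholder(real)) is real
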
--- a/anemoi/datasets/data/concat.py
+++ b/anemoi/datasets/data/concat.py
@@ -9,6 +9,7 @@ import logging
 from functools import cached_property
 
 import numpy as np
+from anemoi.utils.dates import frequency_to_timedelta
 
 from .debug import Node
 from .debug import debug_indexing
@@ -102,20 +103,63 @@ class Concat(ConcatMixin, Combined):
     def tree(self):
         return Node(self, [d.tree() for d in self.datasets])
 
+    @classmethod
+    def check_dataset_compatibility(cls, datasets, fill_missing_gaps=False):
+        # Study the dates
+        ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets]
+
+        # Make sure the dates are disjoint
+        for i in range(len(ranges)):
+            r = ranges[i]
+            for j in range(i + 1, len(ranges)):
+                s = ranges[j]
+                if r[0] <= s[0] <= r[1] or r[0] <= s[1] <= r[1]:
+                    raise ValueError(f"Overlapping dates: {r} and {s} ({datasets[i]} {datasets[j]})")
+
+        # For now we should have the datasets in order with no gaps
+
+        frequency = frequency_to_timedelta(datasets[0].frequency)
+        result = []
+
+        for i in range(len(ranges) - 1):
+            result.append(datasets[i])
+            r = ranges[i]
+            s = ranges[i + 1]
+            if r[1] + frequency != s[0]:
+                if fill_missing_gaps:
+                    from .missing import MissingDataset
+
+                    result.append(MissingDataset(datasets[i], r[1] + frequency, s[0] - frequency))
+                else:
+                    r = [str(e) for e in r]
+                    s = [str(e) for e in s]
+                    raise ValueError(
+                        "Datasets must be sorted by dates, with no gaps: "
+                        f"{r} and {s} ({datasets[i]} {datasets[i+1]})"
+                    )
+
+        result.append(datasets[-1])
+        assert len(result) >= len(datasets), (len(result), len(datasets))
+
+        return result
+
 
-def concat_factory(args, kwargs, zarr_root):
+def concat_factory(args, kwargs):
 
     datasets = kwargs.pop("concat")
+    fill_missing_gaps = kwargs.pop("fill_missing_gaps", False)
     assert isinstance(datasets, (list, tuple))
     assert len(args) == 0
 
     assert isinstance(datasets, (list, tuple))
 
-    datasets = [_open(e, zarr_root) for e in datasets]
+    datasets = [_open(e) for e in datasets]
 
     if len(datasets) == 1:
         return datasets[0]._subset(**kwargs)
 
     datasets, kwargs = _auto_adjust(datasets, kwargs)
 
+    datasets = Concat.check_dataset_compatibility(datasets, fill_missing_gaps)
+
     return Concat(datasets)._subset(**kwargs)
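
Note: with the new validation, concatenated datasets must be date-sorted, non-overlapping, and gap-free unless fill_missing_gaps is set, in which case each gap is padded with a MissingDataset. A hypothetical call, with made-up paths:

    from anemoi.datasets import open_dataset

    # Raises ValueError if dataset-2021 does not start exactly one
    # frequency step after dataset-2020 ends, unless gaps are filled.
    ds = open_dataset(
        concat=["dataset-2020.zarr", "dataset-2021.zarr"],
        fill_missing_gaps=True,  # pad any gap with a MissingDataset
    )
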
--- a/anemoi/datasets/data/dataset.py
+++ b/anemoi/datasets/data/dataset.py
@@ -5,24 +5,37 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
+import datetime
+import json
 import logging
 import os
+import pprint
 import warnings
 from functools import cached_property
 
+from anemoi.utils.dates import frequency_to_seconds
+from anemoi.utils.dates import frequency_to_string
+from anemoi.utils.dates import frequency_to_timedelta
+
 LOG = logging.getLogger(__name__)
 
 
 class Dataset:
     arguments = {}
 
+    def mutate(self):
+        return self
+
+    def swap_with_parent(self, parent):
+        return parent
+
     @cached_property
     def _len(self):
         return len(self)
 
     def _subset(self, **kwargs):
         if not kwargs:
-            return self
+            return self.mutate()
 
         if "start" in kwargs or "end" in kwargs:
             start = kwargs.pop("start", None)
@@ -30,37 +43,52 @@ class Dataset:
 
             from .subset import Subset
 
-            return Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs)
+            return (
+                Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs).mutate()
+            )
 
         if "frequency" in kwargs:
             from .subset import Subset
 
+            if "interpolate_frequency" in kwargs:
+                raise ValueError("Cannot use both `frequency` and `interpolate_frequency`")
+
             frequency = kwargs.pop("frequency")
-            return Subset(self, self._frequency_to_indices(frequency), dict(frequency=frequency))._subset(**kwargs)
+            return (
+                Subset(self, self._frequency_to_indices(frequency), dict(frequency=frequency))
+                ._subset(**kwargs)
+                .mutate()
+            )
+
+        if "interpolate_frequency" in kwargs:
+            from .interpolate import InterpolateFrequency
+
+            interpolate_frequency = kwargs.pop("interpolate_frequency")
+            return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate()
 
         if "select" in kwargs:
             from .select import Select
 
             select = kwargs.pop("select")
-            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs)
+            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs).mutate()
 
         if "drop" in kwargs:
            from .select import Select
 
             drop = kwargs.pop("drop")
-            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs)
+            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs).mutate()
 
         if "reorder" in kwargs:
             from .select import Select
 
             reorder = kwargs.pop("reorder")
-            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs)
+            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate()
 
         if "rename" in kwargs:
             from .select import Rename
 
             rename = kwargs.pop("rename")
-            return Rename(self, rename)._subset(**kwargs)
+            return Rename(self, rename)._subset(**kwargs).mutate()
 
         if "statistics" in kwargs:
             from ..data import open_dataset
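
Note: interpolate_frequency is a new, mutually exclusive alternative to frequency: instead of subsampling to a coarser time step it interpolates to a finer one (see interpolate.py in the file list). Hypothetical usage, with a made-up path:

    from anemoi.datasets import open_dataset

    ds_12h = open_dataset("dataset-6h.zarr", frequency="12h")             # subsample
    ds_1h = open_dataset("dataset-6h.zarr", interpolate_frequency="1h")   # interpolate

    # open_dataset("dataset-6h.zarr", frequency="12h", interpolate_frequency="1h")
    # raises: "Cannot use both `frequency` and `interpolate_frequency`"
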
@@ -68,20 +96,38 @@ class Dataset:
 
             statistics = kwargs.pop("statistics")
 
-            return Statistics(self, open_dataset(statistics))._subset(**kwargs)
+            return Statistics(self, open_dataset(statistics))._subset(**kwargs).mutate()
 
         if "thinning" in kwargs:
             from .masked import Thinning
 
             thinning = kwargs.pop("thinning")
             method = kwargs.pop("method", "every-nth")
-            return Thinning(self, thinning, method)._subset(**kwargs)
+            return Thinning(self, thinning, method)._subset(**kwargs).mutate()
 
         if "area" in kwargs:
             from .masked import Cropping
 
             bbox = kwargs.pop("area")
-            return Cropping(self, bbox)._subset(**kwargs)
+            return Cropping(self, bbox)._subset(**kwargs).mutate()
+
+        if "missing_dates" in kwargs:
+            from .missing import MissingDates
+
+            missing_dates = kwargs.pop("missing_dates")
+            return MissingDates(self, missing_dates)._subset(**kwargs).mutate()
+
+        if "skip_missing_dates" in kwargs:
+            from .missing import SkipMissingDates
+
+            if "expected_access" not in kwargs:
+                raise ValueError("`expected_access` is required with `skip_missing_dates`")
+
+            skip_missing_dates = kwargs.pop("skip_missing_dates")
+            expected_access = kwargs.pop("expected_access")
+
+            if skip_missing_dates:
+                return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
 
         # Keep last
         if "shuffle" in kwargs:
@@ -90,15 +136,14 @@ class Dataset:
             shuffle = kwargs.pop("shuffle")
 
             if shuffle:
-                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs)
+                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs).mutate()
 
         raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs))
 
     def _frequency_to_indices(self, frequency):
-        from .misc import _frequency_to_hours
 
-        requested_frequency = _frequency_to_hours(frequency)
-        dataset_frequency = _frequency_to_hours(self.frequency)
+        requested_frequency = frequency_to_seconds(frequency)
+        dataset_frequency = frequency_to_seconds(self.frequency)
         assert requested_frequency % dataset_frequency == 0
         # Question: where do we start? first date, or first date that is a multiple of the frequency?
         step = requested_frequency // dataset_frequency
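
Note: switching from _frequency_to_hours to frequency_to_seconds keeps the same subsampling arithmetic while allowing sub-hourly frequencies; the step is just the ratio of the two frequencies. A sketch, assuming the anemoi.utils.dates helpers accept frequency strings such as "6h" and "30m":

    from anemoi.utils.dates import frequency_to_seconds

    requested = frequency_to_seconds("6h")   # 21600
    dataset = frequency_to_seconds("30m")    # 1800
    assert requested % dataset == 0
    step = requested // dataset              # 12: keep every 12th date
    indices = range(0, 100, step)
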
@@ -171,37 +216,71 @@ class Dataset:
         import anemoi
 
         def tidy(v):
-            if isinstance(v, (list, tuple)):
+            if isinstance(v, (list, tuple, set)):
                 return [tidy(i) for i in v]
             if isinstance(v, dict):
                 return {k: tidy(v) for k, v in v.items()}
             if isinstance(v, str) and v.startswith("/"):
                 return os.path.basename(v)
+            if isinstance(v, datetime.datetime):
+                return v.isoformat()
+            if isinstance(v, datetime.date):
+                return v.isoformat()
+            if isinstance(v, datetime.timedelta):
+                return frequency_to_string(v)
+
+            if isinstance(v, Dataset):
+                # That can happen in the `arguments`
+                # if a dataset is passed as an argument
+                return repr(v)
+
+            if isinstance(v, slice):
+                return (v.start, v.stop, v.step)
+
             return v
 
-        return tidy(
-            dict(
-                version=anemoi.datasets.__version__,
-                shape=self.shape,
-                arguments=self.arguments,
-                specific=self.metadata_specific(),
-                frequency=self.frequency,
-                variables=self.variables,
-                start_date=self.dates[0].astype(str),
-                end_date=self.dates[-1].astype(str),
-            )
+        md = dict(
+            version=anemoi.datasets.__version__,
+            arguments=self.arguments,
+            **self.dataset_metadata(),
+        )
+
+        try:
+            return json.loads(json.dumps(tidy(md)))
+        except Exception:
+            LOG.exception("Failed to serialize metadata")
+            pprint.pprint(md)
+
+            raise
+
+    @property
+    def start_date(self):
+        return self.dates[0]
+
+    @property
+    def end_date(self):
+        return self.dates[-1]
+
+    def dataset_metadata(self):
+        return dict(
+            specific=self.metadata_specific(),
+            frequency=self.frequency,
+            variables=self.variables,
+            shape=self.shape,
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
         )
 
     def metadata_specific(self, **kwargs):
         action = self.__class__.__name__.lower()
-        assert isinstance(self.frequency, int), (self.frequency, self, action)
+        # assert isinstance(self.frequency, datetime.timedelta), (self.frequency, self, action)
         return dict(
             action=action,
             variables=self.variables,
             shape=self.shape,
-            frequency=self.frequency,
-            start_date=self.dates[0].astype(str),
-            end_date=self.dates[-1].astype(str),
+            frequency=frequency_to_string(frequency_to_timedelta(self.frequency)),
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
             **kwargs,
         )
@@ -220,10 +299,6 @@ class Dataset:
             if n.startswith("_") and not n.startswith("__"):
                 warnings.warn(f"Private method {n} is overriden in {ds.__class__.__name__}")
 
-        # for n in ('metadata_specific', 'tree', 'source'):
-        #     if n not in overriden:
-        #         warnings.warn(f"Method {n} is not overriden in {ds.__class__.__name__}")
-
     def _repr_html_(self):
         return self.tree().html()