anemoi-datasets 0.5.7__py3-none-any.whl → 0.5.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +11 -3
- anemoi/datasets/__main__.py +2 -3
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/__init__.py +2 -3
- anemoi/datasets/commands/cleanup.py +9 -0
- anemoi/datasets/commands/compare.py +3 -3
- anemoi/datasets/commands/copy.py +38 -68
- anemoi/datasets/commands/create.py +20 -5
- anemoi/datasets/commands/finalise-additions.py +9 -0
- anemoi/datasets/commands/finalise.py +9 -0
- anemoi/datasets/commands/init-additions.py +9 -0
- anemoi/datasets/commands/init.py +9 -0
- anemoi/datasets/commands/inspect.py +3 -1
- anemoi/datasets/commands/load-additions.py +9 -0
- anemoi/datasets/commands/load.py +9 -0
- anemoi/datasets/commands/patch.py +9 -0
- anemoi/datasets/commands/publish.py +9 -0
- anemoi/datasets/commands/scan.py +9 -0
- anemoi/datasets/compute/__init__.py +8 -0
- anemoi/datasets/compute/recentre.py +3 -2
- anemoi/datasets/create/__init__.py +62 -12
- anemoi/datasets/create/check.py +4 -3
- anemoi/datasets/create/chunks.py +3 -2
- anemoi/datasets/create/config.py +5 -5
- anemoi/datasets/create/functions/__init__.py +22 -7
- anemoi/datasets/create/functions/filters/__init__.py +2 -1
- anemoi/datasets/create/functions/filters/empty.py +3 -2
- anemoi/datasets/create/functions/filters/noop.py +2 -2
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/rename.py +16 -11
- anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
- anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
- anemoi/datasets/create/functions/sources/__init__.py +2 -2
- anemoi/datasets/create/functions/sources/accumulations.py +10 -4
- anemoi/datasets/create/functions/sources/constants.py +3 -2
- anemoi/datasets/create/functions/sources/empty.py +3 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -2
- anemoi/datasets/create/functions/sources/grib.py +8 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
- anemoi/datasets/create/functions/sources/mars.py +97 -17
- anemoi/datasets/create/functions/sources/netcdf.py +3 -2
- anemoi/datasets/create/functions/sources/opendap.py +2 -2
- anemoi/datasets/create/functions/sources/recentre.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -2
- anemoi/datasets/create/functions/sources/tendencies.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -3
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/field.py +6 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -4
- anemoi/datasets/create/functions/sources/xarray/flavour.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/grid.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/time.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/variable.py +6 -9
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
- anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
- anemoi/datasets/create/functions/sources/zenodo.py +2 -2
- anemoi/datasets/create/input/__init__.py +3 -17
- anemoi/datasets/create/input/action.py +3 -8
- anemoi/datasets/create/input/concat.py +3 -2
- anemoi/datasets/create/input/context.py +3 -8
- anemoi/datasets/create/input/data_sources.py +3 -9
- anemoi/datasets/create/input/empty.py +3 -9
- anemoi/datasets/create/input/filter.py +3 -9
- anemoi/datasets/create/input/function.py +3 -9
- anemoi/datasets/create/input/join.py +3 -2
- anemoi/datasets/create/input/misc.py +3 -8
- anemoi/datasets/create/input/pipe.py +9 -3
- anemoi/datasets/create/input/repeated_dates.py +14 -8
- anemoi/datasets/create/input/result.py +154 -12
- anemoi/datasets/create/input/step.py +4 -9
- anemoi/datasets/create/input/template.py +3 -2
- anemoi/datasets/create/input/trace.py +3 -2
- anemoi/datasets/create/patch.py +9 -1
- anemoi/datasets/create/persistent.py +3 -2
- anemoi/datasets/create/size.py +3 -2
- anemoi/datasets/create/statistics/__init__.py +3 -2
- anemoi/datasets/create/statistics/summary.py +3 -2
- anemoi/datasets/create/utils.py +15 -2
- anemoi/datasets/create/writer.py +3 -2
- anemoi/datasets/create/zarr.py +3 -2
- anemoi/datasets/data/__init__.py +27 -1
- anemoi/datasets/data/concat.py +5 -1
- anemoi/datasets/data/dataset.py +216 -37
- anemoi/datasets/data/debug.py +4 -1
- anemoi/datasets/data/ensemble.py +4 -1
- anemoi/datasets/data/fill_missing.py +165 -0
- anemoi/datasets/data/forwards.py +23 -1
- anemoi/datasets/data/grids.py +236 -58
- anemoi/datasets/data/indexing.py +4 -1
- anemoi/datasets/data/interpolate.py +4 -1
- anemoi/datasets/data/join.py +12 -9
- anemoi/datasets/data/masked.py +36 -10
- anemoi/datasets/data/merge.py +180 -0
- anemoi/datasets/data/misc.py +18 -3
- anemoi/datasets/data/missing.py +4 -1
- anemoi/datasets/data/rescale.py +4 -1
- anemoi/datasets/data/select.py +4 -1
- anemoi/datasets/data/statistics.py +4 -1
- anemoi/datasets/data/stores.py +66 -3
- anemoi/datasets/data/subset.py +6 -1
- anemoi/datasets/data/unchecked.py +4 -1
- anemoi/datasets/data/xy.py +20 -5
- anemoi/datasets/dates/__init__.py +9 -7
- anemoi/datasets/dates/groups.py +4 -2
- anemoi/datasets/grids.py +86 -2
- anemoi/datasets/testing.py +3 -2
- anemoi/datasets/utils/__init__.py +8 -0
- anemoi/datasets/utils/fields.py +2 -2
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/METADATA +11 -29
- anemoi_datasets-0.5.11.dist-info/RECORD +123 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/WHEEL +1 -1
- anemoi/datasets/fields.py +0 -66
- anemoi_datasets-0.5.7.dist-info/RECORD +0 -122
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from functools import cached_property
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from . import MissingDateError
|
|
17
|
+
from .debug import Node
|
|
18
|
+
from .debug import debug_indexing
|
|
19
|
+
from .forwards import Combined
|
|
20
|
+
from .indexing import apply_index_to_slices_changes
|
|
21
|
+
from .indexing import expand_list_indexing
|
|
22
|
+
from .indexing import index_to_slices
|
|
23
|
+
from .indexing import update_tuple
|
|
24
|
+
from .misc import _auto_adjust
|
|
25
|
+
from .misc import _open
|
|
26
|
+
|
|
27
|
+
LOG = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Merge(Combined):
    """Combine datasets whose dates interleave into one chronological dataset.

    Example:
        dataset A provides d0 d2 d4 d6 ...
        dataset B provides d1 d3 d5 d7 ...
        the merged dataset yields d0 d1 d2 d3 ...

    When ``allow_gaps_in_dates`` is true, dates covered by no dataset are kept
    as missing entries instead of raising an error.
    """

    def __init__(self, datasets, allow_gaps_in_dates=False):
        super().__init__(datasets)

        self.allow_gaps_in_dates = allow_gaps_in_dates

        # Map each date to the (dataset_index, row_index) that provides it.
        dates = {}

        for i, d in enumerate(datasets):
            for j, date in enumerate(d.dates):
                date = date.astype(object)

                if date not in dates:
                    dates[date] = (i, j)
                    continue

                d1 = datasets[dates[date][0]]  # Currently selected provider
                d2 = datasets[i]  # Candidate provider

                # The candidate's entry is missing: no real conflict, keep the
                # previously selected provider.
                if j in d2.missing:
                    continue

                # The previously selected entry is missing: prefer the candidate.
                k = dates[date][1]
                if k in d1.missing:
                    dates[date] = (i, j)
                    continue

                raise ValueError(f"Duplicate date {date} found in datasets {d1} and {d2}")

        all_dates = sorted(dates)
        start = all_dates[0]
        end = all_dates[-1]

        # The merged frequency is the smallest gap between consecutive dates.
        frequency = min(d2 - d1 for d1, d2 in zip(all_dates[:-1], all_dates[1:]))

        # Sentinel dataset index marking dates covered by no dataset.
        self._missing_index = len(datasets)

        indices = []
        _dates = []

        date = start
        while date <= end:
            if date not in dates:
                if not self.allow_gaps_in_dates:
                    raise ValueError(
                        f"merge: date {date} not covered by dataset. Start={start}, end={end}, frequency={frequency}"
                    )
                dates[date] = (self._missing_index, -1)

            indices.append(dates[date])
            _dates.append(date)
            date += frequency

        self._dates = np.array(_dates, dtype="datetime64[s]")
        self._indices = np.array(indices)
        self._frequency = frequency  # .astype(object)

    def __len__(self):
        return len(self._dates)

    @property
    def dates(self):
        # Dates of the merged dataset, one per ``frequency`` step.
        return self._dates

    @property
    def frequency(self):
        # Smallest gap between two consecutive merged dates.
        return self._frequency

    @cached_property
    def missing(self):
        """Indices that are missing: gaps, plus dates missing in their provider."""
        # TODO: optimize
        result = set()

        for position, (ds_index, row) in enumerate(self._indices):
            # Short-circuit: the row lookup is skipped for the gap sentinel.
            if ds_index == self._missing_index or row in self.datasets[ds_index].missing:
                result.add(position)

        return result

    def check_same_lengths(self, d1, d2):
        # Turned off because we are concatenating along the first axis
        pass

    def check_same_dates(self, d1, d2):
        # Turned off because we are concatenating along the dates axis
        pass

    def check_compatibility(self, d1, d2):
        super().check_compatibility(d1, d2)
        self.check_same_sub_shapes(d1, d2, drop_axis=0)

    def tree(self):
        return Node(self, [d.tree() for d in self.datasets], allow_gaps_in_dates=self.allow_gaps_in_dates)

    @debug_indexing
    def __getitem__(self, n):
        """Fetch item ``n``; tuple and slice indexing are dispatched to helpers."""
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        dataset, row = self._indices[n]

        if dataset == self._missing_index:
            raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")

        return self.datasets[dataset][int(row)]

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        # Normalise the multi-dimensional index, fetch along the first axis,
        # then re-apply the remaining axes.
        index, changes = index_to_slices(index, self.shape)
        index, previous = update_tuple(index, 0, slice(None))
        result = self._get_slice(previous)
        return apply_index_to_slices_changes(result[index], changes)

    def _get_slice(self, s):
        # NOTE(review): ``self._len`` is presumably provided by a parent class
        # (not defined here) — confirm against the Dataset/Combined base.
        return np.stack([self[i] for i in range(*s.indices(self._len))])
|
|
163
|
+
|
|
164
|
+
def merge_factory(args, kwargs):
    """Build a :class:`Merge` dataset from the ``merge`` entry of *kwargs*.

    The ``merge`` value must be a list/tuple of dataset specifications; each is
    opened, the datasets are auto-adjusted to a common geometry, and the
    remaining *kwargs* are applied as a subset of the merged result.
    """

    specs = kwargs.pop("merge")

    assert isinstance(specs, (list, tuple))
    assert len(args) == 0

    datasets = [_open(spec) for spec in specs]

    # A single dataset needs no merging — just apply the subset options.
    if len(datasets) == 1:
        return datasets[0]._subset(**kwargs)

    datasets, kwargs = _auto_adjust(datasets, kwargs)

    allow_gaps_in_dates = kwargs.pop("allow_gaps_in_dates", False)

    return Merge(datasets, allow_gaps_in_dates=allow_gaps_in_dates)._subset(**kwargs)
|
anemoi/datasets/data/misc.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import calendar
|
|
9
12
|
import datetime
|
|
10
13
|
import logging
|
|
@@ -235,16 +238,21 @@ def _auto_adjust(datasets, kwargs):
|
|
|
235
238
|
if set(d.variables) != variables:
|
|
236
239
|
subset_kwargs[i]["select"] = sorted(variables)
|
|
237
240
|
|
|
241
|
+
if "start" or "end" in adjust_set:
|
|
242
|
+
common = datasets[0].dates
|
|
243
|
+
for d in datasets[0:]:
|
|
244
|
+
common = np.intersect1d(common, d.dates)
|
|
245
|
+
|
|
238
246
|
if "start" in adjust_set:
|
|
239
247
|
assert "start" not in kwargs, "Cannot use 'start' in adjust and kwargs"
|
|
240
|
-
start =
|
|
248
|
+
start = min(common).astype(object)
|
|
241
249
|
for i, d in enumerate(datasets):
|
|
242
250
|
if start != d.dates[0]:
|
|
243
251
|
subset_kwargs[i]["start"] = start
|
|
244
252
|
|
|
245
253
|
if "end" in adjust_set:
|
|
246
254
|
assert "end" not in kwargs, "Cannot use 'end' in adjust and kwargs"
|
|
247
|
-
end =
|
|
255
|
+
end = max(common).astype(object)
|
|
248
256
|
for i, d in enumerate(datasets):
|
|
249
257
|
if end != d.dates[-1]:
|
|
250
258
|
subset_kwargs[i]["end"] = end
|
|
@@ -262,6 +270,7 @@ def _auto_adjust(datasets, kwargs):
|
|
|
262
270
|
|
|
263
271
|
|
|
264
272
|
def _open_dataset(*args, **kwargs):
|
|
273
|
+
|
|
265
274
|
sets = []
|
|
266
275
|
for a in args:
|
|
267
276
|
sets.append(_open(a))
|
|
@@ -302,6 +311,12 @@ def _open_dataset(*args, **kwargs):
|
|
|
302
311
|
assert not sets, sets
|
|
303
312
|
return concat_factory(args, kwargs).mutate()
|
|
304
313
|
|
|
314
|
+
if "merge" in kwargs:
|
|
315
|
+
from .merge import merge_factory
|
|
316
|
+
|
|
317
|
+
assert not sets, sets
|
|
318
|
+
return merge_factory(args, kwargs).mutate()
|
|
319
|
+
|
|
305
320
|
if "ensemble" in kwargs:
|
|
306
321
|
from .ensemble import ensemble_factory
|
|
307
322
|
|
anemoi/datasets/data/missing.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/rescale.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -69,7 +71,7 @@ class S3Store(ReadOnlyStore):
|
|
|
69
71
|
"""
|
|
70
72
|
|
|
71
73
|
def __init__(self, url, region=None):
|
|
72
|
-
from anemoi.utils.s3 import s3_client
|
|
74
|
+
from anemoi.utils.remote.s3 import s3_client
|
|
73
75
|
|
|
74
76
|
_, _, self.bucket, self.key = url.split("/", 3)
|
|
75
77
|
self.s3 = s3_client(self.bucket, region=region)
|
|
@@ -83,6 +85,41 @@ class S3Store(ReadOnlyStore):
|
|
|
83
85
|
return response["Body"].read()
|
|
84
86
|
|
|
85
87
|
|
|
88
|
+
class PlanetaryComputerStore(ReadOnlyStore):
    """Access catalogs on Microsoft Planetary Computer.

    A dedicated store is needed because ``xr.open_zarr`` requires some extra
    arguments for these catalogs.

    NOTE(review): ``__getitem__`` takes no key and returns a dict of
    ``open_zarr`` arguments — an unusual signature, but the caller in
    ``name_to_zarr_store`` invokes it exactly this way, so it is kept as-is.
    """

    def __init__(self, data_catalog_id):
        # Identifier of the collection in the Planetary Computer STAC catalog.
        self.data_catalog_id = data_catalog_id

    def __getitem__(self):
        import planetary_computer
        import pystac_client

        catalog = pystac_client.Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1/",
            modifier=planetary_computer.sign_inplace,
        )
        collection = catalog.get_collection(self.data_catalog_id)

        asset = collection.assets["zarr-abfs"]
        extra = asset.extra_fields

        # Assets backed by fsspec carry storage options; plain assets are
        # opened directly from their href.
        if "xarray:storage_options" in extra:
            store = {
                "store": asset.href,
                "storage_options": extra["xarray:storage_options"],
                **extra["xarray:open_kwargs"],
            }
        else:
            store = {
                "filename_or_obj": asset.href,
                **extra["xarray:open_kwargs"],
            }

        return store
|
|
121
|
+
|
|
122
|
+
|
|
86
123
|
class DebugStore(ReadOnlyStore):
|
|
87
124
|
"""A store to debug the zarr loading."""
|
|
88
125
|
|
|
@@ -119,6 +156,9 @@ def name_to_zarr_store(path_or_url):
|
|
|
119
156
|
if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
|
|
120
157
|
s3_url = f"s3://{bits[0]}{parsed.path}"
|
|
121
158
|
store = S3Store(s3_url, region=bits[2])
|
|
159
|
+
elif store.startswith("https://planetarycomputer.microsoft.com/"):
|
|
160
|
+
data_catalog_id = store.rsplit("/", 1)[-1]
|
|
161
|
+
store = PlanetaryComputerStore(data_catalog_id).__getitem__()
|
|
122
162
|
else:
|
|
123
163
|
store = HTTPStore(store)
|
|
124
164
|
|
|
@@ -302,6 +342,13 @@ class Zarr(Dataset):
|
|
|
302
342
|
)
|
|
303
343
|
]
|
|
304
344
|
|
|
345
|
+
@cached_property
def constant_fields(self):
    """Return the names of constant fields, preferring the stored metadata.

    Reads the ``constant_fields`` zarr attribute when present; otherwise
    warns and falls back to computing them via ``computed_constant_fields``.
    """
    result = self.z.attrs.get("constant_fields")
    if result is None:
        LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
        return self.computed_constant_fields()
    # Fix: the original fell off the end here, so the cached_property cached
    # None whenever the attribute actually existed. Return the stored value.
    return result
|
|
351
|
+
|
|
305
352
|
@property
|
|
306
353
|
def variables_metadata(self):
|
|
307
354
|
return self.z.attrs.get("variables_metadata", {})
|
|
@@ -317,6 +364,7 @@ class Zarr(Dataset):
|
|
|
317
364
|
attrs=dict(self.z.attrs),
|
|
318
365
|
chunks=self.chunks,
|
|
319
366
|
dtype=str(self.dtype),
|
|
367
|
+
path=self.path,
|
|
320
368
|
)
|
|
321
369
|
|
|
322
370
|
def source(self, index):
|
|
@@ -335,6 +383,12 @@ class Zarr(Dataset):
|
|
|
335
383
|
name, _ = os.path.splitext(os.path.basename(self.path))
|
|
336
384
|
names.add(name)
|
|
337
385
|
|
|
386
|
+
def collect_supporting_arrays(self, collected, *path):
|
|
387
|
+
pass
|
|
388
|
+
|
|
389
|
+
def collect_input_sources(self, collected):
|
|
390
|
+
pass
|
|
391
|
+
|
|
338
392
|
|
|
339
393
|
class ZarrWithMissingDates(Zarr):
|
|
340
394
|
"""A zarr dataset with missing dates."""
|
|
@@ -343,7 +397,7 @@ class ZarrWithMissingDates(Zarr):
|
|
|
343
397
|
super().__init__(path)
|
|
344
398
|
|
|
345
399
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
346
|
-
missing_dates = set([np.datetime64(x) for x in missing_dates])
|
|
400
|
+
missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
|
|
347
401
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
348
402
|
self.missing = set(self.missing_to_dates)
|
|
349
403
|
|
|
@@ -396,6 +450,9 @@ class ZarrWithMissingDates(Zarr):
|
|
|
396
450
|
return "zarr*"
|
|
397
451
|
|
|
398
452
|
|
|
453
|
+
QUIET = set()
|
|
454
|
+
|
|
455
|
+
|
|
399
456
|
def zarr_lookup(name, fail=True):
|
|
400
457
|
|
|
401
458
|
if name.endswith(".zarr") or name.endswith(".zip"):
|
|
@@ -404,6 +461,9 @@ def zarr_lookup(name, fail=True):
|
|
|
404
461
|
config = load_config()["datasets"]
|
|
405
462
|
|
|
406
463
|
if name in config["named"]:
|
|
464
|
+
if name not in QUIET:
|
|
465
|
+
LOG.info("Opening `%s` as `%s`", name, config["named"][name])
|
|
466
|
+
QUIET.add(name)
|
|
407
467
|
return config["named"][name]
|
|
408
468
|
|
|
409
469
|
tried = []
|
|
@@ -417,6 +477,9 @@ def zarr_lookup(name, fail=True):
|
|
|
417
477
|
if z is not None:
|
|
418
478
|
# Cache for next time
|
|
419
479
|
config["named"][name] = full
|
|
480
|
+
if name not in QUIET:
|
|
481
|
+
LOG.info("Opening `%s` as `%s`", name, full)
|
|
482
|
+
QUIET.add(name)
|
|
420
483
|
return full
|
|
421
484
|
except zarr.errors.PathNotFoundError:
|
|
422
485
|
pass
|
anemoi/datasets/data/subset.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -132,6 +135,8 @@ class Subset(Forwards):
|
|
|
132
135
|
@cached_property
|
|
133
136
|
def frequency(self):
|
|
134
137
|
dates = self.dates
|
|
138
|
+
if len(dates) < 2:
|
|
139
|
+
raise ValueError(f"Cannot determine frequency of a subset with less than two dates ({self.dates}).")
|
|
135
140
|
return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
|
|
136
141
|
|
|
137
142
|
def source(self, index):
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
from functools import wraps
|
anemoi/datasets/data/xy.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -18,15 +21,19 @@ LOG = logging.getLogger(__name__)
|
|
|
18
21
|
|
|
19
22
|
class ZipBase(Combined):
|
|
20
23
|
|
|
24
|
+
def __init__(self, datasets, check_compatibility=True):
|
|
25
|
+
self._check_compatibility = check_compatibility
|
|
26
|
+
super().__init__(datasets)
|
|
27
|
+
|
|
21
28
|
def swap_with_parent(self, parent):
|
|
22
29
|
new_parents = [parent.clone(ds) for ds in self.datasets]
|
|
23
30
|
return self.clone(new_parents)
|
|
24
31
|
|
|
25
32
|
def clone(self, datasets):
|
|
26
|
-
return self.__class__(datasets)
|
|
33
|
+
return self.__class__(datasets, check_compatibility=self._check_compatibility)
|
|
27
34
|
|
|
28
35
|
def tree(self):
|
|
29
|
-
return Node(self, [d.tree() for d in self.datasets])
|
|
36
|
+
return Node(self, [d.tree() for d in self.datasets], check_compatibility=self._check_compatibility)
|
|
30
37
|
|
|
31
38
|
def __len__(self):
|
|
32
39
|
return min(len(d) for d in self.datasets)
|
|
@@ -86,6 +93,10 @@ class ZipBase(Combined):
|
|
|
86
93
|
def name_to_index(self):
|
|
87
94
|
return tuple(d.name_to_index for d in self.datasets)
|
|
88
95
|
|
|
96
|
+
def check_compatibility(self, d1, d2):
|
|
97
|
+
if self._check_compatibility:
|
|
98
|
+
super().check_compatibility(d1, d2)
|
|
99
|
+
|
|
89
100
|
|
|
90
101
|
class Zip(ZipBase):
|
|
91
102
|
pass
|
|
@@ -110,7 +121,9 @@ def xy_factory(args, kwargs):
|
|
|
110
121
|
|
|
111
122
|
assert len(datasets) == 2
|
|
112
123
|
|
|
113
|
-
|
|
124
|
+
check_compatibility = kwargs.pop("check_compatibility", True)
|
|
125
|
+
|
|
126
|
+
return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
|
|
114
127
|
|
|
115
128
|
|
|
116
129
|
def zip_factory(args, kwargs):
|
|
@@ -122,4 +135,6 @@ def zip_factory(args, kwargs):
|
|
|
122
135
|
datasets = [_open(e) for e in zip]
|
|
123
136
|
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
124
137
|
|
|
125
|
-
|
|
138
|
+
check_compatibility = kwargs.pop("check_compatibility", True)
|
|
139
|
+
|
|
140
|
+
return Zip(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -8,6 +10,8 @@
|
|
|
8
10
|
|
|
9
11
|
import datetime
|
|
10
12
|
import warnings
|
|
13
|
+
from functools import reduce
|
|
14
|
+
from math import gcd
|
|
11
15
|
|
|
12
16
|
# from anemoi.utils.dates import as_datetime
|
|
13
17
|
from anemoi.utils.dates import DateTimes
|
|
@@ -193,18 +197,16 @@ class HindcastsDates(DatesProvider):
|
|
|
193
197
|
|
|
194
198
|
dates = sorted(dates)
|
|
195
199
|
|
|
196
|
-
|
|
200
|
+
deltas = set()
|
|
197
201
|
for a, b in zip(dates, dates[1:]):
|
|
198
202
|
delta = b - a
|
|
199
203
|
assert isinstance(delta, datetime.timedelta), delta
|
|
200
|
-
|
|
201
|
-
mindelta = delta
|
|
202
|
-
else:
|
|
203
|
-
mindelta = min(mindelta, delta)
|
|
204
|
+
deltas.add(delta)
|
|
204
205
|
|
|
206
|
+
mindelta_seconds = reduce(gcd, [int(delta.total_seconds()) for delta in deltas])
|
|
207
|
+
mindelta = datetime.timedelta(seconds=mindelta_seconds)
|
|
205
208
|
self.frequency = mindelta
|
|
206
209
|
assert mindelta.total_seconds() > 0, mindelta
|
|
207
|
-
|
|
208
210
|
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
|
|
209
211
|
|
|
210
212
|
# Use all values between start and end by frequency, and set the ones that are missing
|
anemoi/datasets/dates/groups.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -26,7 +28,7 @@ class GroupOfDates:
|
|
|
26
28
|
assert isinstance(provider, DatesProvider), type(provider)
|
|
27
29
|
assert isinstance(dates, list)
|
|
28
30
|
|
|
29
|
-
self.dates = dates
|
|
31
|
+
self.dates = [as_datetime(_) for _ in dates]
|
|
30
32
|
self.provider = provider
|
|
31
33
|
self.partial_ok = partial_ok
|
|
32
34
|
|