anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +1 -2
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +4 -8
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +12 -13
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +16 -12
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +16 -21
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +5 -9
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +4 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/misc.py
CHANGED
|
@@ -15,11 +15,6 @@ import os
|
|
|
15
15
|
from pathlib import PurePath
|
|
16
16
|
from typing import TYPE_CHECKING
|
|
17
17
|
from typing import Any
|
|
18
|
-
from typing import Dict
|
|
19
|
-
from typing import List
|
|
20
|
-
from typing import Optional
|
|
21
|
-
from typing import Tuple
|
|
22
|
-
from typing import Union
|
|
23
18
|
|
|
24
19
|
import numpy as np
|
|
25
20
|
import zarr
|
|
@@ -33,7 +28,7 @@ if TYPE_CHECKING:
|
|
|
33
28
|
LOG = logging.getLogger(__name__)
|
|
34
29
|
|
|
35
30
|
|
|
36
|
-
def load_config() ->
|
|
31
|
+
def load_config() -> dict[str, Any]:
|
|
37
32
|
"""Load the configuration settings.
|
|
38
33
|
|
|
39
34
|
Returns
|
|
@@ -110,10 +105,10 @@ def round_datetime(d: np.datetime64, dates: NDArray[np.datetime64], up: bool) ->
|
|
|
110
105
|
|
|
111
106
|
|
|
112
107
|
def _as_date(
|
|
113
|
-
d:
|
|
108
|
+
d: int | str | np.datetime64 | datetime.date,
|
|
114
109
|
dates: NDArray[np.datetime64],
|
|
115
110
|
last: bool,
|
|
116
|
-
frequency:
|
|
111
|
+
frequency: datetime.timedelta | None = None,
|
|
117
112
|
) -> np.datetime64:
|
|
118
113
|
"""Convert a date to a numpy datetime64 object, rounding to the nearest date in a list of dates.
|
|
119
114
|
|
|
@@ -221,8 +216,8 @@ def _as_date(
|
|
|
221
216
|
|
|
222
217
|
if "-" in d and ":" in d:
|
|
223
218
|
date, time = d.replace(" ", "T").split("T")
|
|
224
|
-
year, month, day =
|
|
225
|
-
hour, minute, second =
|
|
219
|
+
year, month, day = (int(_) for _ in date.split("-"))
|
|
220
|
+
hour, minute, second = (int(_) for _ in time.split(":"))
|
|
226
221
|
return _as_date(
|
|
227
222
|
np.datetime64(f"{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}"),
|
|
228
223
|
dates,
|
|
@@ -258,9 +253,9 @@ def _as_date(
|
|
|
258
253
|
|
|
259
254
|
|
|
260
255
|
def as_first_date(
|
|
261
|
-
d:
|
|
256
|
+
d: int | str | np.datetime64 | datetime.date,
|
|
262
257
|
dates: NDArray[np.datetime64],
|
|
263
|
-
frequency:
|
|
258
|
+
frequency: datetime.timedelta | None = None,
|
|
264
259
|
) -> np.datetime64:
|
|
265
260
|
"""Convert a date to the first date in a list of dates.
|
|
266
261
|
|
|
@@ -282,9 +277,9 @@ def as_first_date(
|
|
|
282
277
|
|
|
283
278
|
|
|
284
279
|
def as_last_date(
|
|
285
|
-
d:
|
|
280
|
+
d: int | str | np.datetime64 | datetime.date,
|
|
286
281
|
dates: NDArray[np.datetime64],
|
|
287
|
-
frequency:
|
|
282
|
+
frequency: datetime.timedelta | None = None,
|
|
288
283
|
) -> np.datetime64:
|
|
289
284
|
"""Convert a date to the last date in a list of dates.
|
|
290
285
|
|
|
@@ -305,7 +300,7 @@ def as_last_date(
|
|
|
305
300
|
return _as_date(d, dates, last=True, frequency=frequency)
|
|
306
301
|
|
|
307
302
|
|
|
308
|
-
def _concat_or_join(datasets:
|
|
303
|
+
def _concat_or_join(datasets: list["Dataset"], kwargs: dict[str, Any]) -> tuple["Dataset", dict[str, Any]]:
|
|
309
304
|
"""Concatenate or join datasets based on their date ranges.
|
|
310
305
|
|
|
311
306
|
Parameters
|
|
@@ -317,7 +312,7 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple[
|
|
|
317
312
|
|
|
318
313
|
Returns
|
|
319
314
|
-------
|
|
320
|
-
|
|
315
|
+
tuple[Dataset, Dict[str, Any]]
|
|
321
316
|
The concatenated or joined dataset and remaining arguments.
|
|
322
317
|
"""
|
|
323
318
|
if "adjust" in kwargs:
|
|
@@ -339,12 +334,12 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple[
|
|
|
339
334
|
return Concat(datasets), kwargs
|
|
340
335
|
|
|
341
336
|
|
|
342
|
-
def _open(a:
|
|
337
|
+
def _open(a: str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]) -> "Dataset":
|
|
343
338
|
"""Open a dataset from various input types.
|
|
344
339
|
|
|
345
340
|
Parameters
|
|
346
341
|
----------
|
|
347
|
-
a : Union[str, PurePath, Dict[str, Any], List[Any],
|
|
342
|
+
a : Union[str, PurePath, Dict[str, Any], List[Any], tuple[Any, ...]]
|
|
348
343
|
The input to open.
|
|
349
344
|
|
|
350
345
|
Returns
|
|
@@ -390,10 +385,10 @@ def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -
|
|
|
390
385
|
|
|
391
386
|
|
|
392
387
|
def _auto_adjust(
|
|
393
|
-
datasets:
|
|
394
|
-
kwargs:
|
|
395
|
-
exclude:
|
|
396
|
-
) ->
|
|
388
|
+
datasets: list["Dataset"],
|
|
389
|
+
kwargs: dict[str, Any],
|
|
390
|
+
exclude: list[str] | None = None,
|
|
391
|
+
) -> tuple[list["Dataset"], dict[str, Any]]:
|
|
397
392
|
"""Automatically adjust datasets based on specified criteria.
|
|
398
393
|
|
|
399
394
|
Parameters
|
|
@@ -407,7 +402,7 @@ def _auto_adjust(
|
|
|
407
402
|
|
|
408
403
|
Returns
|
|
409
404
|
-------
|
|
410
|
-
|
|
405
|
+
tuple[List[Dataset], Dict[str, Any]]
|
|
411
406
|
The adjusted datasets and remaining arguments.
|
|
412
407
|
"""
|
|
413
408
|
if "adjust" not in kwargs:
|
|
@@ -620,7 +615,7 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
|
|
|
620
615
|
# Re-open the zarr store to avoid root object accumulating memory.
|
|
621
616
|
root = zarr.open(zarr_path, mode="a")
|
|
622
617
|
# Convert new dates to strings (using str) regardless of input dtype.
|
|
623
|
-
new_dates = np.array(new_dates, dtype="datetime64[
|
|
618
|
+
new_dates = np.array(new_dates, dtype="datetime64[s]")
|
|
624
619
|
dates_ds = root["dates"]
|
|
625
620
|
old_len = dates_ds.shape[0]
|
|
626
621
|
dates_ds.resize((old_len + len(new_dates),))
|
|
@@ -633,19 +628,19 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
|
|
|
633
628
|
data_ds[old_shape[0] :] = new_data
|
|
634
629
|
|
|
635
630
|
|
|
636
|
-
def process_date(date: Any, big_dataset:
|
|
631
|
+
def process_date(date: Any, big_dataset: "Dataset") -> tuple[np.ndarray, np.ndarray]:
|
|
637
632
|
"""Open the subset corresponding to the given date and return (date, subset).
|
|
638
633
|
|
|
639
634
|
Parameters
|
|
640
635
|
----------
|
|
641
636
|
date : Any
|
|
642
637
|
The date to process.
|
|
643
|
-
big_dataset :
|
|
638
|
+
big_dataset : Dataset
|
|
644
639
|
The dataset to process.
|
|
645
640
|
|
|
646
641
|
Returns
|
|
647
642
|
-------
|
|
648
|
-
|
|
643
|
+
tuple[np.ndarray, np.ndarray]
|
|
649
644
|
The subset and the date.
|
|
650
645
|
"""
|
|
651
646
|
print("Processing:", date, flush=True)
|
|
@@ -655,26 +650,24 @@ def process_date(date: Any, big_dataset: Any) -> Tuple[np.ndarray, np.ndarray]:
|
|
|
655
650
|
return s, date
|
|
656
651
|
|
|
657
652
|
|
|
658
|
-
def initialize_zarr_store(root: Any, big_dataset:
|
|
653
|
+
def initialize_zarr_store(root: Any, big_dataset: "Dataset") -> None:
|
|
659
654
|
"""Initialize the Zarr store with the given dataset and recipe.
|
|
660
655
|
|
|
661
656
|
Parameters
|
|
662
657
|
----------
|
|
663
658
|
root : Any
|
|
664
|
-
The root
|
|
665
|
-
big_dataset :
|
|
659
|
+
The root Zarr store.
|
|
660
|
+
big_dataset : Dataset
|
|
666
661
|
The dataset to initialize the store with.
|
|
667
|
-
recipe : Dict[str, Any]
|
|
668
|
-
The recipe for initializing the store.
|
|
669
662
|
"""
|
|
670
|
-
ensembles = big_dataset.shape[
|
|
663
|
+
ensembles = big_dataset.shape[2]
|
|
671
664
|
# Create or append to "dates" dataset.
|
|
672
665
|
if "dates" not in root:
|
|
673
666
|
full_length = len(big_dataset.dates)
|
|
674
667
|
root.create_dataset("dates", data=np.array([], dtype="datetime64[s]"), chunks=(full_length,))
|
|
675
668
|
|
|
676
669
|
if "data" not in root:
|
|
677
|
-
dims = (1, len(big_dataset.variables), ensembles, big_dataset.
|
|
670
|
+
dims = (1, len(big_dataset.variables), ensembles, big_dataset.shape[-1])
|
|
678
671
|
root.create_dataset(
|
|
679
672
|
"data",
|
|
680
673
|
shape=dims,
|
|
@@ -694,25 +687,28 @@ def initialize_zarr_store(root: Any, big_dataset: Any, recipe: Dict[str, Any]) -
|
|
|
694
687
|
if "latitudes" not in root or "longitudes" not in root:
|
|
695
688
|
root.create_dataset("latitudes", data=big_dataset.latitudes, compressor=None)
|
|
696
689
|
root.create_dataset("longitudes", data=big_dataset.longitudes, compressor=None)
|
|
697
|
-
|
|
690
|
+
for k, v in big_dataset.metadata().items():
|
|
691
|
+
if k not in root.attrs:
|
|
692
|
+
root.attrs[k] = v
|
|
698
693
|
# Set store-wide attributes if not already set.
|
|
699
|
-
if "
|
|
700
|
-
root.attrs["
|
|
701
|
-
root.attrs["
|
|
694
|
+
if "first_date" not in root.attrs:
|
|
695
|
+
root.attrs["first_date"] = big_dataset.metadata()["start_date"]
|
|
696
|
+
root.attrs["last_date"] = big_dataset.metadata()["end_date"]
|
|
697
|
+
root.attrs["resolution"] = big_dataset.resolution
|
|
702
698
|
root.attrs["name_to_index"] = {k: i for i, k in enumerate(big_dataset.variables)}
|
|
703
|
-
root.attrs["ensemble_dimension"] =
|
|
699
|
+
root.attrs["ensemble_dimension"] = 2
|
|
704
700
|
root.attrs["field_shape"] = big_dataset.field_shape
|
|
705
701
|
root.attrs["flatten_grid"] = True
|
|
706
|
-
root.attrs["recipe"] =
|
|
702
|
+
root.attrs["recipe"] = {}
|
|
707
703
|
|
|
708
704
|
|
|
709
|
-
def _save_dataset(
|
|
705
|
+
def _save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
|
|
710
706
|
"""Incrementally create (or update) a Zarr store from an Anemoi dataset.
|
|
711
707
|
|
|
712
708
|
Parameters
|
|
713
709
|
----------
|
|
714
|
-
|
|
715
|
-
|
|
710
|
+
dataset : Dataset
|
|
711
|
+
anemoi-dataset opened from python to save to Zarr store
|
|
716
712
|
zarr_path : str
|
|
717
713
|
The path to the Zarr store.
|
|
718
714
|
n_workers : int, optional
|
|
@@ -728,13 +724,13 @@ def _save_dataset(recipe: Dict[str, Any], zarr_path: str, n_workers: int = 1) ->
|
|
|
728
724
|
"""
|
|
729
725
|
from concurrent.futures import ProcessPoolExecutor
|
|
730
726
|
|
|
731
|
-
full_ds =
|
|
727
|
+
full_ds = dataset
|
|
732
728
|
print("Opened full dataset.", flush=True)
|
|
733
729
|
|
|
734
730
|
# Use ProcessPoolExecutor for parallel data extraction.
|
|
735
731
|
# Workers return (date, subset) tuples.
|
|
736
732
|
root = zarr.open(zarr_path, mode="a")
|
|
737
|
-
initialize_zarr_store(root, full_ds
|
|
733
|
+
initialize_zarr_store(root, full_ds)
|
|
738
734
|
print("Zarr store initialised.", flush=True)
|
|
739
735
|
|
|
740
736
|
existing_dates = np.array(sorted(root["dates"]), dtype="datetime64[s]")
|
anemoi/datasets/data/missing.py
CHANGED
|
@@ -12,11 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Set
|
|
18
|
-
from typing import Tuple
|
|
19
|
-
from typing import Union
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -49,7 +44,7 @@ class MissingDates(Forwards):
|
|
|
49
44
|
List of missing dates.
|
|
50
45
|
"""
|
|
51
46
|
|
|
52
|
-
def __init__(self, dataset: Dataset, missing_dates:
|
|
47
|
+
def __init__(self, dataset: Dataset, missing_dates: list[int | str]) -> None:
|
|
53
48
|
"""Initializes the MissingDates class.
|
|
54
49
|
|
|
55
50
|
Parameters
|
|
@@ -80,13 +75,13 @@ class MissingDates(Forwards):
|
|
|
80
75
|
self.missing_dates.append(date)
|
|
81
76
|
|
|
82
77
|
n = self.forward._len
|
|
83
|
-
self._missing =
|
|
78
|
+
self._missing = {i for i in self._missing if 0 <= i < n}
|
|
84
79
|
self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)
|
|
85
80
|
|
|
86
81
|
assert len(self._missing), "No dates to force missing"
|
|
87
82
|
|
|
88
83
|
@cached_property
|
|
89
|
-
def missing(self) ->
|
|
84
|
+
def missing(self) -> set[int]:
|
|
90
85
|
"""Returns the set of missing indices."""
|
|
91
86
|
return self._missing.union(self.forward.missing)
|
|
92
87
|
|
|
@@ -148,7 +143,7 @@ class MissingDates(Forwards):
|
|
|
148
143
|
raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")
|
|
149
144
|
|
|
150
145
|
@property
|
|
151
|
-
def reason(self) ->
|
|
146
|
+
def reason(self) -> dict[str, Any]:
|
|
152
147
|
"""Provides the reason for missing dates."""
|
|
153
148
|
return {"missing_dates": self.missing_dates}
|
|
154
149
|
|
|
@@ -162,7 +157,7 @@ class MissingDates(Forwards):
|
|
|
162
157
|
"""
|
|
163
158
|
return Node(self, [self.forward.tree()], **self.reason)
|
|
164
159
|
|
|
165
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
160
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
166
161
|
"""Provides metadata specific to the subclass.
|
|
167
162
|
|
|
168
163
|
Returns
|
|
@@ -184,7 +179,7 @@ class SkipMissingDates(Forwards):
|
|
|
184
179
|
The expected access pattern.
|
|
185
180
|
"""
|
|
186
181
|
|
|
187
|
-
def __init__(self, dataset: Dataset, expected_access:
|
|
182
|
+
def __init__(self, dataset: Dataset, expected_access: int | slice) -> None:
|
|
188
183
|
"""Initializes the SkipMissingDates class.
|
|
189
184
|
|
|
190
185
|
Parameters
|
|
@@ -285,7 +280,7 @@ class SkipMissingDates(Forwards):
|
|
|
285
280
|
return tuple(np.stack(_) for _ in result)
|
|
286
281
|
|
|
287
282
|
@debug_indexing
|
|
288
|
-
def _get_slice(self, s: slice) ->
|
|
283
|
+
def _get_slice(self, s: slice) -> tuple[NDArray[Any], ...]:
|
|
289
284
|
"""Retrieves a slice of items.
|
|
290
285
|
|
|
291
286
|
Parameters
|
|
@@ -303,7 +298,7 @@ class SkipMissingDates(Forwards):
|
|
|
303
298
|
return tuple(np.stack(_) for _ in result)
|
|
304
299
|
|
|
305
300
|
@debug_indexing
|
|
306
|
-
def __getitem__(self, n: FullIndex) ->
|
|
301
|
+
def __getitem__(self, n: FullIndex) -> tuple[NDArray[Any], ...]:
|
|
307
302
|
"""Retrieves the item at the given index.
|
|
308
303
|
|
|
309
304
|
Parameters
|
|
@@ -339,7 +334,7 @@ class SkipMissingDates(Forwards):
|
|
|
339
334
|
"""
|
|
340
335
|
return Node(self, [self.forward.tree()], expected_access=self.expected_access)
|
|
341
336
|
|
|
342
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
337
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
343
338
|
"""Provides metadata specific to the subclass.
|
|
344
339
|
|
|
345
340
|
Returns
|
|
@@ -404,7 +399,7 @@ class MissingDataset(Forwards):
|
|
|
404
399
|
return self._dates
|
|
405
400
|
|
|
406
401
|
@property
|
|
407
|
-
def missing(self) ->
|
|
402
|
+
def missing(self) -> set[int]:
|
|
408
403
|
"""Returns the set of missing indices."""
|
|
409
404
|
return self._missing
|
|
410
405
|
|
|
@@ -436,7 +431,7 @@ class MissingDataset(Forwards):
|
|
|
436
431
|
"""
|
|
437
432
|
return Node(self, [self.forward.tree()], start=self.start, end=self.end)
|
|
438
433
|
|
|
439
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
434
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
440
435
|
"""Provides metadata specific to the subclass.
|
|
441
436
|
|
|
442
437
|
Returns
|
|
@@ -10,8 +10,6 @@ import logging
|
|
|
10
10
|
import os
|
|
11
11
|
from functools import cached_property
|
|
12
12
|
from typing import Any
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from typing import Tuple
|
|
15
13
|
|
|
16
14
|
import numpy as np
|
|
17
15
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
|
|
|
82
80
|
# return [self.getitem(j) for j in i]
|
|
83
81
|
|
|
84
82
|
raise ValueError(
|
|
85
|
-
(
|
|
86
|
-
|
|
87
|
-
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
88
|
-
)
|
|
83
|
+
f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
|
|
84
|
+
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
89
85
|
)
|
|
90
86
|
|
|
91
87
|
@property
|
|
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):
|
|
|
195
191
|
|
|
196
192
|
if len(self.forward) != len(self.dates):
|
|
197
193
|
raise ValueError(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
204
|
-
)
|
|
194
|
+
f"Dates are not consistent with the number of items in the dataset. "
|
|
195
|
+
f"The dataset contains {len(self.forward)} time windows. "
|
|
196
|
+
f"This is not compatible with the "
|
|
197
|
+
f"{len(self.dates)} requested dates with frequency={frequency_hours}"
|
|
198
|
+
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
205
199
|
)
|
|
206
200
|
|
|
207
201
|
@property
|
|
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
|
|
|
307
301
|
return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"
|
|
308
302
|
|
|
309
303
|
|
|
310
|
-
def observations_factory(args:
|
|
304
|
+
def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
|
|
311
305
|
observations = kwargs.pop("observations")
|
|
312
306
|
|
|
313
307
|
if not isinstance(observations, dict):
|
anemoi/datasets/data/padded.py
CHANGED
|
@@ -12,8 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Set
|
|
17
15
|
|
|
18
16
|
import numpy as np
|
|
19
17
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -38,7 +36,7 @@ class Padded(Forwards):
|
|
|
38
36
|
_after: int = 0
|
|
39
37
|
_inside: int = 0
|
|
40
38
|
|
|
41
|
-
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason:
|
|
39
|
+
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
|
|
42
40
|
"""Create a padded subset of a dataset.
|
|
43
41
|
|
|
44
42
|
Attributes:
|
|
@@ -195,7 +193,7 @@ class Padded(Forwards):
|
|
|
195
193
|
return (len(self.dates),) + self.dataset.shape[1:]
|
|
196
194
|
|
|
197
195
|
@cached_property
|
|
198
|
-
def missing(self) ->
|
|
196
|
+
def missing(self) -> set[int]:
|
|
199
197
|
raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
|
|
200
198
|
# return self.forward.missing
|
|
201
199
|
|
|
@@ -207,7 +205,7 @@ class Padded(Forwards):
|
|
|
207
205
|
"""
|
|
208
206
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
209
207
|
|
|
210
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
208
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
211
209
|
"""Get the metadata specific to the forwards subclass.
|
|
212
210
|
|
|
213
211
|
Returns:
|
|
@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
|
|
|
35
35
|
return dict(np.load(f))
|
|
36
36
|
|
|
37
37
|
def read_metadata(self):
|
|
38
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
38
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
39
39
|
return json.load(f)
|
|
40
40
|
|
|
41
41
|
def read_statistics(self):
|
|
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
|
|
|
56
56
|
return dict(np.load(f))
|
|
57
57
|
|
|
58
58
|
def read_metadata(self):
|
|
59
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
59
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
60
60
|
return json.load(f)
|
|
61
61
|
|
|
62
62
|
def read_statistics(self):
|
anemoi/datasets/data/rescale.py
CHANGED
|
@@ -12,11 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
|
-
from typing import Tuple
|
|
19
|
-
from typing import Union
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -35,9 +30,7 @@ from .indexing import update_tuple
|
|
|
35
30
|
LOG = logging.getLogger(__name__)
|
|
36
31
|
|
|
37
32
|
|
|
38
|
-
def make_rescale(
|
|
39
|
-
variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
|
|
40
|
-
) -> Tuple[float, float]:
|
|
33
|
+
def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
|
|
41
34
|
"""Create rescale parameters (scale and offset) based on the input rescale specification.
|
|
42
35
|
|
|
43
36
|
Parameters
|
|
@@ -86,7 +79,7 @@ class Rescale(Forwards):
|
|
|
86
79
|
"""A class to apply rescaling to dataset variables."""
|
|
87
80
|
|
|
88
81
|
def __init__(
|
|
89
|
-
self, dataset: Dataset, rescale:
|
|
82
|
+
self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
|
|
90
83
|
) -> None:
|
|
91
84
|
"""Initialize the Rescale object.
|
|
92
85
|
|
|
@@ -129,7 +122,7 @@ class Rescale(Forwards):
|
|
|
129
122
|
"""
|
|
130
123
|
return Node(self, [self.forward.tree()], rescale=self.rescale)
|
|
131
124
|
|
|
132
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
125
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
133
126
|
"""Get the metadata specific to the rescale subclass.
|
|
134
127
|
|
|
135
128
|
Returns
|
|
@@ -204,7 +197,7 @@ class Rescale(Forwards):
|
|
|
204
197
|
return data * self._a[0] + self._b[0]
|
|
205
198
|
|
|
206
199
|
@cached_property
|
|
207
|
-
def statistics(self) ->
|
|
200
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
208
201
|
"""Get the statistics of the rescaled data."""
|
|
209
202
|
result = {}
|
|
210
203
|
a = self._a.squeeze()
|
|
@@ -224,7 +217,7 @@ class Rescale(Forwards):
|
|
|
224
217
|
|
|
225
218
|
return result
|
|
226
219
|
|
|
227
|
-
def statistics_tendencies(self, delta:
|
|
220
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
228
221
|
"""Get the tendencies of the statistics of the rescaled data.
|
|
229
222
|
|
|
230
223
|
Parameters
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
|
|
|
37
34
|
class Select(Forwards):
|
|
38
35
|
"""Class to select a subset of variables from a dataset."""
|
|
39
36
|
|
|
40
|
-
def __init__(self, dataset: Dataset, indices:
|
|
37
|
+
def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
|
|
41
38
|
"""Initialize the Select class.
|
|
42
39
|
|
|
43
40
|
Parameters
|
|
@@ -140,26 +137,26 @@ class Select(Forwards):
|
|
|
140
137
|
return (len(self), len(self.indices)) + self.dataset.shape[2:]
|
|
141
138
|
|
|
142
139
|
@cached_property
|
|
143
|
-
def variables(self) ->
|
|
140
|
+
def variables(self) -> list[str]:
|
|
144
141
|
"""Get the variables of the dataset."""
|
|
145
142
|
return [self.dataset.variables[i] for i in self.indices]
|
|
146
143
|
|
|
147
144
|
@cached_property
|
|
148
|
-
def variables_metadata(self) ->
|
|
145
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
149
146
|
"""Get the metadata of the variables."""
|
|
150
147
|
return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
|
|
151
148
|
|
|
152
149
|
@cached_property
|
|
153
|
-
def name_to_index(self) ->
|
|
150
|
+
def name_to_index(self) -> dict[str, int]:
|
|
154
151
|
"""Get the mapping of variable names to indices."""
|
|
155
152
|
return {k: i for i, k in enumerate(self.variables)}
|
|
156
153
|
|
|
157
154
|
@cached_property
|
|
158
|
-
def statistics(self) ->
|
|
155
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
159
156
|
"""Get the statistics of the dataset."""
|
|
160
157
|
return {k: v[self.indices] for k, v in self.dataset.statistics.items()}
|
|
161
158
|
|
|
162
|
-
def statistics_tendencies(self, delta:
|
|
159
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
163
160
|
"""Get the statistical tendencies of the dataset.
|
|
164
161
|
|
|
165
162
|
Parameters
|
|
@@ -176,7 +173,7 @@ class Select(Forwards):
|
|
|
176
173
|
delta = self.frequency
|
|
177
174
|
return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}
|
|
178
175
|
|
|
179
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
176
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
180
177
|
"""Get the specific metadata of the dataset.
|
|
181
178
|
|
|
182
179
|
Parameters
|
|
@@ -216,7 +213,7 @@ class Select(Forwards):
|
|
|
216
213
|
"""
|
|
217
214
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
218
215
|
|
|
219
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
216
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
220
217
|
"""Get the metadata specific to the subclass.
|
|
221
218
|
|
|
222
219
|
Returns
|
|
@@ -231,7 +228,7 @@ class Select(Forwards):
|
|
|
231
228
|
class Rename(Forwards):
|
|
232
229
|
"""Class to rename variables in a dataset."""
|
|
233
230
|
|
|
234
|
-
def __init__(self, dataset: Dataset, rename:
|
|
231
|
+
def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
|
|
235
232
|
"""Initialize the Rename class.
|
|
236
233
|
|
|
237
234
|
Parameters
|
|
@@ -251,17 +248,17 @@ class Rename(Forwards):
|
|
|
251
248
|
self.rename = rename
|
|
252
249
|
|
|
253
250
|
@property
|
|
254
|
-
def variables(self) ->
|
|
251
|
+
def variables(self) -> list[str]:
|
|
255
252
|
"""Get the renamed variables."""
|
|
256
253
|
return self._variables
|
|
257
254
|
|
|
258
255
|
@property
|
|
259
|
-
def variables_metadata(self) ->
|
|
256
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
260
257
|
"""Get the renamed variables metadata."""
|
|
261
258
|
return self._variables_metadata
|
|
262
259
|
|
|
263
260
|
@cached_property
|
|
264
|
-
def name_to_index(self) ->
|
|
261
|
+
def name_to_index(self) -> dict[str, int]:
|
|
265
262
|
"""Get the mapping of renamed variable names to indices."""
|
|
266
263
|
return {k: i for i, k in enumerate(self.variables)}
|
|
267
264
|
|
|
@@ -273,7 +270,7 @@ class Rename(Forwards):
|
|
|
273
270
|
"""
|
|
274
271
|
return Node(self, [self.forward.tree()], rename=self.rename)
|
|
275
272
|
|
|
276
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
273
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
277
274
|
"""Get the metadata specific to the subclass.
|
|
278
275
|
|
|
279
276
|
Returns:
|
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Optional
|
|
17
|
-
from typing import Set
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -56,11 +53,11 @@ class Statistics(Forwards):
|
|
|
56
53
|
)
|
|
57
54
|
|
|
58
55
|
@cached_property
|
|
59
|
-
def statistics(self) ->
|
|
56
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
60
57
|
"""Get the statistics."""
|
|
61
58
|
return self._statistic.statistics
|
|
62
59
|
|
|
63
|
-
def statistics_tendencies(self, delta:
|
|
60
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
64
61
|
"""Get the statistics tendencies.
|
|
65
62
|
|
|
66
63
|
Parameters
|
|
@@ -77,7 +74,7 @@ class Statistics(Forwards):
|
|
|
77
74
|
delta = self.frequency
|
|
78
75
|
return self._statistic.statistics_tendencies(delta)
|
|
79
76
|
|
|
80
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
77
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
81
78
|
"""Get the metadata specific to the forwards subclass.
|
|
82
79
|
|
|
83
80
|
Returns
|
|
@@ -97,7 +94,7 @@ class Statistics(Forwards):
|
|
|
97
94
|
"""
|
|
98
95
|
return Node(self, [self.forward.tree()])
|
|
99
96
|
|
|
100
|
-
def get_dataset_names(self, names:
|
|
97
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
101
98
|
"""Get the dataset names.
|
|
102
99
|
|
|
103
100
|
Parameters
|