anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
@@ -10,8 +10,6 @@ import logging
|
|
|
10
10
|
import os
|
|
11
11
|
from functools import cached_property
|
|
12
12
|
from typing import Any
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from typing import Tuple
|
|
15
13
|
|
|
16
14
|
import numpy as np
|
|
17
15
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
|
|
|
82
80
|
# return [self.getitem(j) for j in i]
|
|
83
81
|
|
|
84
82
|
raise ValueError(
|
|
85
|
-
(
|
|
86
|
-
|
|
87
|
-
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
88
|
-
)
|
|
83
|
+
f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
|
|
84
|
+
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
89
85
|
)
|
|
90
86
|
|
|
91
87
|
@property
|
|
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):
|
|
|
195
191
|
|
|
196
192
|
if len(self.forward) != len(self.dates):
|
|
197
193
|
raise ValueError(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
204
|
-
)
|
|
194
|
+
f"Dates are not consistent with the number of items in the dataset. "
|
|
195
|
+
f"The dataset contains {len(self.forward)} time windows. "
|
|
196
|
+
f"This is not compatible with the "
|
|
197
|
+
f"{len(self.dates)} requested dates with frequency={frequency_hours}"
|
|
198
|
+
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
205
199
|
)
|
|
206
200
|
|
|
207
201
|
@property
|
|
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
|
|
|
307
301
|
return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"
|
|
308
302
|
|
|
309
303
|
|
|
310
|
-
def observations_factory(args:
|
|
304
|
+
def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
|
|
311
305
|
observations = kwargs.pop("observations")
|
|
312
306
|
|
|
313
307
|
if not isinstance(observations, dict):
|
anemoi/datasets/data/padded.py
CHANGED
|
@@ -12,8 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Set
|
|
17
15
|
|
|
18
16
|
import numpy as np
|
|
19
17
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -38,7 +36,7 @@ class Padded(Forwards):
|
|
|
38
36
|
_after: int = 0
|
|
39
37
|
_inside: int = 0
|
|
40
38
|
|
|
41
|
-
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason:
|
|
39
|
+
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
|
|
42
40
|
"""Create a padded subset of a dataset.
|
|
43
41
|
|
|
44
42
|
Attributes:
|
|
@@ -195,7 +193,7 @@ class Padded(Forwards):
|
|
|
195
193
|
return (len(self.dates),) + self.dataset.shape[1:]
|
|
196
194
|
|
|
197
195
|
@cached_property
|
|
198
|
-
def missing(self) ->
|
|
196
|
+
def missing(self) -> set[int]:
|
|
199
197
|
raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
|
|
200
198
|
# return self.forward.missing
|
|
201
199
|
|
|
@@ -207,7 +205,7 @@ class Padded(Forwards):
|
|
|
207
205
|
"""
|
|
208
206
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
209
207
|
|
|
210
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
208
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
211
209
|
"""Get the metadata specific to the forwards subclass.
|
|
212
210
|
|
|
213
211
|
Returns:
|
|
@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
|
|
|
35
35
|
return dict(np.load(f))
|
|
36
36
|
|
|
37
37
|
def read_metadata(self):
|
|
38
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
38
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
39
39
|
return json.load(f)
|
|
40
40
|
|
|
41
41
|
def read_statistics(self):
|
|
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
|
|
|
56
56
|
return dict(np.load(f))
|
|
57
57
|
|
|
58
58
|
def read_metadata(self):
|
|
59
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
59
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
60
60
|
return json.load(f)
|
|
61
61
|
|
|
62
62
|
def read_statistics(self):
|
anemoi/datasets/data/rescale.py
CHANGED
|
@@ -12,11 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
|
-
from typing import Tuple
|
|
19
|
-
from typing import Union
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -35,9 +30,7 @@ from .indexing import update_tuple
|
|
|
35
30
|
LOG = logging.getLogger(__name__)
|
|
36
31
|
|
|
37
32
|
|
|
38
|
-
def make_rescale(
|
|
39
|
-
variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
|
|
40
|
-
) -> Tuple[float, float]:
|
|
33
|
+
def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
|
|
41
34
|
"""Create rescale parameters (scale and offset) based on the input rescale specification.
|
|
42
35
|
|
|
43
36
|
Parameters
|
|
@@ -86,7 +79,7 @@ class Rescale(Forwards):
|
|
|
86
79
|
"""A class to apply rescaling to dataset variables."""
|
|
87
80
|
|
|
88
81
|
def __init__(
|
|
89
|
-
self, dataset: Dataset, rescale:
|
|
82
|
+
self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
|
|
90
83
|
) -> None:
|
|
91
84
|
"""Initialize the Rescale object.
|
|
92
85
|
|
|
@@ -129,7 +122,7 @@ class Rescale(Forwards):
|
|
|
129
122
|
"""
|
|
130
123
|
return Node(self, [self.forward.tree()], rescale=self.rescale)
|
|
131
124
|
|
|
132
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
125
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
133
126
|
"""Get the metadata specific to the rescale subclass.
|
|
134
127
|
|
|
135
128
|
Returns
|
|
@@ -204,7 +197,7 @@ class Rescale(Forwards):
|
|
|
204
197
|
return data * self._a[0] + self._b[0]
|
|
205
198
|
|
|
206
199
|
@cached_property
|
|
207
|
-
def statistics(self) ->
|
|
200
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
208
201
|
"""Get the statistics of the rescaled data."""
|
|
209
202
|
result = {}
|
|
210
203
|
a = self._a.squeeze()
|
|
@@ -224,7 +217,7 @@ class Rescale(Forwards):
|
|
|
224
217
|
|
|
225
218
|
return result
|
|
226
219
|
|
|
227
|
-
def statistics_tendencies(self, delta:
|
|
220
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
228
221
|
"""Get the tendencies of the statistics of the rescaled data.
|
|
229
222
|
|
|
230
223
|
Parameters
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from functools import cached_property
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
from numpy.typing import NDArray
|
|
17
|
+
|
|
18
|
+
from anemoi.datasets.data.indexing import expand_list_indexing
|
|
19
|
+
|
|
20
|
+
from .dataset import Dataset
|
|
21
|
+
from .dataset import FullIndex
|
|
22
|
+
from .debug import Node
|
|
23
|
+
from .debug import debug_indexing
|
|
24
|
+
from .forwards import Forwards
|
|
25
|
+
|
|
26
|
+
LOG = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RollingAverage(Forwards):
    """A dataset view that averages the wrapped dataset over a rolling window.

    Item ``n`` of this view is the NaN-ignoring mean (``np.nanmean`` along the
    first axis) of the ``i_start + i_end`` consecutive items
    ``n, ..., n + i_start + i_end - 1`` of the wrapped dataset. The view is
    therefore ``i_start + i_end - 1`` items shorter than the wrapped dataset.
    """

    def __init__(self, dataset: Dataset, window: str | tuple[int, int, str]) -> None:
        """Initialize the RollingAverage class.

        Parameters
        ----------
        dataset : Dataset
            The dataset to be averaged with a rolling window.
        window : (int, int, str)
            The rolling average window (start, end, 'freq').
            'freq' (or 'frequency') means the window is expressed in number of
            time steps of the dataset.
            Both start and end are inclusive, i.e. window = (-2, 2, 'freq') means a window of 5 time steps.
            For now, only 'freq' is supported, in the future other units may be supported.
            Windows such as "[-2h, +2h]" are not supported yet.

        Raises
        ------
        ValueError
            If ``window`` is not a 3-element list/tuple of (int, int, str), if the
            start is positive or if the end is negative.
        NotImplementedError
            If the window unit is neither 'freq' nor 'frequency'.
        """
        super().__init__(dataset)
        if not (isinstance(window, (list, tuple)) and len(window) == 3):
            raise ValueError(f"Window must be (int, int, str), got {window}")
        if not isinstance(window[0], int) or not isinstance(window[1], int) or not isinstance(window[2], str):
            raise ValueError(f"Window must be (int, int, str), got {window}")
        if window[2] not in ["freq", "frequency"]:
            raise NotImplementedError(f"Window must be (int, int, 'freq'), got {window}")

        # i_start: number of time steps before the reference step.
        # i_end: number of time steps from the reference step onwards (reference included).
        # window = (0, 0, 'freq') gives i_start=0 and i_end=1, i.e. a window of
        # one step, which means no change.
        self.i_start = -window[0]
        self.i_end = window[1] + 1
        if self.i_start < 0:
            raise ValueError(f"Window start must be negative or zero, got {window}")
        if self.i_end <= 0:
            raise ValueError(f"Window end must be positive or zero, got {window}")

        self.window_str = f"-{self.i_start}-to-{self.i_end}"

    @property
    def shape(self):
        """Shape of the wrapped dataset with the first dimension reduced to ``len(self)``."""
        shape = list(self.forward.shape)
        shape[0] = len(self)
        return tuple(shape)

    @debug_indexing
    @expand_list_indexing
    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
        """Return the rolling average for the requested item(s).

        Parameters
        ----------
        n : FullIndex
            An int, a slice, or a tuple whose first element (int, slice, list
            or tuple) selects along the first axis and whose remaining elements
            are passed unchanged to the wrapped dataset.

        Returns
        -------
        NDArray[Any]
            The window mean for an int index, or an array of window means (one
            per selected item) for a slice/list/tuple index.

        Raises
        ------
        IndexError
            If an index along the first axis is outside ``[-len(self), len(self))``.
        """
        length = len(self)
        width = self.i_start + self.i_end

        def average(array):
            return np.nanmean(array, axis=0)

        def normalise(index):
            # Map a possibly negative index to [0, length) or fail loudly.
            position = index if index >= 0 else length + index
            if not 0 <= position < length:
                raise IndexError(f"Index out of range: {index}")
            return position

        if isinstance(n, slice):
            n = (n,)

        if isinstance(n, tuple):
            first = n[0]
            rest = n[1:]

            if isinstance(first, int):
                # Same negative-index and bounds handling as the plain int case below.
                first = normalise(first)
                data = self.forward[(slice(first, first + width),) + rest]
                return average(data)

            if isinstance(first, slice):
                # slice.indices() resolves None, negative and out-of-range bounds
                # the same way regular Python sequences do.
                first = list(range(*first.indices(length)))

            if isinstance(first, (list, tuple)):
                first = [i if i >= 0 else length + i for i in first]
                if any(not 0 <= i < length for i in first):
                    raise IndexError(f"Index out of range: {first}")
                windows = [self.forward[(slice(i, i + width),) + rest] for i in first]
                return np.array([average(w) for w in windows])

            assert False, f"Expected int, slice, list or tuple as first element of tuple, got {type(first)}"

        assert isinstance(n, int), f"Expected int, slice, tuple, got {type(n)}"

        n = normalise(n)
        return average(self.forward[slice(n, n + width)])

    def __len__(self) -> int:
        """Number of complete windows that fit in the wrapped dataset."""
        return len(self.forward) - (self.i_end + self.i_start - 1)

    @cached_property
    def dates(self) -> NDArray[np.datetime64]:
        """Get the dates of the averaged items.

        The first ``i_start`` and the last ``i_end - 1`` dates of the wrapped
        dataset are dropped, so that ``len(self.dates) == len(self)``.
        """
        dates = self.forward.dates
        return dates[self.i_start : len(dates) - self.i_end + 1]

    def tree(self) -> Node:
        """Return the tree representation of the dataset, tagged with the window."""
        return Node(self, [self.forward.tree()], window=self.window_str)

    @cached_property
    def missing(self) -> set[int]:
        """Get the missing data indices.

        Item ``k`` of this dataset reads items ``k, ..., k + width - 1`` of the
        wrapped dataset, so a missing item ``i`` of the wrapped dataset makes
        items ``i - width + 1, ..., i`` of this dataset missing.
        """
        length = len(self)
        width = self.i_start + self.i_end
        return {i - j for i in self.forward.missing for j in range(width) if 0 <= i - j < length}

    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
        """Get the metadata specific to this subclass (currently none)."""
        return {}
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
|
|
|
37
34
|
class Select(Forwards):
|
|
38
35
|
"""Class to select a subset of variables from a dataset."""
|
|
39
36
|
|
|
40
|
-
def __init__(self, dataset: Dataset, indices:
|
|
37
|
+
def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
|
|
41
38
|
"""Initialize the Select class.
|
|
42
39
|
|
|
43
40
|
Parameters
|
|
@@ -140,26 +137,26 @@ class Select(Forwards):
|
|
|
140
137
|
return (len(self), len(self.indices)) + self.dataset.shape[2:]
|
|
141
138
|
|
|
142
139
|
@cached_property
|
|
143
|
-
def variables(self) ->
|
|
140
|
+
def variables(self) -> list[str]:
|
|
144
141
|
"""Get the variables of the dataset."""
|
|
145
142
|
return [self.dataset.variables[i] for i in self.indices]
|
|
146
143
|
|
|
147
144
|
@cached_property
|
|
148
|
-
def variables_metadata(self) ->
|
|
145
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
149
146
|
"""Get the metadata of the variables."""
|
|
150
147
|
return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
|
|
151
148
|
|
|
152
149
|
@cached_property
|
|
153
|
-
def name_to_index(self) ->
|
|
150
|
+
def name_to_index(self) -> dict[str, int]:
|
|
154
151
|
"""Get the mapping of variable names to indices."""
|
|
155
152
|
return {k: i for i, k in enumerate(self.variables)}
|
|
156
153
|
|
|
157
154
|
@cached_property
|
|
158
|
-
def statistics(self) ->
|
|
155
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
159
156
|
"""Get the statistics of the dataset."""
|
|
160
157
|
return {k: v[self.indices] for k, v in self.dataset.statistics.items()}
|
|
161
158
|
|
|
162
|
-
def statistics_tendencies(self, delta:
|
|
159
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
163
160
|
"""Get the statistical tendencies of the dataset.
|
|
164
161
|
|
|
165
162
|
Parameters
|
|
@@ -176,7 +173,7 @@ class Select(Forwards):
|
|
|
176
173
|
delta = self.frequency
|
|
177
174
|
return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}
|
|
178
175
|
|
|
179
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
176
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
180
177
|
"""Get the specific metadata of the dataset.
|
|
181
178
|
|
|
182
179
|
Parameters
|
|
@@ -216,7 +213,7 @@ class Select(Forwards):
|
|
|
216
213
|
"""
|
|
217
214
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
218
215
|
|
|
219
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
216
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
220
217
|
"""Get the metadata specific to the subclass.
|
|
221
218
|
|
|
222
219
|
Returns
|
|
@@ -231,7 +228,7 @@ class Select(Forwards):
|
|
|
231
228
|
class Rename(Forwards):
|
|
232
229
|
"""Class to rename variables in a dataset."""
|
|
233
230
|
|
|
234
|
-
def __init__(self, dataset: Dataset, rename:
|
|
231
|
+
def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
|
|
235
232
|
"""Initialize the Rename class.
|
|
236
233
|
|
|
237
234
|
Parameters
|
|
@@ -251,17 +248,17 @@ class Rename(Forwards):
|
|
|
251
248
|
self.rename = rename
|
|
252
249
|
|
|
253
250
|
@property
|
|
254
|
-
def variables(self) ->
|
|
251
|
+
def variables(self) -> list[str]:
|
|
255
252
|
"""Get the renamed variables."""
|
|
256
253
|
return self._variables
|
|
257
254
|
|
|
258
255
|
@property
|
|
259
|
-
def variables_metadata(self) ->
|
|
256
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
260
257
|
"""Get the renamed variables metadata."""
|
|
261
258
|
return self._variables_metadata
|
|
262
259
|
|
|
263
260
|
@cached_property
|
|
264
|
-
def name_to_index(self) ->
|
|
261
|
+
def name_to_index(self) -> dict[str, int]:
|
|
265
262
|
"""Get the mapping of renamed variable names to indices."""
|
|
266
263
|
return {k: i for i, k in enumerate(self.variables)}
|
|
267
264
|
|
|
@@ -273,7 +270,7 @@ class Rename(Forwards):
|
|
|
273
270
|
"""
|
|
274
271
|
return Node(self, [self.forward.tree()], rename=self.rename)
|
|
275
272
|
|
|
276
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
273
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
277
274
|
"""Get the metadata specific to the subclass.
|
|
278
275
|
|
|
279
276
|
Returns:
|
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Optional
|
|
17
|
-
from typing import Set
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -56,11 +53,11 @@ class Statistics(Forwards):
|
|
|
56
53
|
)
|
|
57
54
|
|
|
58
55
|
@cached_property
|
|
59
|
-
def statistics(self) ->
|
|
56
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
60
57
|
"""Get the statistics."""
|
|
61
58
|
return self._statistic.statistics
|
|
62
59
|
|
|
63
|
-
def statistics_tendencies(self, delta:
|
|
60
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
64
61
|
"""Get the statistics tendencies.
|
|
65
62
|
|
|
66
63
|
Parameters
|
|
@@ -77,7 +74,7 @@ class Statistics(Forwards):
|
|
|
77
74
|
delta = self.frequency
|
|
78
75
|
return self._statistic.statistics_tendencies(delta)
|
|
79
76
|
|
|
80
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
77
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
81
78
|
"""Get the metadata specific to the forwards subclass.
|
|
82
79
|
|
|
83
80
|
Returns
|
|
@@ -97,7 +94,7 @@ class Statistics(Forwards):
|
|
|
97
94
|
"""
|
|
98
95
|
return Node(self, [self.forward.tree()])
|
|
99
96
|
|
|
100
|
-
def get_dataset_names(self, names:
|
|
97
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
101
98
|
"""Get the dataset names.
|
|
102
99
|
|
|
103
100
|
Parameters
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -15,11 +15,6 @@ import tempfile
|
|
|
15
15
|
import warnings
|
|
16
16
|
from functools import cached_property
|
|
17
17
|
from typing import Any
|
|
18
|
-
from typing import Dict
|
|
19
|
-
from typing import List
|
|
20
|
-
from typing import Optional
|
|
21
|
-
from typing import Set
|
|
22
|
-
from typing import Union
|
|
23
18
|
from urllib.parse import urlparse
|
|
24
19
|
|
|
25
20
|
import numpy as np
|
|
@@ -90,22 +85,20 @@ class S3Store(ReadOnlyStore):
|
|
|
90
85
|
options using the anemoi configs.
|
|
91
86
|
"""
|
|
92
87
|
|
|
93
|
-
def __init__(self, url: str
|
|
94
|
-
"""Initialize the S3Store with a URL
|
|
95
|
-
from anemoi.utils.remote.s3 import s3_client
|
|
88
|
+
def __init__(self, url: str) -> None:
|
|
89
|
+
"""Initialize the S3Store with a URL."""
|
|
96
90
|
|
|
97
|
-
|
|
98
|
-
self.s3 = s3_client(self.bucket, region=region)
|
|
91
|
+
self.url = url
|
|
99
92
|
|
|
100
93
|
def __getitem__(self, key: str) -> bytes:
|
|
101
94
|
"""Retrieve an item from the store."""
|
|
95
|
+
from anemoi.utils.remote.s3 import get_object
|
|
96
|
+
|
|
102
97
|
try:
|
|
103
|
-
|
|
104
|
-
except
|
|
98
|
+
return get_object(os.path.join(self.url, key))
|
|
99
|
+
except FileNotFoundError:
|
|
105
100
|
raise KeyError(key)
|
|
106
101
|
|
|
107
|
-
return response["Body"].read()
|
|
108
|
-
|
|
109
102
|
|
|
110
103
|
class DebugStore(ReadOnlyStore):
|
|
111
104
|
"""A store to debug the zarr loading."""
|
|
@@ -199,7 +192,7 @@ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hie
|
|
|
199
192
|
class Zarr(Dataset):
|
|
200
193
|
"""A zarr dataset."""
|
|
201
194
|
|
|
202
|
-
def __init__(self, path:
|
|
195
|
+
def __init__(self, path: str | zarr.hierarchy.Group) -> None:
|
|
203
196
|
"""Initialize the Zarr dataset with a path or zarr group."""
|
|
204
197
|
if isinstance(path, zarr.hierarchy.Group):
|
|
205
198
|
self.was_zarr = True
|
|
@@ -215,7 +208,7 @@ class Zarr(Dataset):
|
|
|
215
208
|
self._missing = set()
|
|
216
209
|
|
|
217
210
|
@property
|
|
218
|
-
def missing(self) ->
|
|
211
|
+
def missing(self) -> set[int]:
|
|
219
212
|
"""Return the missing dates of the dataset."""
|
|
220
213
|
return self._missing
|
|
221
214
|
|
|
@@ -236,7 +229,7 @@ class Zarr(Dataset):
|
|
|
236
229
|
"""Retrieve an item from the dataset."""
|
|
237
230
|
return self.data[n]
|
|
238
231
|
|
|
239
|
-
def _unwind(self, index:
|
|
232
|
+
def _unwind(self, index: int | slice | list | tuple, rest: list, shape: tuple, axis: int, axes: list) -> iter:
|
|
240
233
|
"""Unwind the index for multi-dimensional indexing."""
|
|
241
234
|
if not isinstance(index, (int, slice, list, tuple)):
|
|
242
235
|
try:
|
|
@@ -298,7 +291,7 @@ class Zarr(Dataset):
|
|
|
298
291
|
return self.z.longitude[:]
|
|
299
292
|
|
|
300
293
|
@property
|
|
301
|
-
def statistics(self) ->
|
|
294
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
302
295
|
"""Return the statistics of the dataset."""
|
|
303
296
|
return dict(
|
|
304
297
|
mean=self.z.mean[:],
|
|
@@ -307,7 +300,7 @@ class Zarr(Dataset):
|
|
|
307
300
|
minimum=self.z.minimum[:],
|
|
308
301
|
)
|
|
309
302
|
|
|
310
|
-
def statistics_tendencies(self, delta:
|
|
303
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
311
304
|
"""Return the statistical tendencies of the dataset."""
|
|
312
305
|
if delta is None:
|
|
313
306
|
delta = self.frequency
|
|
@@ -354,14 +347,14 @@ class Zarr(Dataset):
|
|
|
354
347
|
return dates[1].astype(object) - dates[0].astype(object)
|
|
355
348
|
|
|
356
349
|
@property
|
|
357
|
-
def name_to_index(self) ->
|
|
350
|
+
def name_to_index(self) -> dict[str, int]:
|
|
358
351
|
"""Return the name to index mapping of the dataset."""
|
|
359
352
|
if "variables" in self.z.attrs:
|
|
360
353
|
return {n: i for i, n in enumerate(self.z.attrs["variables"])}
|
|
361
354
|
return self.z.attrs["name_to_index"]
|
|
362
355
|
|
|
363
356
|
@property
|
|
364
|
-
def variables(self) ->
|
|
357
|
+
def variables(self) -> list[str]:
|
|
365
358
|
"""Return the variables of the dataset."""
|
|
366
359
|
return [
|
|
367
360
|
k
|
|
@@ -372,7 +365,7 @@ class Zarr(Dataset):
|
|
|
372
365
|
]
|
|
373
366
|
|
|
374
367
|
@cached_property
|
|
375
|
-
def constant_fields(self) ->
|
|
368
|
+
def constant_fields(self) -> list[str]:
|
|
376
369
|
"""Return the constant fields of the dataset."""
|
|
377
370
|
result = self.z.attrs.get("constant_fields")
|
|
378
371
|
if result is None:
|
|
@@ -380,7 +373,7 @@ class Zarr(Dataset):
|
|
|
380
373
|
return self.computed_constant_fields()
|
|
381
374
|
|
|
382
375
|
@property
|
|
383
|
-
def variables_metadata(self) ->
|
|
376
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
384
377
|
"""Return the metadata of the variables."""
|
|
385
378
|
return self.z.attrs.get("variables_metadata", {})
|
|
386
379
|
|
|
@@ -392,7 +385,7 @@ class Zarr(Dataset):
|
|
|
392
385
|
"""Return the end date of the statistics."""
|
|
393
386
|
return self.dates[-1]
|
|
394
387
|
|
|
395
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
388
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
396
389
|
"""Return the specific metadata of the dataset."""
|
|
397
390
|
return super().metadata_specific(
|
|
398
391
|
attrs=dict(self.z.attrs),
|
|
@@ -416,7 +409,7 @@ class Zarr(Dataset):
|
|
|
416
409
|
"""Return the tree representation of the dataset."""
|
|
417
410
|
return Node(self, [], path=self.path)
|
|
418
411
|
|
|
419
|
-
def get_dataset_names(self, names:
|
|
412
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
420
413
|
"""Get the names of the datasets."""
|
|
421
414
|
name, _ = os.path.splitext(os.path.basename(self.path))
|
|
422
415
|
names.add(name)
|
|
@@ -433,17 +426,17 @@ class Zarr(Dataset):
|
|
|
433
426
|
class ZarrWithMissingDates(Zarr):
|
|
434
427
|
"""A zarr dataset with missing dates."""
|
|
435
428
|
|
|
436
|
-
def __init__(self, path:
|
|
429
|
+
def __init__(self, path: str | zarr.hierarchy.Group) -> None:
|
|
437
430
|
"""Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
|
|
438
431
|
super().__init__(path)
|
|
439
432
|
|
|
440
433
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
441
|
-
missing_dates =
|
|
434
|
+
missing_dates = {np.datetime64(x, "s") for x in missing_dates}
|
|
442
435
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
443
436
|
self._missing = set(self.missing_to_dates)
|
|
444
437
|
|
|
445
438
|
@property
|
|
446
|
-
def missing(self) ->
|
|
439
|
+
def missing(self) -> set[int]:
|
|
447
440
|
"""Return the missing dates of the dataset."""
|
|
448
441
|
return self._missing
|
|
449
442
|
|
|
@@ -506,7 +499,7 @@ class ZarrWithMissingDates(Zarr):
|
|
|
506
499
|
QUIET = set()
|
|
507
500
|
|
|
508
501
|
|
|
509
|
-
def zarr_lookup(name: str, fail: bool = True) ->
|
|
502
|
+
def zarr_lookup(name: str, fail: bool = True) -> str | None:
|
|
510
503
|
"""Look up a zarr dataset by name."""
|
|
511
504
|
|
|
512
505
|
config = load_config()["datasets"]
|