anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/grib-index.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +21 -24
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +6 -24
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/planetary_computer.py +44 -0
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
- anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
- anemoi/datasets/create/sources/xarray_support/field.py +17 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +2 -74
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +52 -23
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +30 -28
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +23 -77
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +11 -12
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/create/filters/__init__.py +0 -33
- anemoi/datasets/create/filters/empty.py +0 -37
- anemoi/datasets/create/filters/legacy.py +0 -93
- anemoi/datasets/create/filters/noop.py +0 -37
- anemoi/datasets/create/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
- anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
- anemoi/datasets/create/filters/rename.py +0 -205
- anemoi/datasets/create/filters/rotate_winds.py +0 -105
- anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
- anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
- anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
- anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
- anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
- anemoi/datasets/create/filters/sum.py +0 -68
- anemoi/datasets/create/filters/transform.py +0 -51
- anemoi/datasets/create/filters/unrotate_winds.py +0 -105
- anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
- anemoi/datasets/create/filters/wz_to_w.py +0 -98
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/missing.py
CHANGED
|
@@ -12,11 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Set
|
|
18
|
-
from typing import Tuple
|
|
19
|
-
from typing import Union
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -49,7 +44,7 @@ class MissingDates(Forwards):
|
|
|
49
44
|
List of missing dates.
|
|
50
45
|
"""
|
|
51
46
|
|
|
52
|
-
def __init__(self, dataset: Dataset, missing_dates:
|
|
47
|
+
def __init__(self, dataset: Dataset, missing_dates: list[int | str]) -> None:
|
|
53
48
|
"""Initializes the MissingDates class.
|
|
54
49
|
|
|
55
50
|
Parameters
|
|
@@ -80,13 +75,13 @@ class MissingDates(Forwards):
|
|
|
80
75
|
self.missing_dates.append(date)
|
|
81
76
|
|
|
82
77
|
n = self.forward._len
|
|
83
|
-
self._missing =
|
|
78
|
+
self._missing = {i for i in self._missing if 0 <= i < n}
|
|
84
79
|
self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)
|
|
85
80
|
|
|
86
81
|
assert len(self._missing), "No dates to force missing"
|
|
87
82
|
|
|
88
83
|
@cached_property
|
|
89
|
-
def missing(self) ->
|
|
84
|
+
def missing(self) -> set[int]:
|
|
90
85
|
"""Returns the set of missing indices."""
|
|
91
86
|
return self._missing.union(self.forward.missing)
|
|
92
87
|
|
|
@@ -148,7 +143,7 @@ class MissingDates(Forwards):
|
|
|
148
143
|
raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")
|
|
149
144
|
|
|
150
145
|
@property
|
|
151
|
-
def reason(self) ->
|
|
146
|
+
def reason(self) -> dict[str, Any]:
|
|
152
147
|
"""Provides the reason for missing dates."""
|
|
153
148
|
return {"missing_dates": self.missing_dates}
|
|
154
149
|
|
|
@@ -162,7 +157,7 @@ class MissingDates(Forwards):
|
|
|
162
157
|
"""
|
|
163
158
|
return Node(self, [self.forward.tree()], **self.reason)
|
|
164
159
|
|
|
165
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
160
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
166
161
|
"""Provides metadata specific to the subclass.
|
|
167
162
|
|
|
168
163
|
Returns
|
|
@@ -184,7 +179,7 @@ class SkipMissingDates(Forwards):
|
|
|
184
179
|
The expected access pattern.
|
|
185
180
|
"""
|
|
186
181
|
|
|
187
|
-
def __init__(self, dataset: Dataset, expected_access:
|
|
182
|
+
def __init__(self, dataset: Dataset, expected_access: int | slice) -> None:
|
|
188
183
|
"""Initializes the SkipMissingDates class.
|
|
189
184
|
|
|
190
185
|
Parameters
|
|
@@ -285,7 +280,7 @@ class SkipMissingDates(Forwards):
|
|
|
285
280
|
return tuple(np.stack(_) for _ in result)
|
|
286
281
|
|
|
287
282
|
@debug_indexing
|
|
288
|
-
def _get_slice(self, s: slice) ->
|
|
283
|
+
def _get_slice(self, s: slice) -> tuple[NDArray[Any], ...]:
|
|
289
284
|
"""Retrieves a slice of items.
|
|
290
285
|
|
|
291
286
|
Parameters
|
|
@@ -303,7 +298,7 @@ class SkipMissingDates(Forwards):
|
|
|
303
298
|
return tuple(np.stack(_) for _ in result)
|
|
304
299
|
|
|
305
300
|
@debug_indexing
|
|
306
|
-
def __getitem__(self, n: FullIndex) ->
|
|
301
|
+
def __getitem__(self, n: FullIndex) -> tuple[NDArray[Any], ...]:
|
|
307
302
|
"""Retrieves the item at the given index.
|
|
308
303
|
|
|
309
304
|
Parameters
|
|
@@ -339,7 +334,7 @@ class SkipMissingDates(Forwards):
|
|
|
339
334
|
"""
|
|
340
335
|
return Node(self, [self.forward.tree()], expected_access=self.expected_access)
|
|
341
336
|
|
|
342
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
337
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
343
338
|
"""Provides metadata specific to the subclass.
|
|
344
339
|
|
|
345
340
|
Returns
|
|
@@ -404,7 +399,7 @@ class MissingDataset(Forwards):
|
|
|
404
399
|
return self._dates
|
|
405
400
|
|
|
406
401
|
@property
|
|
407
|
-
def missing(self) ->
|
|
402
|
+
def missing(self) -> set[int]:
|
|
408
403
|
"""Returns the set of missing indices."""
|
|
409
404
|
return self._missing
|
|
410
405
|
|
|
@@ -436,7 +431,7 @@ class MissingDataset(Forwards):
|
|
|
436
431
|
"""
|
|
437
432
|
return Node(self, [self.forward.tree()], start=self.start, end=self.end)
|
|
438
433
|
|
|
439
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
434
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
440
435
|
"""Provides metadata specific to the subclass.
|
|
441
436
|
|
|
442
437
|
Returns
|
|
@@ -10,8 +10,6 @@ import logging
|
|
|
10
10
|
import os
|
|
11
11
|
from functools import cached_property
|
|
12
12
|
from typing import Any
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from typing import Tuple
|
|
15
13
|
|
|
16
14
|
import numpy as np
|
|
17
15
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
|
|
|
82
80
|
# return [self.getitem(j) for j in i]
|
|
83
81
|
|
|
84
82
|
raise ValueError(
|
|
85
|
-
(
|
|
86
|
-
|
|
87
|
-
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
88
|
-
)
|
|
83
|
+
f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
|
|
84
|
+
"observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
|
|
89
85
|
)
|
|
90
86
|
|
|
91
87
|
@property
|
|
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):
|
|
|
195
191
|
|
|
196
192
|
if len(self.forward) != len(self.dates):
|
|
197
193
|
raise ValueError(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
204
|
-
)
|
|
194
|
+
f"Dates are not consistent with the number of items in the dataset. "
|
|
195
|
+
f"The dataset contains {len(self.forward)} time windows. "
|
|
196
|
+
f"This is not compatible with the "
|
|
197
|
+
f"{len(self.dates)} requested dates with frequency={frequency_hours}"
|
|
198
|
+
f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
|
|
205
199
|
)
|
|
206
200
|
|
|
207
201
|
@property
|
|
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
|
|
|
307
301
|
return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"
|
|
308
302
|
|
|
309
303
|
|
|
310
|
-
def observations_factory(args:
|
|
304
|
+
def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
|
|
311
305
|
observations = kwargs.pop("observations")
|
|
312
306
|
|
|
313
307
|
if not isinstance(observations, dict):
|
anemoi/datasets/data/padded.py
CHANGED
|
@@ -12,8 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Set
|
|
17
15
|
|
|
18
16
|
import numpy as np
|
|
19
17
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -38,7 +36,7 @@ class Padded(Forwards):
|
|
|
38
36
|
_after: int = 0
|
|
39
37
|
_inside: int = 0
|
|
40
38
|
|
|
41
|
-
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason:
|
|
39
|
+
def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
|
|
42
40
|
"""Create a padded subset of a dataset.
|
|
43
41
|
|
|
44
42
|
Attributes:
|
|
@@ -195,7 +193,7 @@ class Padded(Forwards):
|
|
|
195
193
|
return (len(self.dates),) + self.dataset.shape[1:]
|
|
196
194
|
|
|
197
195
|
@cached_property
|
|
198
|
-
def missing(self) ->
|
|
196
|
+
def missing(self) -> set[int]:
|
|
199
197
|
raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
|
|
200
198
|
# return self.forward.missing
|
|
201
199
|
|
|
@@ -207,7 +205,7 @@ class Padded(Forwards):
|
|
|
207
205
|
"""
|
|
208
206
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
209
207
|
|
|
210
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
208
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
211
209
|
"""Get the metadata specific to the forwards subclass.
|
|
212
210
|
|
|
213
211
|
Returns:
|
|
@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
|
|
|
35
35
|
return dict(np.load(f))
|
|
36
36
|
|
|
37
37
|
def read_metadata(self):
|
|
38
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
38
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
39
39
|
return json.load(f)
|
|
40
40
|
|
|
41
41
|
def read_statistics(self):
|
|
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
|
|
|
56
56
|
return dict(np.load(f))
|
|
57
57
|
|
|
58
58
|
def read_metadata(self):
|
|
59
|
-
with open(os.path.join(self.path, "metadata.json")
|
|
59
|
+
with open(os.path.join(self.path, "metadata.json")) as f:
|
|
60
60
|
return json.load(f)
|
|
61
61
|
|
|
62
62
|
def read_statistics(self):
|
anemoi/datasets/data/rescale.py
CHANGED
|
@@ -12,11 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
|
-
from typing import Tuple
|
|
19
|
-
from typing import Union
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -35,9 +30,7 @@ from .indexing import update_tuple
|
|
|
35
30
|
LOG = logging.getLogger(__name__)
|
|
36
31
|
|
|
37
32
|
|
|
38
|
-
def make_rescale(
|
|
39
|
-
variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
|
|
40
|
-
) -> Tuple[float, float]:
|
|
33
|
+
def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
|
|
41
34
|
"""Create rescale parameters (scale and offset) based on the input rescale specification.
|
|
42
35
|
|
|
43
36
|
Parameters
|
|
@@ -86,7 +79,7 @@ class Rescale(Forwards):
|
|
|
86
79
|
"""A class to apply rescaling to dataset variables."""
|
|
87
80
|
|
|
88
81
|
def __init__(
|
|
89
|
-
self, dataset: Dataset, rescale:
|
|
82
|
+
self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
|
|
90
83
|
) -> None:
|
|
91
84
|
"""Initialize the Rescale object.
|
|
92
85
|
|
|
@@ -129,7 +122,7 @@ class Rescale(Forwards):
|
|
|
129
122
|
"""
|
|
130
123
|
return Node(self, [self.forward.tree()], rescale=self.rescale)
|
|
131
124
|
|
|
132
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
125
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
133
126
|
"""Get the metadata specific to the rescale subclass.
|
|
134
127
|
|
|
135
128
|
Returns
|
|
@@ -204,7 +197,7 @@ class Rescale(Forwards):
|
|
|
204
197
|
return data * self._a[0] + self._b[0]
|
|
205
198
|
|
|
206
199
|
@cached_property
|
|
207
|
-
def statistics(self) ->
|
|
200
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
208
201
|
"""Get the statistics of the rescaled data."""
|
|
209
202
|
result = {}
|
|
210
203
|
a = self._a.squeeze()
|
|
@@ -224,7 +217,7 @@ class Rescale(Forwards):
|
|
|
224
217
|
|
|
225
218
|
return result
|
|
226
219
|
|
|
227
|
-
def statistics_tendencies(self, delta:
|
|
220
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
228
221
|
"""Get the tendencies of the statistics of the rescaled data.
|
|
229
222
|
|
|
230
223
|
Parameters
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
|
|
|
37
34
|
class Select(Forwards):
|
|
38
35
|
"""Class to select a subset of variables from a dataset."""
|
|
39
36
|
|
|
40
|
-
def __init__(self, dataset: Dataset, indices:
|
|
37
|
+
def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
|
|
41
38
|
"""Initialize the Select class.
|
|
42
39
|
|
|
43
40
|
Parameters
|
|
@@ -140,26 +137,26 @@ class Select(Forwards):
|
|
|
140
137
|
return (len(self), len(self.indices)) + self.dataset.shape[2:]
|
|
141
138
|
|
|
142
139
|
@cached_property
|
|
143
|
-
def variables(self) ->
|
|
140
|
+
def variables(self) -> list[str]:
|
|
144
141
|
"""Get the variables of the dataset."""
|
|
145
142
|
return [self.dataset.variables[i] for i in self.indices]
|
|
146
143
|
|
|
147
144
|
@cached_property
|
|
148
|
-
def variables_metadata(self) ->
|
|
145
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
149
146
|
"""Get the metadata of the variables."""
|
|
150
147
|
return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
|
|
151
148
|
|
|
152
149
|
@cached_property
|
|
153
|
-
def name_to_index(self) ->
|
|
150
|
+
def name_to_index(self) -> dict[str, int]:
|
|
154
151
|
"""Get the mapping of variable names to indices."""
|
|
155
152
|
return {k: i for i, k in enumerate(self.variables)}
|
|
156
153
|
|
|
157
154
|
@cached_property
|
|
158
|
-
def statistics(self) ->
|
|
155
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
159
156
|
"""Get the statistics of the dataset."""
|
|
160
157
|
return {k: v[self.indices] for k, v in self.dataset.statistics.items()}
|
|
161
158
|
|
|
162
|
-
def statistics_tendencies(self, delta:
|
|
159
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
163
160
|
"""Get the statistical tendencies of the dataset.
|
|
164
161
|
|
|
165
162
|
Parameters
|
|
@@ -176,7 +173,7 @@ class Select(Forwards):
|
|
|
176
173
|
delta = self.frequency
|
|
177
174
|
return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}
|
|
178
175
|
|
|
179
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
176
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
180
177
|
"""Get the specific metadata of the dataset.
|
|
181
178
|
|
|
182
179
|
Parameters
|
|
@@ -216,7 +213,7 @@ class Select(Forwards):
|
|
|
216
213
|
"""
|
|
217
214
|
return Node(self, [self.dataset.tree()], **self.reason)
|
|
218
215
|
|
|
219
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
216
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
220
217
|
"""Get the metadata specific to the subclass.
|
|
221
218
|
|
|
222
219
|
Returns
|
|
@@ -231,7 +228,7 @@ class Select(Forwards):
|
|
|
231
228
|
class Rename(Forwards):
|
|
232
229
|
"""Class to rename variables in a dataset."""
|
|
233
230
|
|
|
234
|
-
def __init__(self, dataset: Dataset, rename:
|
|
231
|
+
def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
|
|
235
232
|
"""Initialize the Rename class.
|
|
236
233
|
|
|
237
234
|
Parameters
|
|
@@ -251,17 +248,17 @@ class Rename(Forwards):
|
|
|
251
248
|
self.rename = rename
|
|
252
249
|
|
|
253
250
|
@property
|
|
254
|
-
def variables(self) ->
|
|
251
|
+
def variables(self) -> list[str]:
|
|
255
252
|
"""Get the renamed variables."""
|
|
256
253
|
return self._variables
|
|
257
254
|
|
|
258
255
|
@property
|
|
259
|
-
def variables_metadata(self) ->
|
|
256
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
260
257
|
"""Get the renamed variables metadata."""
|
|
261
258
|
return self._variables_metadata
|
|
262
259
|
|
|
263
260
|
@cached_property
|
|
264
|
-
def name_to_index(self) ->
|
|
261
|
+
def name_to_index(self) -> dict[str, int]:
|
|
265
262
|
"""Get the mapping of renamed variable names to indices."""
|
|
266
263
|
return {k: i for i, k in enumerate(self.variables)}
|
|
267
264
|
|
|
@@ -273,7 +270,7 @@ class Rename(Forwards):
|
|
|
273
270
|
"""
|
|
274
271
|
return Node(self, [self.forward.tree()], rename=self.rename)
|
|
275
272
|
|
|
276
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
273
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
277
274
|
"""Get the metadata specific to the subclass.
|
|
278
275
|
|
|
279
276
|
Returns:
|
|
@@ -12,9 +12,6 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Optional
|
|
17
|
-
from typing import Set
|
|
18
15
|
|
|
19
16
|
from numpy.typing import NDArray
|
|
20
17
|
|
|
@@ -56,11 +53,11 @@ class Statistics(Forwards):
|
|
|
56
53
|
)
|
|
57
54
|
|
|
58
55
|
@cached_property
|
|
59
|
-
def statistics(self) ->
|
|
56
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
60
57
|
"""Get the statistics."""
|
|
61
58
|
return self._statistic.statistics
|
|
62
59
|
|
|
63
|
-
def statistics_tendencies(self, delta:
|
|
60
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
64
61
|
"""Get the statistics tendencies.
|
|
65
62
|
|
|
66
63
|
Parameters
|
|
@@ -77,7 +74,7 @@ class Statistics(Forwards):
|
|
|
77
74
|
delta = self.frequency
|
|
78
75
|
return self._statistic.statistics_tendencies(delta)
|
|
79
76
|
|
|
80
|
-
def forwards_subclass_metadata_specific(self) ->
|
|
77
|
+
def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
|
|
81
78
|
"""Get the metadata specific to the forwards subclass.
|
|
82
79
|
|
|
83
80
|
Returns
|
|
@@ -97,7 +94,7 @@ class Statistics(Forwards):
|
|
|
97
94
|
"""
|
|
98
95
|
return Node(self, [self.forward.tree()])
|
|
99
96
|
|
|
100
|
-
def get_dataset_names(self, names:
|
|
97
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
101
98
|
"""Get the dataset names.
|
|
102
99
|
|
|
103
100
|
Parameters
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -15,11 +15,6 @@ import tempfile
|
|
|
15
15
|
import warnings
|
|
16
16
|
from functools import cached_property
|
|
17
17
|
from typing import Any
|
|
18
|
-
from typing import Dict
|
|
19
|
-
from typing import List
|
|
20
|
-
from typing import Optional
|
|
21
|
-
from typing import Set
|
|
22
|
-
from typing import Union
|
|
23
18
|
from urllib.parse import urlparse
|
|
24
19
|
|
|
25
20
|
import numpy as np
|
|
@@ -90,7 +85,7 @@ class S3Store(ReadOnlyStore):
|
|
|
90
85
|
options using the anemoi configs.
|
|
91
86
|
"""
|
|
92
87
|
|
|
93
|
-
def __init__(self, url: str, region:
|
|
88
|
+
def __init__(self, url: str, region: str | None = None) -> None:
|
|
94
89
|
"""Initialize the S3Store with a URL and optional region."""
|
|
95
90
|
from anemoi.utils.remote.s3 import s3_client
|
|
96
91
|
|
|
@@ -107,51 +102,6 @@ class S3Store(ReadOnlyStore):
|
|
|
107
102
|
return response["Body"].read()
|
|
108
103
|
|
|
109
104
|
|
|
110
|
-
class PlanetaryComputerStore(ReadOnlyStore):
|
|
111
|
-
"""We write our own Store to access catalogs on Planetary Computer,
|
|
112
|
-
as it requires some extra arguments to use xr.open_zarr.
|
|
113
|
-
"""
|
|
114
|
-
|
|
115
|
-
def __init__(self, data_catalog_id: str) -> None:
|
|
116
|
-
"""Initialize the PlanetaryComputerStore with a data catalog ID.
|
|
117
|
-
|
|
118
|
-
Parameters
|
|
119
|
-
----------
|
|
120
|
-
data_catalog_id : str
|
|
121
|
-
The data catalog ID.
|
|
122
|
-
"""
|
|
123
|
-
self.data_catalog_id = data_catalog_id
|
|
124
|
-
|
|
125
|
-
import planetary_computer
|
|
126
|
-
import pystac_client
|
|
127
|
-
|
|
128
|
-
catalog = pystac_client.Client.open(
|
|
129
|
-
"https://planetarycomputer.microsoft.com/api/stac/v1/",
|
|
130
|
-
modifier=planetary_computer.sign_inplace,
|
|
131
|
-
)
|
|
132
|
-
collection = catalog.get_collection(self.data_catalog_id)
|
|
133
|
-
|
|
134
|
-
asset = collection.assets["zarr-abfs"]
|
|
135
|
-
|
|
136
|
-
if "xarray:storage_options" in asset.extra_fields:
|
|
137
|
-
store = {
|
|
138
|
-
"store": asset.href,
|
|
139
|
-
"storage_options": asset.extra_fields["xarray:storage_options"],
|
|
140
|
-
**asset.extra_fields["xarray:open_kwargs"],
|
|
141
|
-
}
|
|
142
|
-
else:
|
|
143
|
-
store = {
|
|
144
|
-
"filename_or_obj": asset.href,
|
|
145
|
-
**asset.extra_fields["xarray:open_kwargs"],
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
self.store = store
|
|
149
|
-
|
|
150
|
-
def __getitem__(self, key: str) -> bytes:
|
|
151
|
-
"""Retrieve an item from the store."""
|
|
152
|
-
raise NotImplementedError()
|
|
153
|
-
|
|
154
|
-
|
|
155
105
|
class DebugStore(ReadOnlyStore):
|
|
156
106
|
"""A store to debug the zarr loading."""
|
|
157
107
|
|
|
@@ -190,11 +140,11 @@ def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:
|
|
|
190
140
|
|
|
191
141
|
if store.startswith("http://") or store.startswith("https://"):
|
|
192
142
|
|
|
193
|
-
parsed = urlparse(store)
|
|
194
|
-
|
|
195
143
|
if store.endswith(".zip"):
|
|
196
144
|
import multiurl
|
|
197
145
|
|
|
146
|
+
parsed = urlparse(store)
|
|
147
|
+
|
|
198
148
|
# Zarr cannot handle zip files over HTTP
|
|
199
149
|
tmpdir = tempfile.gettempdir()
|
|
200
150
|
name = os.path.basename(parsed.path)
|
|
@@ -210,15 +160,7 @@ def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:
|
|
|
210
160
|
os.rename(path + ".tmp", path)
|
|
211
161
|
return name_to_zarr_store(path)
|
|
212
162
|
|
|
213
|
-
|
|
214
|
-
if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
|
|
215
|
-
s3_url = f"s3://{bits[0]}{parsed.path}"
|
|
216
|
-
store = S3Store(s3_url, region=bits[2])
|
|
217
|
-
elif store.startswith("https://planetarycomputer.microsoft.com/"):
|
|
218
|
-
data_catalog_id = store.rsplit("/", 1)[-1]
|
|
219
|
-
store = PlanetaryComputerStore(data_catalog_id).store
|
|
220
|
-
else:
|
|
221
|
-
store = HTTPStore(store)
|
|
163
|
+
return HTTPStore(store)
|
|
222
164
|
|
|
223
165
|
return store
|
|
224
166
|
|
|
@@ -252,7 +194,7 @@ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hie
|
|
|
252
194
|
class Zarr(Dataset):
|
|
253
195
|
"""A zarr dataset."""
|
|
254
196
|
|
|
255
|
-
def __init__(self, path:
|
|
197
|
+
def __init__(self, path: str | zarr.hierarchy.Group) -> None:
|
|
256
198
|
"""Initialize the Zarr dataset with a path or zarr group."""
|
|
257
199
|
if isinstance(path, zarr.hierarchy.Group):
|
|
258
200
|
self.was_zarr = True
|
|
@@ -268,7 +210,7 @@ class Zarr(Dataset):
|
|
|
268
210
|
self._missing = set()
|
|
269
211
|
|
|
270
212
|
@property
|
|
271
|
-
def missing(self) ->
|
|
213
|
+
def missing(self) -> set[int]:
|
|
272
214
|
"""Return the missing dates of the dataset."""
|
|
273
215
|
return self._missing
|
|
274
216
|
|
|
@@ -289,7 +231,7 @@ class Zarr(Dataset):
|
|
|
289
231
|
"""Retrieve an item from the dataset."""
|
|
290
232
|
return self.data[n]
|
|
291
233
|
|
|
292
|
-
def _unwind(self, index:
|
|
234
|
+
def _unwind(self, index: int | slice | list | tuple, rest: list, shape: tuple, axis: int, axes: list) -> iter:
|
|
293
235
|
"""Unwind the index for multi-dimensional indexing."""
|
|
294
236
|
if not isinstance(index, (int, slice, list, tuple)):
|
|
295
237
|
try:
|
|
@@ -351,7 +293,7 @@ class Zarr(Dataset):
|
|
|
351
293
|
return self.z.longitude[:]
|
|
352
294
|
|
|
353
295
|
@property
|
|
354
|
-
def statistics(self) ->
|
|
296
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
355
297
|
"""Return the statistics of the dataset."""
|
|
356
298
|
return dict(
|
|
357
299
|
mean=self.z.mean[:],
|
|
@@ -360,7 +302,7 @@ class Zarr(Dataset):
|
|
|
360
302
|
minimum=self.z.minimum[:],
|
|
361
303
|
)
|
|
362
304
|
|
|
363
|
-
def statistics_tendencies(self, delta:
|
|
305
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
364
306
|
"""Return the statistical tendencies of the dataset."""
|
|
365
307
|
if delta is None:
|
|
366
308
|
delta = self.frequency
|
|
@@ -407,14 +349,14 @@ class Zarr(Dataset):
|
|
|
407
349
|
return dates[1].astype(object) - dates[0].astype(object)
|
|
408
350
|
|
|
409
351
|
@property
|
|
410
|
-
def name_to_index(self) ->
|
|
352
|
+
def name_to_index(self) -> dict[str, int]:
|
|
411
353
|
"""Return the name to index mapping of the dataset."""
|
|
412
354
|
if "variables" in self.z.attrs:
|
|
413
355
|
return {n: i for i, n in enumerate(self.z.attrs["variables"])}
|
|
414
356
|
return self.z.attrs["name_to_index"]
|
|
415
357
|
|
|
416
358
|
@property
|
|
417
|
-
def variables(self) ->
|
|
359
|
+
def variables(self) -> list[str]:
|
|
418
360
|
"""Return the variables of the dataset."""
|
|
419
361
|
return [
|
|
420
362
|
k
|
|
@@ -425,7 +367,7 @@ class Zarr(Dataset):
|
|
|
425
367
|
]
|
|
426
368
|
|
|
427
369
|
@cached_property
|
|
428
|
-
def constant_fields(self) ->
|
|
370
|
+
def constant_fields(self) -> list[str]:
|
|
429
371
|
"""Return the constant fields of the dataset."""
|
|
430
372
|
result = self.z.attrs.get("constant_fields")
|
|
431
373
|
if result is None:
|
|
@@ -433,7 +375,7 @@ class Zarr(Dataset):
|
|
|
433
375
|
return self.computed_constant_fields()
|
|
434
376
|
|
|
435
377
|
@property
|
|
436
|
-
def variables_metadata(self) ->
|
|
378
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
437
379
|
"""Return the metadata of the variables."""
|
|
438
380
|
return self.z.attrs.get("variables_metadata", {})
|
|
439
381
|
|
|
@@ -445,7 +387,7 @@ class Zarr(Dataset):
|
|
|
445
387
|
"""Return the end date of the statistics."""
|
|
446
388
|
return self.dates[-1]
|
|
447
389
|
|
|
448
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
390
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
449
391
|
"""Return the specific metadata of the dataset."""
|
|
450
392
|
return super().metadata_specific(
|
|
451
393
|
attrs=dict(self.z.attrs),
|
|
@@ -469,7 +411,7 @@ class Zarr(Dataset):
|
|
|
469
411
|
"""Return the tree representation of the dataset."""
|
|
470
412
|
return Node(self, [], path=self.path)
|
|
471
413
|
|
|
472
|
-
def get_dataset_names(self, names:
|
|
414
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
473
415
|
"""Get the names of the datasets."""
|
|
474
416
|
name, _ = os.path.splitext(os.path.basename(self.path))
|
|
475
417
|
names.add(name)
|
|
@@ -486,17 +428,17 @@ class Zarr(Dataset):
|
|
|
486
428
|
class ZarrWithMissingDates(Zarr):
|
|
487
429
|
"""A zarr dataset with missing dates."""
|
|
488
430
|
|
|
489
|
-
def __init__(self, path:
|
|
431
|
+
def __init__(self, path: str | zarr.hierarchy.Group) -> None:
|
|
490
432
|
"""Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
|
|
491
433
|
super().__init__(path)
|
|
492
434
|
|
|
493
435
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
494
|
-
missing_dates =
|
|
436
|
+
missing_dates = {np.datetime64(x, "s") for x in missing_dates}
|
|
495
437
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
496
438
|
self._missing = set(self.missing_to_dates)
|
|
497
439
|
|
|
498
440
|
@property
|
|
499
|
-
def missing(self) ->
|
|
441
|
+
def missing(self) -> set[int]:
|
|
500
442
|
"""Return the missing dates of the dataset."""
|
|
501
443
|
return self._missing
|
|
502
444
|
|
|
@@ -559,12 +501,16 @@ class ZarrWithMissingDates(Zarr):
|
|
|
559
501
|
QUIET = set()
|
|
560
502
|
|
|
561
503
|
|
|
562
|
-
def zarr_lookup(name: str, fail: bool = True) ->
|
|
504
|
+
def zarr_lookup(name: str, fail: bool = True) -> str | None:
|
|
563
505
|
"""Look up a zarr dataset by name."""
|
|
564
506
|
|
|
565
507
|
config = load_config()["datasets"]
|
|
566
508
|
use_search_path_not_found = config.get("use_search_path_not_found", False)
|
|
567
509
|
|
|
510
|
+
if name.endswith(".zarr/"):
|
|
511
|
+
LOG.warning("Removing trailing slash from path: %s", name)
|
|
512
|
+
name = name[:-1]
|
|
513
|
+
|
|
568
514
|
if name.endswith(".zarr") or name.endswith(".zip"):
|
|
569
515
|
|
|
570
516
|
if os.path.exists(name):
|