anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/subset.py
CHANGED
@@ -10,12 +10,9 @@

 import datetime
 import logging
+from collections.abc import Sequence
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Sequence
-from typing import Set
 from typing import Union

 import numpy as np
@@ -95,7 +92,7 @@ def _end(a: int, b: int, dates: NDArray[np.datetime64]) -> int:
     return b


-def _combine_reasons(reason1: …
+def _combine_reasons(reason1: dict[str, Any], reason2: dict[str, Any], dates: NDArray[np.datetime64]) -> dict[str, Any]:
     """Combine two reason dictionaries.

     Parameters:
@@ -126,7 +123,7 @@ class Subset(Forwards):
         reason (Dict[str, Any]): Dictionary of reasons.
     """

-    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: …
+    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: dict[str, Any]) -> None:
         """Initialize the Subset.

         Parameters:
@@ -140,8 +137,8 @@ class Subset(Forwards):
             dataset = dataset.dataset

         self.dataset: Dataset = dataset
-        self.indices: …
-        self.reason: …
+        self.indices: list[int] = list(indices)
+        self.reason: dict[str, Any] = {k: v for k, v in reason.items() if v is not None}

         # Forward other properties to the super dataset
         super().__init__(dataset)
@@ -274,10 +271,10 @@ class Subset(Forwards):
         return f"Subset({self.dataset},{self.dates[0]}...{self.dates[-1]}/{self.frequency})"

     @cached_property
-    def missing(self) -> …
+    def missing(self) -> set[int]:
         """Get the missing indices of the subset."""
         missing = self.dataset.missing
-        result: …
+        result: set[int] = set()
         for j, i in enumerate(self.indices):
             if i in missing:
                 result.add(j)
@@ -291,7 +288,7 @@ class Subset(Forwards):
         """
         return Node(self, [self.dataset.tree()], **self.reason)

-    def forwards_subclass_metadata_specific(self) -> …
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the forwards subclass.

         Returns:
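The change above, like most of the per-module edits in this release, swaps the deprecated typing aliases (Dict, List, Set, Tuple, Optional, Union) for the built-in generics of PEP 585 and the X | None union syntax of PEP 604, and imports abstract types such as Sequence, Callable and Iterator from collections.abc instead. A minimal before/after sketch of the pattern, using a hypothetical helper that is not part of the package:

# Old style (0.5.26 and earlier): aliases imported from typing
from typing import Dict, List, Optional


def pick_reasons_old(reasons: Dict[str, int], names: List[str], limit: Optional[int] = None) -> Dict[str, int]:
    """Keep only the named reasons, optionally truncating the list of names."""
    return {name: reasons[name] for name in names[:limit] if name in reasons}


# New style (0.5.28): built-in generics (PEP 585) and `|` unions (PEP 604)
def pick_reasons_new(reasons: dict[str, int], names: list[str], limit: int | None = None) -> dict[str, int]:
    """Same behaviour, annotated without importing from typing."""
    return {name: reasons[name] for name in names[:limit] if name in reasons}


if __name__ == "__main__":
    print(pick_reasons_new({"gap": 2, "missing": 5}, ["missing", "other"]))  # {'missing': 5}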
anemoi/datasets/data/unchecked.py
CHANGED
@@ -10,14 +10,10 @@

 import datetime
 import logging
+from collections.abc import Callable
 from functools import cached_property
 from functools import wraps
 from typing import Any
-from typing import Callable
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set

 import numpy as np
 from numpy.typing import NDArray
@@ -163,13 +159,13 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def name_to_index(self) -> …
+    def name_to_index(self) -> dict[str, int]:
         """Get the mapping of variable names to their indices."""
         raise NotImplementedError()

     @check("check_same_variables")
     @property
-    def variables(self) -> …
+    def variables(self) -> list[str]:
         """Get the list of variables in the dataset."""
         raise NotImplementedError()

@@ -181,12 +177,12 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def statistics(self) -> …
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Get the statistics of the dataset."""
         raise NotImplementedError()

     @check("check_same_variables")
-    def statistics_tendencies(self, delta: …
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Get the statistics tendencies of the dataset.

         Parameters
@@ -207,9 +203,9 @@ class Unchecked(Combined):
         raise NotImplementedError()

     @cached_property
-    def missing(self) -> …
+    def missing(self) -> set[int]:
         """Get the missing data indices."""
-        result: …
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result
anemoi/datasets/data/xy.py
CHANGED
@@ -11,10 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Set
-from typing import Tuple

 from .dataset import Dataset
 from .dataset import FullIndex
@@ -29,7 +25,7 @@ LOG = logging.getLogger(__name__)
 class ZipBase(Combined):
     """Base class for handling zipped datasets."""

-    def __init__(self, datasets: …
+    def __init__(self, datasets: list[Any], check_compatibility: bool = True) -> None:
         """Initialize ZipBase with a list of datasets.

         Parameters
@@ -58,7 +54,7 @@ class ZipBase(Combined):
         new_parents = [parent.clone(ds) for ds in self.datasets]
         return self.clone(new_parents)

-    def clone(self, datasets: …
+    def clone(self, datasets: list[Any]) -> "ZipBase":
         """Clone the ZipBase with new datasets.

         Parameters
@@ -81,7 +77,11 @@ class ZipBase(Combined):
         Node
             Tree representation of the datasets.
         """
-        return Node( …
+        return Node(
+            self,
+            [d.tree() for d in self.datasets],
+            check_compatibility=self._check_compatibility,
+        )

     def __len__(self) -> int:
         """Get the length of the smallest dataset.
@@ -93,7 +93,7 @@ class ZipBase(Combined):
         """
         return min(len(d) for d in self.datasets)

-    def __getitem__(self, n: FullIndex) -> …
+    def __getitem__(self, n: FullIndex) -> tuple[Any, ...]:
         """Get the item at the specified index from all datasets.

         Parameters
@@ -145,55 +145,55 @@ class ZipBase(Combined):
         pass

     @cached_property
-    def missing(self) -> …
+    def missing(self) -> set[int]:
         """Get the set of missing indices from all datasets."""
-        result: …
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result

     @property
-    def shape(self) -> …
+    def shape(self) -> tuple[Any, ...]:
         """Get the shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def field_shape(self) -> …
+    def field_shape(self) -> tuple[Any, ...]:
         """Get the field shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def latitudes(self) -> …
+    def latitudes(self) -> tuple[Any, ...]:
         """Get the latitudes of all datasets."""
         return tuple(d.latitudes for d in self.datasets)

     @property
-    def longitudes(self) -> …
+    def longitudes(self) -> tuple[Any, ...]:
         """Get the longitudes of all datasets."""
         return tuple(d.longitudes for d in self.datasets)

     @property
-    def dtype(self) -> …
+    def dtype(self) -> tuple[Any, ...]:
         """Get the data types of all datasets."""
         return tuple(d.dtype for d in self.datasets)

     @property
-    def grids(self) -> …
+    def grids(self) -> tuple[Any, ...]:
         """Get the grids of all datasets."""
         return tuple(d.grids for d in self.datasets)

     @property
-    def statistics(self) -> …
+    def statistics(self) -> tuple[Any, ...]:
         """Get the statistics of all datasets."""
         return tuple(d.statistics for d in self.datasets)

     @property
-    def resolution(self) -> …
+    def resolution(self) -> tuple[Any, ...]:
         """Get the resolution of all datasets."""
         return tuple(d.resolution for d in self.datasets)

     @property
-    def name_to_index(self) -> …
+    def name_to_index(self) -> tuple[Any, ...]:
         """Get the name to index mapping of all datasets."""
         return tuple(d.name_to_index for d in self.datasets)

@@ -210,6 +210,10 @@ class ZipBase(Combined):
         if self._check_compatibility:
             super().check_compatibility(d1, d2)

+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        """Get the metadata specific to the subclass."""
+        return {}
+

 class Zip(ZipBase):
     """Class for handling zipped datasets."""
@@ -223,7 +227,7 @@ class XY(ZipBase):
     pass


-def xy_factory(args: …
+def xy_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> XY:
     """Factory function to create an XY instance.

     Parameters
@@ -256,7 +260,7 @@ def xy_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> XY:
     return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)


-def zip_factory(args: …
+def zip_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Zip:
     """Factory function to create a Zip instance.

     Parameters
anemoi/datasets/dates/__init__.py
CHANGED
@@ -10,15 +10,10 @@

 import datetime
 import warnings
+from collections.abc import Iterator
 from functools import reduce
 from math import gcd
 from typing import Any
-from typing import Dict
-from typing import Iterator
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 # from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import DateTimes
@@ -29,7 +24,7 @@ from anemoi.utils.hindcasts import HindcastDatesTimes
 from anemoi.utils.humanize import print_dates


-def extend(x: …
+def extend(x: str | list[Any] | tuple[Any, ...]) -> Iterator[datetime.datetime]:
     """Extend a date range or list of dates into individual datetime objects.

     Args:
@@ -63,6 +58,8 @@ def extend(x: Union[str, List[Any], Tuple[Any, ...]]) -> Iterator[datetime.datet
 class DatesProvider:
     """Base class for date generation.

+    Examples
+    --------
     >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
     [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]

@@ -86,7 +83,7 @@ class DatesProvider:
     3
     """

-    def __init__(self, missing: …
+    def __init__(self, missing: list[str | datetime.datetime] | None = None) -> None:
         """Initialize the DatesProvider with optional missing dates.

         Parameters
@@ -168,7 +165,7 @@ class ValuesDates(DatesProvider):
         **kwargs (Any): Additional arguments.
     """

-    def __init__(self, values: …
+    def __init__(self, values: list[str | datetime.datetime], **kwargs: Any) -> None:
         """Initialize ValuesDates with a list of values.

         Args:
@@ -188,7 +185,7 @@ class ValuesDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) -> …
+    def as_dict(self) -> dict[str, Any]:
         """Convert the ValuesDates instance to a dictionary.

         Returns
@@ -215,9 +212,9 @@ class StartEndDates(DatesProvider):

     def __init__(
         self,
-        start: …
-        end: …
-        frequency: …
+        start: str | datetime.datetime,
+        end: str | datetime.datetime,
+        frequency: int | str = 1,
         **kwargs: Any,
     ) -> None:
         """Initialize StartEndDates with start, end, and frequency.
@@ -259,7 +256,7 @@ class StartEndDates(DatesProvider):

         super().__init__(missing=missing)

-    def as_dict(self) -> …
+    def as_dict(self) -> dict[str, Any]:
         """Convert the StartEndDates instance to a dictionary.

         Returns
@@ -314,9 +311,9 @@ class HindcastsDates(DatesProvider):

     def __init__(
         self,
-        start: …
-        end: …
-        steps: …
+        start: str | list[str],
+        end: str | list[str],
+        steps: list[int] = [0],
         years: int = 20,
         **kwargs: Any,
     ) -> None:
@@ -403,7 +400,7 @@ class HindcastsDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) -> …
+    def as_dict(self) -> dict[str, Any]:
         """Convert the HindcastsDates instance to a dictionary.

         Returns
anemoi/datasets/dates/groups.py
CHANGED
@@ -12,19 +12,16 @@ import datetime
 import itertools
 from abc import ABC
 from abc import abstractmethod
+from collections.abc import Callable
+from collections.abc import Iterator
 from functools import cached_property
 from typing import Any
-from typing import Callable
-from typing import Iterator
-from typing import List
-from typing import Tuple
-from typing import Union

 from anemoi.datasets.dates import DatesProvider
 from anemoi.datasets.dates import as_datetime


-def _shorten(dates: …
+def _shorten(dates: list[datetime.datetime] | tuple[datetime.datetime, ...]) -> str | list[str]:
     """Shorten the list of dates for display.

     Args:
@@ -43,7 +40,7 @@ def _shorten(dates: Union[List[datetime.datetime], Tuple[datetime.datetime, ...]
 class GroupOfDates:
     """A class to represent a group of dates."""

-    def __init__(self, dates: …
+    def __init__(self, dates: list[datetime.datetime], provider: DatesProvider, partial_ok: bool = False) -> None:
         assert isinstance(provider, DatesProvider), type(provider)
         assert isinstance(dates, list)

@@ -197,10 +194,10 @@ class Groups:
 class Filter:
     """A class to filter out missing dates."""

-    def __init__(self, missing: …
-        self.missing = …
+    def __init__(self, missing: list[datetime.datetime]) -> None:
+        self.missing = {as_datetime(m) for m in missing}

-    def __call__(self, dates: …
+    def __call__(self, dates: list[datetime.datetime]) -> list[datetime.datetime]:
         """Filter out missing dates from the list of dates.

         Args:
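The reworked Filter above normalises the missing dates once, into a set, so membership tests in __call__ are constant-time per date. A standalone sketch of the same idea, using a local _as_datetime stand-in rather than anemoi.datasets.dates.as_datetime and a hypothetical class name:

import datetime


def _as_datetime(d):
    # Stand-in for anemoi.datasets.dates.as_datetime: accept ISO strings or datetime objects.
    return datetime.datetime.fromisoformat(d) if isinstance(d, str) else d


class MissingDatesFilter:
    """Drop dates that are known to be missing (illustrative re-implementation)."""

    def __init__(self, missing):
        # Normalise once into a set for O(1) membership tests.
        self.missing = {_as_datetime(m) for m in missing}

    def __call__(self, dates):
        return [d for d in dates if d not in self.missing]


dates = [datetime.datetime(2023, 1, 1, h) for h in (0, 6, 12, 18)]
print(MissingDatesFilter(["2023-01-01T06:00:00"])(dates))  # the 06:00 entry is dropped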
anemoi/datasets/dumper.py
ADDED
@@ -0,0 +1,76 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import datetime
+import io
+import logging
+
+import ruamel.yaml
+
+LOG = logging.getLogger(__name__)
+
+
+def represent_date(dumper, data):
+
+    if isinstance(data, datetime.datetime):
+        if data.tzinfo is None:
+            data = data.replace(tzinfo=datetime.timezone.utc)
+        data = data.astimezone(datetime.timezone.utc)
+        iso_str = data.replace(tzinfo=None).isoformat(timespec="seconds") + "Z"
+    else:
+        iso_str = data.isoformat()
+
+    return dumper.represent_scalar("tag:yaml.org,2002:timestamp", iso_str)
+
+
+# --- Represent multiline strings with | style ---
+def represent_multiline_str(dumper, data):
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data.strip(), style="|")
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+
+# --- Represent short lists inline (flow style) ---
+def represent_inline_list(dumper, data):
+
+    if not all(isinstance(i, (str, int, float, bool, type(None))) for i in data):
+        return dumper.represent_sequence("tag:yaml.org,2002:seq", data)
+
+    return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
+
+
+def yaml_dump(obj, order=None, stream=None, **kwargs):
+
+    if order:
+
+        def _ordering(k):
+            return order.index(k) if k in order else len(order)
+
+        obj = {k: v for k, v in sorted(obj.items(), key=lambda item: _ordering(item[0]))}
+
+    yaml = ruamel.yaml.YAML()
+    yaml.width = 120  # wrap long flow sequences
+
+    yaml.Representer.add_representer(datetime.date, represent_date)
+    yaml.Representer.add_representer(datetime.datetime, represent_date)
+    yaml.Representer.add_representer(str, represent_multiline_str)
+    yaml.Representer.add_representer(list, represent_inline_list)
+
+    data = ruamel.yaml.comments.CommentedMap()
+    for i, (k, v) in enumerate(obj.items()):
+        data[k] = v
+        if i > 0:
+            data.yaml_set_comment_before_after_key(key=k, before="\n")
+
+    if stream:
+        yaml.dump(data, stream=stream, **kwargs)
+
+    stream = io.StringIO()
+    yaml.dump(data, stream=stream, **kwargs)
+    return stream.getvalue()
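A hedged usage sketch of the new helper, assuming the module is importable as anemoi.datasets.dumper; the recipe dictionary below is illustrative and not taken from the package:

import datetime

from anemoi.datasets.dumper import yaml_dump

recipe = {
    "input": {"grib": {"path": "data.grib"}},
    "dates": {
        "start": datetime.datetime(2023, 1, 1),
        "end": datetime.datetime(2023, 1, 2),
        "frequency": "6h",
    },
    "name": "example-dataset",
}

# `order` moves the listed keys to the front; keys not listed keep their relative order.
# Per the module above, datetimes are emitted as UTC timestamps, short scalar lists use
# flow style, and a blank line separates every top-level key after the first.
print(yaml_dump(recipe, order=["name", "dates", "input"]))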