anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between the two package versions as released.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
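The dominant structural change in this release is visible in the listing: filter and source modules moved up one level, from anemoi/datasets/create/functions/filters/ and anemoi/datasets/create/functions/sources/ to anemoi/datasets/create/filters/ and anemoi/datasets/create/sources/ (the xarray helpers were regrouped under sources/xarray_support/), and the old create/functions package was deleted. A minimal compatibility sketch for code that imports these modules directly follows; the module name rotate_winds is taken from the listing above, but importing it this way is an assumption, not a documented API:

```python
# Hedged sketch: tolerate both the 0.5.16 and the 0.5.17 module layouts.
# The module path comes from the rename entries in the listing above;
# whether these modules are meant to be imported directly is an assumption.
try:
    # 0.5.17: filters live directly under anemoi.datasets.create.filters
    from anemoi.datasets.create.filters import rotate_winds
except ImportError:
    # 0.5.16 and earlier: filters lived under create.functions.filters
    from anemoi.datasets.create.functions.filters import rotate_winds
```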
```diff
--- a/anemoi/datasets/create/__init__.py
+++ b/anemoi/datasets/create/__init__.py
@@ -15,10 +15,14 @@ import time
 import uuid
 import warnings
 from functools import cached_property
+from typing import Any
+from typing import Optional
+from typing import Union

 import cftime
 import numpy as np
 import tqdm
+import zarr
 from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
@@ -55,8 +59,19 @@ LOG = logging.getLogger(__name__)
 VERSION = "0.30"


-def json_tidy(o):
+def json_tidy(o: Any) -> Any:
+    """Convert various types to JSON serializable format.

+    Parameters
+    ----------
+    o : Any
+        The object to convert.
+
+    Returns
+    -------
+    Any
+        The JSON serializable object.
+    """
     if isinstance(o, datetime.datetime):
         return o.isoformat()

@@ -85,12 +100,24 @@ def json_tidy(o):
     raise TypeError(f"{repr(o)} is not JSON serializable {type(o)}")


-def build_statistics_dates(dates, start, end):
-
-
-
-
-
+def build_statistics_dates(
+    dates: list[datetime.datetime], start: Optional[datetime.datetime], end: Optional[datetime.datetime]
+) -> tuple[str, str]:
+    """Compute the start and end dates for the statistics.
+
+    Parameters
+    ----------
+    dates : list of datetime.datetime
+        The list of dates.
+    start : Optional[datetime.datetime]
+        The start date.
+    end : Optional[datetime.datetime]
+        The end date.
+
+    Returns
+    -------
+    tuple of str
+        The start and end dates in ISO format.
     """
     # if not specified, use the default statistics dates
     default_start, default_end = default_statistics_dates(dates)
@@ -109,7 +136,19 @@ def build_statistics_dates(dates, start, end):
     return (start.isoformat(), end.isoformat())


-def _path_readable(path):
+def _path_readable(path: str) -> bool:
+    """Check if the path is readable.
+
+    Parameters
+    ----------
+    path : str
+        The path to check.
+
+    Returns
+    -------
+    bool
+        True if the path is readable, False otherwise.
+    """
     import zarr

     try:
@@ -120,14 +159,37 @@ def _path_readable(path):


 class Dataset:
-    def __init__(self, path):
+    """A class to represent a dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a Dataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         self.path = path

         _, ext = os.path.splitext(self.path)
         if ext != ".zarr":
             raise ValueError(f"Unsupported extension={ext} for path={self.path}")

-    def add_dataset(self, mode="r+", **kwargs):
+    def add_dataset(self, mode: str = "r+", **kwargs: Any) -> zarr.Array:
+        """Add a dataset to the Zarr store.
+
+        Parameters
+        ----------
+        mode : str, optional
+            The mode to open the Zarr store.
+        **kwargs
+            Additional arguments for the dataset.
+
+        Returns
+        -------
+        zarr.Array
+            The added dataset.
+        """
         import zarr

         z = zarr.open(self.path, mode=mode)
@@ -135,7 +197,14 @@ class Dataset:

         return add_zarr_dataset(zarr_root=z, **kwargs)

-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         import zarr

         LOG.debug(f"Updating metadata {kwargs}")
@@ -148,16 +217,19 @@ class Dataset:
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))

     @cached_property
-    def anemoi_dataset(self):
+    def anemoi_dataset(self) -> Any:
+        """Get the Anemoi dataset."""
         return open_dataset(self.path)

     @cached_property
-    def zarr_metadata(self):
+    def zarr_metadata(self) -> dict:
+        """Get the Zarr metadata."""
         import zarr

         return dict(zarr.open(self.path, mode="r").attrs)

-    def print_info(self):
+    def print_info(self) -> None:
+        """Print information about the dataset."""
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -166,13 +238,42 @@ class Dataset:
         except Exception as e:
             LOG.info(e)

-    def get_zarr_chunks(self):
+    def get_zarr_chunks(self) -> tuple:
+        """Get the chunks of the Zarr dataset.
+
+        Returns
+        -------
+        tuple
+            The chunks of the Zarr dataset.
+        """
         import zarr

         z = zarr.open(self.path, mode="r")
         return z["data"].chunks

-    def check_name(self, resolution, dates, frequency, raise_exception=True, is_test=False):
+    def check_name(
+        self,
+        resolution: str,
+        dates: list[datetime.datetime],
+        frequency: datetime.timedelta,
+        raise_exception: bool = True,
+        is_test: bool = False,
+    ) -> None:
+        """Check the name of the dataset.
+
+        Parameters
+        ----------
+        resolution : str
+            The resolution of the dataset.
+        dates : list of datetime.datetime
+            The dates of the dataset.
+        frequency : datetime.timedelta
+            The frequency of the dataset.
+        raise_exception : bool, optional
+            Whether to raise an exception if the name is invalid.
+        is_test : bool, optional
+            Whether this is a test.
+        """
         basename, _ = os.path.splitext(os.path.basename(self.path))
         try:
             DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
@@ -182,8 +283,14 @@ class Dataset:
         else:
             LOG.warning(f"Dataset name error: {e}")

-    def get_main_config(self):
-        """
+    def get_main_config(self) -> Any:
+        """Get the main configuration of the dataset.
+
+        Returns
+        -------
+        Any
+            The main configuration.
+        """
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -191,7 +298,16 @@ class Dataset:


 class WritableDataset(Dataset):
-    def __init__(self, path):
+    """A class to represent a writable dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a WritableDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)
         self.path = path

@@ -200,14 +316,26 @@ class WritableDataset(Dataset):
         self.z = zarr.open(self.path, mode="r+")

     @cached_property
-    def data_array(self):
+    def data_array(self) -> Any:
+        """Get the data array of the dataset."""
         import zarr

         return zarr.open(self.path, mode="r+")["data"]


 class NewDataset(Dataset):
-    def __init__(self, path, overwrite=False):
+    """A class to represent a new dataset."""
+
+    def __init__(self, path: str, overwrite: bool = False):
+        """Initialize a NewDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        """
         super().__init__(path)
         self.path = path

@@ -218,9 +346,20 @@ class NewDataset(Dataset):


 class Actor:  # TODO: rename to Creator
+    """A base class for dataset creation actors."""
+
     dataset_class = WritableDataset

-    def __init__(self, path, cache=None):
+    def __init__(self, path: str, cache: Optional[str] = None):
+        """Initialize an Actor instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         # Catch all floating point errors, including overflow, sqrt(<0), etc
         np.seterr(all="raise", under="warn")

@@ -228,23 +367,52 @@ class Actor:  # TODO: rename to Creator
         self.cache = cache
         self.dataset = self.dataset_class(self.path)

-    def run(self):
+    def run(self) -> None:
+        """Run the actor."""
         # to be implemented in the sub-classes
         raise NotImplementedError()

-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         self.dataset.update_metadata(**kwargs)

-    def _cache_context(self):
+    def _cache_context(self) -> Any:
+        """Get the cache context.
+
+        Returns
+        -------
+        Any
+            The cache context.
+        """
         from .utils import cache_context

         return cache_context(self.cache)

-    def check_unkown_kwargs(self, kwargs):
+    def check_unkown_kwargs(self, kwargs: dict) -> None:
+        """Check for unknown keyword arguments.
+
+        Parameters
+        ----------
+        kwargs : dict
+            The keyword arguments.
+        """
         # remove this latter
         LOG.warning(f"💬 Unknown kwargs for {self.__class__.__name__}: {kwargs}")

-    def read_dataset_metadata(self, path):
+    def read_dataset_metadata(self, path: str) -> None:
+        """Read the metadata of the dataset.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         ds = open_dataset(path)
         self.dataset_shape = ds.shape
         self.variables_names = ds.variables
@@ -253,7 +421,19 @@ class Actor:  # TODO: rename to Creator

         self.missing_dates = sorted(list([self.dates[i] for i in ds.missing]))

-        def check_missing_dates(expected):
+        def check_missing_dates(expected: list[np.datetime64]) -> None:
+            """Check if the missing dates in the dataset match the expected dates.
+
+            Parameters
+            ----------
+            expected : list of np.datetime64
+                The expected missing dates.
+
+            Raises
+            ------
+            ValueError
+                If the missing dates in the dataset do not match the expected dates.
+            """
             import zarr

             z = zarr.open(path, "r")
@@ -269,21 +449,43 @@ class Actor:  # TODO: rename to Creator


 class Patch(Actor):
-    def __init__(self, path, options=None, **kwargs):
+    """A class to apply patches to a dataset."""
+
+    def __init__(self, path: str, options: dict = None, **kwargs: Any):
+        """Initialize a Patch instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        options : dict, optional
+            The patch options.
+        """
         self.path = path
         self.options = options or {}

-    def run(self):
+    def run(self) -> None:
+        """Run the patch."""
         from .patch import apply_patch

         apply_patch(self.path, **self.options)


 class Size(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to compute the size of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Size instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)

-    def run(self):
+    def run(self) -> None:
+        """Run the size computation."""
         from .size import compute_directory_sizes

         metadata = compute_directory_sizes(self.path)
@@ -301,23 +503,37 @@ class Size(Actor):


 class HasRegistryMixin:
+    """A mixin class to provide registry functionality."""
+
     @cached_property
-    def registry(self):
+    def registry(self) -> Any:
+        """Get the registry."""
         from .zarr import ZarrBuiltRegistry

         return ZarrBuiltRegistry(self.path, use_threads=self.use_threads)


 class HasStatisticTempMixin:
+    """A mixin class to provide temporary statistics functionality."""
+
     @cached_property
-    def tmp_statistics(self):
+    def tmp_statistics(self) -> TmpStatistics:
+        """Get the temporary statistics."""
         directory = self.statistics_temp_dir or os.path.join(self.path + ".storage_for_statistics.tmp")
         return TmpStatistics(directory)


 class HasElementForDataMixin:
-    def create_elements(self, config):
+    """A mixin class to provide element creation functionality for data."""
+
+    def create_elements(self, config: Any) -> None:
+        """Create elements for the dataset.

+        Parameters
+        ----------
+        config : Any
+            The configuration.
+        """
         assert self.registry
         assert self.tmp_statistics

@@ -329,11 +545,24 @@ class HasElementForDataMixin:
         self.output = build_output(config.output, parent=self)

         self.input = build_input_(main_config=config, output_config=self.output)
-        LOG.info("%s", self.input)
+        # LOG.info("%s", self.input)


-def build_input_(main_config, output_config):
+def build_input_(main_config: Any, output_config: Any) -> Any:
+    """Build the input for the dataset.

+    Parameters
+    ----------
+    main_config : Any
+        The main configuration.
+    output_config : Any
+        The output configuration.
+
+    Returns
+    -------
+    Any
+        The input builder.
+    """
     builder = build_input(
         main_config.input,
         data_sources=main_config.get("data_sources", {}),
@@ -348,21 +577,46 @@ def build_input_(main_config, output_config):


 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to initialize a new dataset."""
+
     dataset_class = NewDataset

     def __init__(
         self,
-        path,
-        config,
-        check_name=False,
-        overwrite=False,
-        use_threads=False,
-        statistics_temp_dir=None,
-        progress=None,
-        test=False,
-        cache=None,
-        **kwargs,
+        path: str,
+        config: dict,
+        check_name: bool = False,
+        overwrite: bool = False,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        test: bool = False,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize an Init instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        config : dict
+            The configuration.
+        check_name : bool, optional
+            Whether to check the dataset name.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        test : bool, optional
+            Whether this is a test.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         if _path_readable(path) and not overwrite:
             raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")

@@ -390,12 +644,26 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)

-    def run(self):
+    def run(self) -> int:
+        """Run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
         with self._cache_context():
             return self._run()

-    def _run(self):
-        """
+    def _run(self) -> int:
+        """Internal method to run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
+        """Create an empty dataset of the right final shape.

         Read a small part of the data to get the shape of the data and the resolution and more metadata.
         """
@@ -547,9 +815,35 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):


 class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to load data into a dataset."""
+
     def __init__(
-        self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs
+        self,
+        path: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize a Load instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         super().__init__(path, cache=cache)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -567,11 +861,13 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         self.data_array = self.dataset.data_array
         self.n_groups = len(self.groups)

-    def run(self):
+    def run(self) -> None:
+        """Run the data loading."""
         with self._cache_context():
             self._run()

-    def _run(self):
+    def _run(self) -> None:
+        """Internal method to run the data loading."""
         for igroup, group in enumerate(self.groups):
             if not self.chunk_filter(igroup):
                 continue
@@ -595,7 +891,14 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):

         self.dataset.print_info()

-    def load_result(self, result):
+    def load_result(self, result: Any) -> None:
+        """Load the result into the dataset.
+
+        Parameters
+        ----------
+        result : Any
+            The result to load.
+        """
         # There is one cube to load for each result.
         dates = list(result.group_of_dates)

@@ -656,14 +959,30 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         array.flush()
         LOG.info("Flushed data array")

-    def _get_allow_nans(self):
+    def _get_allow_nans(self) -> Union[bool, list]:
+        """Get the allow_nans configuration.
+
+        Returns
+        -------
+        bool | list
+            The allow_nans configuration.
+        """
         config = self.main_config
         if "allow_nans" in config.build:
             return config.build.allow_nans

         return config.statistics.get("allow_nans", [])

-    def load_cube(self, cube, array):
+    def load_cube(self, cube: Any, array: ViewCacheArray) -> None:
+        """Load the cube into the array.
+
+        Parameters
+        ----------
+        cube : Any
+            The cube to load.
+        array : ViewCacheArray
+            The array to load into.
+        """
         # There are several cubelets for each cube
         start = time.time()
         load = 0
@@ -673,7 +992,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         total = cube.count(reading_chunks)
         LOG.debug(f"Loading datacube: {cube}")

-        def position(x):
+        def position(x: Any) -> Optional[int]:
             if isinstance(x, str) and "/" in x:
                 x = x.split("/")
                 return int(x[0])
@@ -715,7 +1034,29 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):


 class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
-    def __init__(self, path, statistics_temp_dir=None, delta=[], use_threads=False, **kwargs):
+    """A class to clean up temporary data and registry entries."""
+
+    def __init__(
+        self,
+        path: str,
+        statistics_temp_dir: Optional[str] = None,
+        delta: list = [],
+        use_threads: bool = False,
+        **kwargs: Any,
+    ):
+        """Initialize a Cleanup instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        delta : list, optional
+            The delta values.
+        use_threads : bool, optional
+            Whether to use threads.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -725,7 +1066,8 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
             for d in delta
         ]

-    def run(self):
+    def run(self) -> None:
+        """Run the cleanup."""
         self.tmp_statistics.delete()
         self.registry.clean()
         for actor in self.actors:
@@ -733,16 +1075,35 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):


 class Verify(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to verify the integrity of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Verify instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)

-    def run(self):
+    def run(self) -> None:
+        """Run the verification."""
         LOG.info(f"Verifying dataset at {self.path}")
         LOG.info(str(self.dataset.anemoi_dataset))


 class AdditionsMixin:
-    def skip(self):
+    """A mixin class to handle dataset additions."""
+
+    def skip(self) -> bool:
+        """Check if the additions should be skipped.
+
+        Returns
+        -------
+        bool
+            Whether to skip the additions.
+        """
         frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         if not self.delta.total_seconds() % frequency.total_seconds() == 0:
             LOG.debug(f"Delta {self.delta} is not a multiple of frequency {frequency}. Skipping.")
@@ -755,13 +1116,15 @@ class AdditionsMixin:
         return False

     @cached_property
-    def tmp_storage_path(self):
+    def tmp_storage_path(self) -> str:
+        """Get the path to the temporary storage."""
         name = "storage_for_additions"
         if self.delta:
             name += frequency_to_string(self.delta)
         return os.path.join(f"{self.path}.{name}.tmp")

-    def read_from_dataset(self):
+    def read_from_dataset(self) -> None:
+        """Read data from the dataset."""
         self.variables = self.dataset.anemoi_dataset.variables
         self.frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         start = self.dataset.zarr_metadata["statistics_start_date"]
@@ -780,11 +1143,34 @@ class AdditionsMixin:


 class DeltaDataset:
-    def __init__(self, ds, idelta):
+    """A class to represent a dataset with delta values."""
+
+    def __init__(self, ds: Any, idelta: int):
+        """Initialize a DeltaDataset instance.
+
+        Parameters
+        ----------
+        ds : Any
+            The dataset.
+        idelta : int
+            The delta value.
+        """
         self.ds = ds
         self.idelta = idelta

-    def __getitem__(self, i):
+    def __getitem__(self, i: int) -> Any:
+        """Get an item from the dataset.
+
+        Parameters
+        ----------
+        i : int
+            The index.
+
+        Returns
+        -------
+        Any
+            The item.
+        """
         j = i - self.idelta
         if j < 0:
             raise MissingDateError(f"Missing date {j}")
@@ -792,13 +1178,29 @@ class DeltaDataset:


 class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to initialize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize an _InitAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
         self.progress = progress

-    def run(self):
+    def run(self) -> None:
+        """Run the additions initialization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -808,14 +1210,40 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.create()
         LOG.info(f"Dataset {self.tmp_storage_path} additions initialized.")

-    def cleanup(self):
+    def cleanup(self) -> None:
+        """Clean up the temporary storage."""
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         self.tmp_storage.delete()
         LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")


 class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, parts=None, use_threads=False, progress=None, **kwargs):
+    """A class to run dataset additions."""
+
+    def __init__(
+        self,
+        path: str,
+        delta: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a _RunAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -825,7 +1253,8 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Writing in {self.tmp_storage_path}")

-    def run(self):
+    def run(self) -> None:
+        """Run the additions."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -846,7 +1275,14 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.flush()
         LOG.debug(f"Dataset {self.path} additions run.")

-    def allow_nans(self):
+    def allow_nans(self) -> bool:
+        """Check if NaNs are allowed.
+
+        Returns
+        -------
+        bool
+            Whether NaNs are allowed.
+        """
         if self.dataset.anemoi_dataset.metadata.get("allow_nans", False):
             return True

@@ -858,7 +1294,22 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):


 class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to finalize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize a _FinaliseAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -867,7 +1318,8 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Reading from {self.tmp_storage_path}.")

-    def run(self):
+    def run(self) -> None:
+        """Run the additions finalization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}.")
             return
@@ -969,7 +1421,14 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self._write(self.summary)
         self.tmp_storage.delete()

-    def _write(self, summary):
+    def _write(self, summary: Summary) -> None:
+        """Write the summary to the dataset.
+
+        Parameters
+        ----------
+        summary : Summary
+            The summary to write.
+        """
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             name = f"statistics_tendencies_{frequency_to_string(self.delta)}_{k}"
             self.dataset.add_dataset(name=name, array=summary[k], dimensions=("variable",))
@@ -977,9 +1436,22 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         LOG.debug(f"Wrote additions in {self.path}")


-def multi_addition(cls):
+def multi_addition(cls: type) -> type:
+    """Create a class to handle multiple additions.
+
+    Parameters
+    ----------
+    cls : type
+        The class to handle additions.
+
+    Returns
+    -------
+    type
+        The class to handle multiple additions.
+    """
+
     class MultiAdditions:
-        def __init__(self, *args, **kwargs):
+        def __init__(self, *args, **kwargs: Any):
             self.actors = []

             for k in kwargs.pop("delta", []):
@@ -988,7 +1460,8 @@ def multi_addition(cls):
             if not self.actors:
                 LOG.warning("No delta found in kwargs, no additions will be computed.")

-        def run(self):
+        def run(self) -> None:
+            """Run the additions."""
             for actor in self.actors:
                 actor.run()

@@ -1001,13 +1474,36 @@ FinaliseAdditions = multi_addition(_FinaliseAdditions)


 class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
-    def __init__(self, path, use_threads=False, statistics_temp_dir=None, progress=None, **kwargs):
+    """A class to compute statistics for a dataset."""
+
+    def __init__(
+        self,
+        path: str,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a Statistics instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.progress = progress
         self.statistics_temp_dir = statistics_temp_dir

-    def run(self):
+    def run(self) -> None:
+        """Run the statistics computation."""
         start, end = (
             self.dataset.zarr_metadata["statistics_start_date"],
             self.dataset.zarr_metadata["statistics_end_date"],
@@ -1034,7 +1530,8 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(f"Wrote statistics in {self.path}")

     @cached_property
-    def allow_nans(self):
+    def allow_nans(self) -> Union[bool, list]:
+        """Check if NaNs are allowed."""
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -1048,12 +1545,26 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         return True


-def chain(tasks):
+def chain(tasks: list) -> type:
+    """Create a class to chain multiple tasks.
+
+    Parameters
+    ----------
+    tasks : list
+        The list of tasks to chain.
+
+    Returns
+    -------
+    type
+        The class to chain multiple tasks.
+    """
+
     class Chain(Actor):
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any):
             self.kwargs = kwargs

-        def run(self):
+        def run(self) -> None:
+            """Run the chained tasks."""
             for cls in tasks:
                 t = cls(**self.kwargs)
                 t.run()
@@ -1061,7 +1572,23 @@ def chain(tasks):
     return Chain


-def creator_factory(name, trace=None, **kwargs):
+def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
+    """Create a dataset creator.
+
+    Parameters
+    ----------
+    name : str
+        The name of the creator.
+    trace : Optional[str], optional
+        The trace file.
+    **kwargs
+        Additional arguments for the creator.
+
+    Returns
+    -------
+    Any
+        The dataset creator.
+    """
     if trace:

         enable_trace(trace)
```