anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
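
The bulk of the listing above is a package reorganisation: filter and source modules move out of anemoi/datasets/create/functions/ into anemoi/datasets/create/filters/ and anemoi/datasets/create/sources/, each directory gaining a legacy.py shim, and the xarray source grows a dedicated xarray_support/ package. A minimal compatibility sketch for code that imported one of the relocated modules directly; only the module paths are taken from the rename entries above, and whether the old path still resolves in 0.5.17 is not shown by this diff:

    # Hypothetical dual-layout import for the relocated "mars" source module.
    # The module paths come from the rename entries above; the fallback branch
    # is only needed if code must also run against anemoi-datasets <= 0.5.15.
    try:
        from anemoi.datasets.create.sources import mars  # layout in 0.5.17
    except ImportError:
        from anemoi.datasets.create.functions.sources import mars  # layout in 0.5.15
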
anemoi/datasets/data/misc.py
CHANGED

@@ -12,21 +12,48 @@ import calendar
 import datetime
 import logging
 from pathlib import PurePath
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+from typing import Union

 import numpy as np
 import zarr
 from anemoi.utils.config import load_config as load_settings
+from numpy.typing import NDArray

-
+if TYPE_CHECKING:
+    from .dataset import Dataset

 LOG = logging.getLogger(__name__)


-def load_config():
+def load_config() -> Dict[str, Any]:
+    """Load the configuration settings.
+
+    Returns
+    -------
+    Dict[str, Any]
+        The configuration settings.
+    """
     return load_settings(defaults={"datasets": {"named": {}, "path": []}})


-def add_named_dataset(name, path, **kwargs):
+def add_named_dataset(name: str, path: str, **kwargs: Any) -> None:
+    """Add a named dataset to the configuration.
+
+    Parameters
+    ----------
+    name : str
+        The name of the dataset.
+    path : str
+        The path to the dataset.
+    **kwargs : Any
+        Additional arguments.
+    """
     config = load_config()
     if name["datasets"]["named"]:
         raise ValueError(f"Dataset {name} already exists")
@@ -34,15 +61,37 @@ def add_named_dataset(name, path, **kwargs):
     config["datasets"]["named"][name] = path


-def add_dataset_path(path):
+def add_dataset_path(path: str) -> None:
+    """Add a dataset path to the configuration.
+
+    Parameters
+    ----------
+    path : str
+        The path to add.
+    """
     config = load_config()

     if path not in config["datasets"]["path"]:
         config["datasets"]["path"].append(path)


-def round_datetime(d, dates, up):
-    """Round up (or down) a datetime to the nearest date in a list of dates
+def round_datetime(d: np.datetime64, dates: NDArray[np.datetime64], up: bool) -> np.datetime64:
+    """Round up (or down) a datetime to the nearest date in a list of dates.
+
+    Parameters
+    ----------
+    d : np.datetime64
+        The datetime to round.
+    dates : NDArray[np.datetime64]
+        The list of dates.
+    up : bool
+        Whether to round up.
+
+    Returns
+    -------
+    np.datetime64
+        The rounded datetime.
+    """
     if dates is None or len(dates) == 0:
         return d

@@ -58,8 +107,25 @@ def round_datetime(d, dates, up):
     return dates[-1]


-def _as_date(d, dates, last):
-
+def _as_date(
+    d: Union[int, str, np.datetime64, datetime.date], dates: NDArray[np.datetime64], last: bool
+) -> np.datetime64:
+    """Convert a date to a numpy datetime64 object, rounding to the nearest date in a list of dates.
+
+    Parameters
+    ----------
+    d : Union[int, str, np.datetime64, datetime.date]
+        The date to convert.
+    dates : NDArray[np.datetime64]
+        The list of dates.
+    last : bool
+        Whether to round to the last date.
+
+    Returns
+    -------
+    np.datetime64
+        The converted date.
+    """
     # WARNING, datetime.datetime is a subclass of datetime.date
     # so we need to check for datetime.datetime first

@@ -103,7 +169,7 @@ def _as_date(d, dates, last):

     if isinstance(d, str):

-        def isfloat(s):
+        def isfloat(s: str) -> bool:
             try:
                 float(s)
                 return True
@@ -164,16 +230,57 @@ def _as_date(d, dates, last):
     raise NotImplementedError(f"Unsupported date: {d} ({type(d)})")


-def as_first_date(d, dates):
+def as_first_date(d: Union[int, str, np.datetime64, datetime.date], dates: NDArray[np.datetime64]) -> np.datetime64:
+    """Convert a date to the first date in a list of dates.
+
+    Parameters
+    ----------
+    d : Union[int, str, np.datetime64, datetime.date]
+        The date to convert.
+    dates : NDArray[np.datetime64]
+        The list of dates.
+
+    Returns
+    -------
+    np.datetime64
+        The first date.
+    """
     return _as_date(d, dates, last=False)


-def as_last_date(d, dates):
+def as_last_date(d: Union[int, str, np.datetime64, datetime.date], dates: NDArray[np.datetime64]) -> np.datetime64:
+    """Convert a date to the last date in a list of dates.
+
+    Parameters
+    ----------
+    d : Union[int, str, np.datetime64, datetime.date]
+        The date to convert.
+    dates : NDArray[np.datetime64]
+        The list of dates.
+
+    Returns
+    -------
+    np.datetime64
+        The last date.
+    """
     return _as_date(d, dates, last=True)


-def _concat_or_join(datasets, kwargs):
+def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple["Dataset", Dict[str, Any]]:
+    """Concatenate or join datasets based on their date ranges.

+    Parameters
+    ----------
+    datasets : List[Dataset]
+        The list of datasets.
+    kwargs : Dict[str, Any]
+        Additional arguments.
+
+    Returns
+    -------
+    Tuple[Dataset, Dict[str, Any]]
+        The concatenated or joined dataset and remaining arguments.
+    """
     if "adjust" in kwargs:
         raise ValueError("Cannot use 'adjust' without specifying 'concat' or 'join'")
     datasets, kwargs = _auto_adjust(datasets, kwargs)
@@ -193,7 +300,20 @@ def _concat_or_join(datasets, kwargs):
     return Concat(datasets), kwargs


-def _open(a):
+def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -> "Dataset":
+    """Open a dataset from various input types.
+
+    Parameters
+    ----------
+    a : Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]
+        The input to open.
+
+    Returns
+    -------
+    Dataset
+        The opened dataset.
+    """
+    from .dataset import Dataset
     from .stores import Zarr
     from .stores import zarr_lookup

@@ -218,8 +338,27 @@ def _open(a):
     raise NotImplementedError(f"Unsupported argument: {type(a)}")


-def _auto_adjust(datasets, kwargs, exclude=None):
-
+def _auto_adjust(
+    datasets: List["Dataset"],
+    kwargs: Dict[str, Any],
+    exclude: Optional[List[str]] = None,
+) -> Tuple[List["Dataset"], Dict[str, Any]]:
+    """Automatically adjust datasets based on specified criteria.
+
+    Parameters
+    ----------
+    datasets : List[Dataset]
+        The list of datasets.
+    kwargs : Dict[str, Any]
+        Additional arguments.
+    exclude : Optional[List[str]]
+        List of keys to exclude from adjustment.
+
+    Returns
+    -------
+    Tuple[List[Dataset], Dict[str, Any]]
+        The adjusted datasets and remaining arguments.
+    """
     if "adjust" not in kwargs:
         return datasets, kwargs

@@ -296,31 +435,48 @@ def _auto_adjust(datasets, kwargs, exclude=None):
     return datasets, kwargs


-def _open_dataset(*args, **kwargs):
+def _open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
+    """Open a dataset.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments.
+    **kwargs : Any
+        Keyword arguments.

+    Returns
+    -------
+    Dataset
+        The opened dataset.
+    """
     sets = []
     for a in args:
         sets.append(_open(a))

     if "xy" in kwargs:
+        # Experimental feature, may be removed
         from .xy import xy_factory

         assert not sets, sets
         return xy_factory(args, kwargs).mutate()

     if "x" in kwargs and "y" in kwargs:
+        # Experimental feature, may be removed
         from .xy import xy_factory

         assert not sets, sets
         return xy_factory(args, kwargs).mutate()

     if "zip" in kwargs:
+        # Experimental feature, may be removed
         from .xy import zip_factory

         assert not sets, sets
         return zip_factory(args, kwargs).mutate()

     if "chain" in kwargs:
+        # Experimental feature, may be removed
         from .unchecked import chain_factory

         assert not sets, sets
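
Every hunk in misc.py follows the same pattern: behaviour is unchanged, but signatures gain PEP 484 annotations (with Dataset imported only under TYPE_CHECKING and referenced as the string "Dataset") and NumPy-style docstrings are added. A self-contained sketch of that style, loosely modelled on round_datetime above; the function name and body are illustrative, not the library's implementation:

    import numpy as np
    from numpy.typing import NDArray


    def round_to_nearest_date(d: np.datetime64, dates: NDArray[np.datetime64], up: bool) -> np.datetime64:
        """Round a datetime to the nearest entry of ``dates`` (illustrative helper, not the library's code).

        Parameters
        ----------
        d : np.datetime64
            The datetime to round.
        dates : NDArray[np.datetime64]
            Sorted candidate dates.
        up : bool
            Round up when True, otherwise round down.

        Returns
        -------
        np.datetime64
            The rounded datetime.
        """
        if dates is None or len(dates) == 0:
            return d
        candidates = dates[dates >= d] if up else dates[dates <= d]
        if len(candidates) == 0:
            return dates[-1] if up else dates[0]
        return candidates[0] if up else candidates[-1]
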
anemoi/datasets/data/missing.py
CHANGED

@@ -8,14 +8,25 @@
 # nor does it submit to any jurisdiction.


+import datetime
 import logging
 from functools import cached_property
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Set
+from typing import Tuple
+from typing import Union

 import numpy as np
+from numpy.typing import NDArray

 from anemoi.datasets.create.utils import to_datetime
 from anemoi.datasets.data import MissingDateError

+from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import TupleIndex
 from .debug import Node
 from .debug import debug_indexing
 from .forwards import Forwards
@@ -24,11 +35,30 @@ from .indexing import update_tuple

 LOG = logging.getLogger(__name__)

+# TODO: Use that class instead of ZarrMissing

-class MissingDates(Forwards):
-    # TODO: Use that class instead of ZarrMissing

-    def __init__(self, dataset, missing_dates):
+class MissingDates(Forwards):
+    """Handles missing dates in a dataset.
+
+    Attributes
+    ----------
+    dataset : Dataset
+        The dataset object.
+    missing_dates : List[Union[int, str]]
+        List of missing dates.
+    """
+
+    def __init__(self, dataset: Dataset, missing_dates: List[Union[int, str]]) -> None:
+        """Initializes the MissingDates class.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset object.
+        missing_dates : List[Union[int, str]]
+            List of missing dates.
+        """
         super().__init__(dataset)
         self.missing_dates = []

@@ -56,12 +86,25 @@ class MissingDates(Forwards):
         assert len(self._missing), "No dates to force missing"

     @cached_property
-    def missing(self):
+    def missing(self) -> Set[int]:
+        """Returns the set of missing indices."""
         return self._missing.union(self.forward.missing)

     @debug_indexing
     @expand_list_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieves the item at the given index.
+
+        Parameters
+        ----------
+        n : FullIndex
+            The index to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            The item at the given index.
+        """
         if isinstance(n, int):
             if n in self.missing:
                 self._report_missing(n)
@@ -94,23 +137,63 @@ class MissingDates(Forwards):

         raise TypeError(f"Unsupported index {n} {type(n)}")

-    def _report_missing(self, n):
+    def _report_missing(self, n: int) -> None:
+        """Reports a missing date.
+
+        Parameters
+        ----------
+        n : int
+            The index of the missing date.
+        """
         raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")

     @property
-    def reason(self):
+    def reason(self) -> Dict[str, Any]:
+        """Provides the reason for missing dates."""
         return {"missing_dates": self.missing_dates}

-    def tree(self):
+    def tree(self) -> Node:
+        """Builds a tree representation of the missing dates.
+
+        Returns
+        -------
+        Node
+            The tree representation of the missing dates.
+        """
         return Node(self, [self.forward.tree()], **self.reason)

-    def
+    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+        """Provides metadata specific to the subclass.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Metadata specific to the subclass.
+        """
         return {"missing_dates": self.missing_dates}


 class SkipMissingDates(Forwards):
-
-    def __init__(self, dataset, expected_access):
+    """Skips missing dates in a dataset.
+
+    Attributes
+    ----------
+    dataset : Dataset
+        The dataset object.
+    expected_access : Union[int, slice]
+        The expected access pattern.
+    """
+
+    def __init__(self, dataset: Dataset, expected_access: Union[int, slice]) -> None:
+        """Initializes the SkipMissingDates class.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset object.
+        expected_access : Union[int, slice]
+            The expected access pattern.
+        """
         super().__init__(dataset)

         # if isinstance(expected_access, (tuple, list)):
@@ -141,24 +224,46 @@ class SkipMissingDates(Forwards):
         self.expected_access = expected_access
         self.indices = indices

-    def __len__(self):
+    def __len__(self) -> int:
+        """Returns the length of the indices.
+
+        Returns
+        -------
+        int
+            The length of the indices.
+        """
         return len(self.indices)

     @property
-    def start_date(self):
+    def start_date(self) -> np.datetime64:
+        """Returns the start date."""
         return self.forward.start_date

     @property
-    def end_date(self):
+    def end_date(self) -> np.datetime64:
+        """Returns the end date."""
         return self.forward.end_date

     @property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Not implemented. Raises an error."""
         raise NotImplementedError("SkipMissingDates.dates")

     @debug_indexing
     @expand_list_indexing
-    def _get_tuple(self, index):
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Retrieves a tuple of items at the given index.
+
+        Parameters
+        ----------
+        index : TupleIndex
+            The index to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            The tuple of items at the given index.
+        """

         def _get_one(n):
             result = []
@@ -180,13 +285,37 @@ class SkipMissingDates(Forwards):
         return tuple(np.stack(_) for _ in result)

     @debug_indexing
-    def _get_slice(self, s):
+    def _get_slice(self, s: slice) -> Tuple[NDArray[Any], ...]:
+        """Retrieves a slice of items.
+
+        Parameters
+        ----------
+        s : slice
+            The slice to retrieve.
+
+        Returns
+        -------
+        Tuple[NDArray[Any], ...]
+            The slice of items.
+        """
         values = [self[i] for i in range(*s.indices(self._len))]
         result = [_ for _ in zip(*values)]
         return tuple(np.stack(_) for _ in result)

     @debug_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> Tuple[NDArray[Any], ...]:
+        """Retrieves the item at the given index.
+
+        Parameters
+        ----------
+        n : FullIndex
+            The index to retrieve.
+
+        Returns
+        -------
+        Tuple[NDArray[Any], ...]
+            The item at the given index.
+        """
         if isinstance(n, tuple):
             return self._get_tuple(n)

@@ -196,19 +325,56 @@ class SkipMissingDates(Forwards):
         return tuple(self.forward[i] for i in self.indices[n])

     @property
-    def frequency(self):
+    def frequency(self) -> datetime.timedelta:
+        """Returns the frequency of the dataset."""
         return self.forward.frequency

-    def tree(self):
+    def tree(self) -> Node:
+        """Builds a tree representation of the skipped missing dates.
+
+        Returns
+        -------
+        Node
+            The tree representation of the skipped missing dates.
+        """
         return Node(self, [self.forward.tree()], expected_access=self.expected_access)

-    def
+    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+        """Provides metadata specific to the subclass.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Metadata specific to the subclass.
+        """
         return {"expected_access": self.expected_access}


 class MissingDataset(Forwards):
-
-    def __init__(self, dataset, start, end):
+    """Represents a dataset with missing dates.
+
+    Attributes
+    ----------
+    dataset : Dataset
+        The dataset object.
+    start : np.datetime64
+        The start date.
+    end : np.datetime64
+        The end date.
+    """
+
+    def __init__(self, dataset: Dataset, start: np.datetime64, end: np.datetime64) -> None:
+        """Initializes the MissingDataset class.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset object.
+        start : np.datetime64
+            The start date.
+        end : np.datetime64
+            The end date.
+        """
         super().__init__(dataset)
         self.start = start
         self.end = end
@@ -222,22 +388,60 @@ class MissingDataset(Forwards):
         self._dates = np.array(dates, dtype="datetime64")
         self._missing = set(range(len(dates)))

-    def __len__(self):
+    def __len__(self) -> int:
+        """Returns the length of the dates.
+
+        Returns
+        -------
+        int
+            The length of the dates.
+        """
         return len(self._dates)

     @property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Returns the dates of the dataset."""
         return self._dates

     @property
-    def missing(self):
+    def missing(self) -> Set[int]:
+        """Returns the set of missing indices."""
         return self._missing

-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Raises an error for missing dates.
+
+        Parameters
+        ----------
+        n : FullIndex
+            The index to retrieve.
+
+        Raises
+        ------
+        MissingDateError
+            If the date is missing.
+
+        Returns:
+            NDArray[Any]: The data at the specified index.
+        """
         raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")

-    def tree(self):
+    def tree(self) -> Node:
+        """Builds a tree representation of the missing dataset.
+
+        Returns
+        -------
+        Node
+            The tree representation of the missing dataset.
+        """
         return Node(self, [self.forward.tree()], start=self.start, end=self.end)

-    def
+    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+        """Provides metadata specific to the subclass.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Metadata specific to the subclass.
+        """
         return {"start": self.start, "end": self.end}
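
As the hunks above show, MissingDates.__getitem__ and MissingDataset.__getitem__ raise MissingDateError when an index whose date is missing is accessed, and the missing property exposes the affected indices as a set. A minimal usage sketch, assuming a dataset opened through the package's public open_dataset entry point; the path below is a placeholder:

    from anemoi.datasets import open_dataset
    from anemoi.datasets.data import MissingDateError

    ds = open_dataset("/path/to/dataset.zarr")  # placeholder path

    for i in range(len(ds)):
        if i in ds.missing:  # indices whose dates are flagged as missing
            continue
        try:
            sample = ds[i]
        except MissingDateError:
            # Raised if a missing date is accessed anyway; skip it.
            continue
        # ... use `sample` ...
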