anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +1 -2
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +4 -8
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +12 -13
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +16 -12
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +16 -21
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +5 -9
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +4 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/zarr.py
CHANGED
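
The last hunk below extends the registry cleanup so that the "_build" working directory next to the Zarr store is also removed, tolerating its absence. A minimal standalone sketch of that pattern, with a hypothetical store path:

import shutil

zarr_path = "/tmp/example-dataset.zarr"  # hypothetical store location

_build = zarr_path + "/_build"
try:
    shutil.rmtree(_build)  # drop the build working directory if it exists
except FileNotFoundError:
    pass  # nothing to clean up
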
@@ -11,7 +11,6 @@ import datetime
 import logging
 import shutil
 from typing import Any
-from typing import Optional

 import numpy as np
 import zarr
@@ -120,7 +119,7 @@ class ZarrBuiltRegistry:
     flags = None
     z = None

-    def __init__(self, path: str, synchronizer_path:
+    def __init__(self, path: str, synchronizer_path: str | None = None, use_threads: bool = False):
         """Initialize the ZarrBuiltRegistry.

         Parameters
@@ -154,6 +153,12 @@ class ZarrBuiltRegistry:
         except FileNotFoundError:
             pass

+        _build = self.zarr_path + "/_build"
+        try:
+            shutil.rmtree(_build)
+        except FileNotFoundError:
+            pass
+
     def _open_write(self) -> zarr.Group:
         """Open the Zarr store in write mode."""
         import zarr

anemoi/datasets/data/__init__.py
CHANGED
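
The hunks below make open_dataset honour an ANEMOI_DATASETS_TRACE environment variable (wrapping the opened dataset in anemoi.datasets.testing.Trace when the variable is a non-zero integer) and give save_dataset an explicit signature. A hedged usage sketch based only on those signatures; the dataset reference and output path are placeholders:

import os

from anemoi.datasets.data import open_dataset, save_dataset

os.environ["ANEMOI_DATASETS_TRACE"] = "1"  # read at call time inside open_dataset via int(os.environ.get(...))

ds = open_dataset("path/to/some-dataset.zarr")  # placeholder dataset reference
save_dataset(ds, "/tmp/some-dataset-copy.zarr", n_workers=4)
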
@@ -8,9 +8,9 @@
 # nor does it submit to any jurisdiction.

 import logging
+import os
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Set

 # from .dataset import FullIndex
 # from .dataset import Shape
@@ -82,6 +82,9 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
     Dataset
         The opened dataset.
     """
+
+    trace = int(os.environ.get("ANEMOI_DATASETS_TRACE", 0))
+
     # That will get rid of OmegaConf objects

     args, kwargs = _convert(args), _convert(kwargs)
@@ -90,22 +93,28 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
     ds = ds.mutate()
     ds.arguments = {"args": args, "kwargs": kwargs}
     ds._check()
+
+    if trace:
+        from anemoi.datasets.testing import Trace
+
+        ds = Trace(ds)
+
     return ds


-def save_dataset(
+def save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
     """Open a dataset and save it to disk.

     Parameters
     ----------
-
-
+    dataset : Dataset
+        anemoi-dataset opened from python to save to Zarr store
     zarr_path : str
         Path to store the obtained anemoi dataset to disk.
     n_workers : int
         Number of workers to use for parallel processing. If none, sequential processing will be performed.
     """
-    _save_dataset(
+    _save_dataset(dataset, zarr_path, n_workers)


 def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
@@ -124,6 +133,6 @@ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
         The list of dataset names.
     """
     ds = _open_dataset(*args, **kwargs)
-    names:
+    names: set[str] = set()
     ds.get_dataset_names(names)
     return sorted(names)

anemoi/datasets/data/complement.py
CHANGED

@@ -12,11 +12,6 @@ import logging
 from abc import abstractmethod
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set
-from typing import Tuple

 import numpy as np
 from numpy.typing import NDArray
@@ -91,26 +86,26 @@ class Complement(Combined):
         raise ValueError("Augment: no missing variables")

     @property
-    def variables(self) ->
+    def variables(self) -> list[str]:
         """Returns the list of variables to be added to the target dataset."""
         return self._variables

     @property
-    def statistics(self) ->
+    def statistics(self) -> dict[str, NDArray[Any]]:
         datasets = [self._source, self._target]
         return {
             k: [d.statistics[k][d.name_to_index[i]] for d in datasets for i in d.variables if i in self.variables]
             for k in datasets[0].statistics
         }

-    def statistics_tendencies(self, delta:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         index = [self._source.name_to_index[v] for v in self._variables]
         if delta is None:
             delta = self.frequency
         return {k: v[index] for k, v in self._source.statistics_tendencies(delta).items()}

     @property
-    def name_to_index(self) ->
+    def name_to_index(self) -> dict[str, int]:
         """Returns a dictionary mapping variable names to their indices."""
         return {v: i for i, v in enumerate(self.variables)}

@@ -121,7 +116,7 @@ class Complement(Combined):
         return (shape[0], len(self._variables)) + shape[2:]

     @property
-    def variables_metadata(self) ->
+    def variables_metadata(self) -> dict[str, Any]:
         """Returns the metadata of the variables to be added to the target dataset."""
         # Merge the two dicts first
         all_meta = {**self._source.variables_metadata, **self._target.variables_metadata}
@@ -142,7 +137,7 @@ class Complement(Combined):
         pass

     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the source and target datasets."""
         missing = self._source.missing.copy()
         missing = missing | self._target.missing
@@ -317,7 +312,7 @@ class ComplementNearest(Complement):
         return apply_index_to_slices_changes(result, changes)


-def complement_factory(args:
+def complement_factory(args: tuple, kwargs: dict) -> Dataset:
     """Factory function to create a Complement instance based on the provided arguments.

     Parameters

anemoi/datasets/data/concat.py
CHANGED
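
In ConcatMixin.missing below, the combined set is now built with a comprehension that offsets each dataset's missing indices by the total length of the datasets before it. A standalone sketch of that bookkeeping with toy lengths and missing sets (not the library API):

datasets = [
    {"length": 10, "missing": {2, 7}},  # toy stand-ins for the concatenated datasets
    {"length": 5, "missing": {0}},
]

result: set[int] = set()
offset = 0
for d in datasets:
    result = result | {m + offset for m in d["missing"]}  # same offsetting as ConcatMixin.missing
    offset += d["length"]

print(sorted(result))  # [2, 7, 10]
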
@@ -11,9 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import List
-from typing import Set
-from typing import Tuple

 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta
@@ -123,12 +120,12 @@ class ConcatMixin:
         return np.concatenate(result)

     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the concatenated datasets."""
-        result:
+        result: set[int] = set()
         offset = 0
         for d in self.datasets:
-            result = result |
+            result = result | {m + offset for m in d.missing}
             offset += len(d)
         return result

@@ -195,7 +192,7 @@ class Concat(ConcatMixin, Combined):
         return Node(self, [d.tree() for d in self.datasets])

     @classmethod
-    def check_dataset_compatibility(cls, datasets:
+    def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: bool = False) -> list[Any]:
         """Checks the compatibility of the datasets for concatenation and fills missing gaps if required.

         Parameters
@@ -259,7 +256,7 @@ class Concat(ConcatMixin, Combined):
         return {}


-def concat_factory(args:
+def concat_factory(args: tuple[Any, ...], kwargs: dict) -> Concat:
     """Factory function to create a Concat object.

     Parameters

anemoi/datasets/data/dataset.py
CHANGED
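
Most of the hunks below apply one pattern: typing.List/Dict/Set/Tuple/Optional annotations are replaced by built-in generics (PEP 585) and unions written with "|" (PEP 604). A toy, self-contained illustration of the new style, loosely modelled on _dates_to_indices; the helper and sample data are made up for the example and need Python 3.10 or later:

import datetime


def dates_to_indices(
    dates: list[datetime.datetime],
    start: None | str | datetime.datetime,
    end: None | str | datetime.datetime,
) -> list[int]:
    """Toy stand-in for Dataset._dates_to_indices, written in the new annotation style."""

    def as_datetime(value: None | str | datetime.datetime, default: datetime.datetime) -> datetime.datetime:
        if value is None:
            return default
        if isinstance(value, str):
            return datetime.datetime.fromisoformat(value)
        return value

    lo = as_datetime(start, dates[0])
    hi = as_datetime(end, dates[-1])
    return [i for i, date in enumerate(dates) if lo <= date <= hi]


dates = [datetime.datetime(2020, 1, 1) + datetime.timedelta(hours=6 * i) for i in range(8)]
print(dates_to_indices(dates, "2020-01-01T06:00:00", None))  # [1, 2, 3, 4, 5, 6, 7]
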
@@ -22,15 +22,10 @@ try:
 except ImportError:
     # Python 3.9
     EllipsisType = type(Ellipsis)
+from collections.abc import Sequence
+from collections.abc import Sized
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Sequence
-from typing import Set
-from typing import Sized
-from typing import Tuple
 from typing import Union

 import numpy as np
@@ -48,8 +43,8 @@ if TYPE_CHECKING:
 LOG = logging.getLogger(__name__)


-Shape =
-TupleIndex =
+Shape = tuple[int, ...]
+TupleIndex = tuple[Union[int, slice, EllipsisType], ...]
 FullIndex = Union[int, slice, TupleIndex]


@@ -92,8 +87,8 @@ def _tidy(v: Any) -> Any:


 class Dataset(ABC, Sized):
-    arguments:
-    _name:
+    arguments: dict[str, Any] = {}
+    _name: str | None = None

     def mutate(self) -> "Dataset":
         """Give an opportunity to a subclass to return a new Dataset object of a different class, if needed.
@@ -148,7 +143,7 @@ class Dataset(ABC, Sized):
         return result

     @property
-    def name(self) ->
+    def name(self) -> str | None:
         """Return the name of the dataset."""
         return self._name

@@ -360,9 +355,9 @@ class Dataset(ABC, Sized):

     def _dates_to_indices(
         self,
-        start:
-        end:
-    ) ->
+        start: None | str | datetime.datetime,
+        end: None | str | datetime.datetime,
+    ) -> list[int]:
         """Convert date range to a list of indices.

         Parameters
@@ -387,7 +382,7 @@ class Dataset(ABC, Sized):

         return [i for i, date in enumerate(self.dates) if start <= date <= end]

-    def _select_to_columns(self, vars:
+    def _select_to_columns(self, vars: str | list[str] | tuple[str] | set) -> list[int]:
         """Convert variable names to a list of column indices.

         Parameters
@@ -411,7 +406,7 @@ class Dataset(ABC, Sized):

         return [self.name_to_index[v] for v in vars]

-    def _drop_to_columns(self, vars:
+    def _drop_to_columns(self, vars: str | Sequence[str]) -> list[int]:
         """Convert variable names to a list of column indices to drop.

         Parameters
@@ -432,7 +427,7 @@ class Dataset(ABC, Sized):

         return sorted([v for k, v in self.name_to_index.items() if k not in vars])

-    def _reorder_to_columns(self, vars:
+    def _reorder_to_columns(self, vars: str | list[str] | tuple[str] | dict[str, int]) -> list[int]:
         """Convert variable names to a list of reordered column indices.

         Parameters
@@ -465,8 +460,8 @@ class Dataset(ABC, Sized):
         return indices

     def dates_interval_to_indices(
-        self, start:
-    ) ->
+        self, start: None | str | datetime.datetime, end: None | str | datetime.datetime
+    ) -> list[int]:
         """Convert date interval to a list of indices.

         Parameters
@@ -483,7 +478,7 @@ class Dataset(ABC, Sized):
         """
         return self._dates_to_indices(start, end)

-    def provenance(self) ->
+    def provenance(self) -> dict[str, Any]:
         """Return the provenance information of the dataset.

         Returns
@@ -511,7 +506,7 @@ class Dataset(ABC, Sized):
         return tuple(shape)

     @property
-    def typed_variables(self) ->
+    def typed_variables(self) -> dict[str, Any]:
         """Return the variables with their types."""
         from anemoi.transform.variables import Variable

@@ -532,7 +527,7 @@ class Dataset(ABC, Sized):

         return result

-    def _input_sources(self) ->
+    def _input_sources(self) -> list[Any]:
         """Return the input sources of the dataset.

         Returns
@@ -544,7 +539,7 @@ class Dataset(ABC, Sized):
         self.collect_input_sources(sources)
         return sources

-    def metadata(self) ->
+    def metadata(self) -> dict[str, Any]:
         """Return the metadata of the dataset.

         Returns
@@ -588,7 +583,7 @@ class Dataset(ABC, Sized):
         """Return the end date of the dataset."""
         return self.dates[-1]

-    def dataset_metadata(self) ->
+    def dataset_metadata(self) -> dict[str, Any]:
         """Return the metadata of the dataset.

         Returns
@@ -608,7 +603,7 @@ class Dataset(ABC, Sized):
             name=self.name,
         )

-    def _supporting_arrays(self, *path: str) ->
+    def _supporting_arrays(self, *path: str) -> dict[str, NDArray[Any]]:
         """Return the supporting arrays of the dataset.

         Parameters
@@ -646,7 +641,7 @@ class Dataset(ABC, Sized):

         return result

-    def supporting_arrays(self) ->
+    def supporting_arrays(self) -> dict[str, NDArray[Any]]:
         """Return the supporting arrays to be saved in the checkpoints.

         Returns
@@ -657,7 +652,7 @@ class Dataset(ABC, Sized):
         arrays, _ = self._supporting_arrays_and_sources()
         return arrays

-    def _supporting_arrays_and_sources(self) ->
+    def _supporting_arrays_and_sources(self) -> tuple[dict[str, NDArray], dict[int, list[str]]]:
         """Return the supporting arrays and their sources.

         Returns
@@ -684,7 +679,7 @@ class Dataset(ABC, Sized):

         return result, source_to_arrays

-    def collect_supporting_arrays(self, collected:
+    def collect_supporting_arrays(self, collected: list[tuple[tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
         """Collect supporting arrays.

         Parameters
@@ -697,7 +692,7 @@ class Dataset(ABC, Sized):
         # Override this method to add more arrays
         pass

-    def metadata_specific(self, **kwargs: Any) ->
+    def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
         """Return specific metadata of the dataset.

         Parameters
@@ -764,7 +759,7 @@ class Dataset(ABC, Sized):
         """Return the label of the dataset."""
         return self.__class__.__name__.lower()

-    def computed_constant_fields(self) ->
+    def computed_constant_fields(self) -> list[str]:
         """Return the computed constant fields of the dataset.

         Returns
@@ -781,7 +776,7 @@ class Dataset(ABC, Sized):

         return sorted(self._compute_constant_fields_from_a_few_samples())

-    def _compute_constant_fields_from_a_few_samples(self) ->
+    def _compute_constant_fields_from_a_few_samples(self) -> list[str]:
         """Compute constant fields from a few samples.

         Returns
@@ -822,7 +817,7 @@ class Dataset(ABC, Sized):

         return [v for i, v in enumerate(self.variables) if constants[i]]

-    def _compute_constant_fields_from_statistics(self) ->
+    def _compute_constant_fields_from_statistics(self) -> list[str]:
         """Compute constant fields from statistics.

         Returns
@@ -842,8 +837,8 @@ class Dataset(ABC, Sized):

     def plot(
         self,
-        date:
-        variable:
+        date: int | datetime.datetime | np.datetime64 | str,
+        variable: int | str,
         member: int = 0,
         **kwargs: Any,
     ) -> "matplotlib.pyplot.Axes":
@@ -873,10 +868,10 @@ class Dataset(ABC, Sized):

     def to_index(
         self,
-        date:
-        variable:
+        date: int | datetime.datetime | np.datetime64 | str,
+        variable: int | str,
         member: int = 0,
-    ) ->
+    ) -> tuple[int, int, int]:
         """Convert date, variable, and member to indices.

         Parameters
@@ -945,7 +940,7 @@ class Dataset(ABC, Sized):

     @property
     @abstractmethod
-    def variables(self) ->
+    def variables(self) -> list[str]:
         """Return the list of variables in the dataset."""
         pass

@@ -969,7 +964,7 @@ class Dataset(ABC, Sized):

     @property
     @abstractmethod
-    def name_to_index(self) ->
+    def name_to_index(self) -> dict[str, int]:
         """Return the mapping of variable names to indices."""
         pass

@@ -1005,30 +1000,30 @@ class Dataset(ABC, Sized):

     @property
     @abstractmethod
-    def variables_metadata(self) ->
+    def variables_metadata(self) -> dict[str, Any]:
         """Return the metadata of the variables in the dataset."""
         pass

     @abstractmethod
     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Return the set of missing indices in the dataset."""
         pass

     @abstractmethod
     @cached_property
-    def constant_fields(self) ->
+    def constant_fields(self) -> list[str]:
         """Return the list of constant fields in the dataset."""
         pass

     @abstractmethod
     @cached_property
-    def statistics(self) ->
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Return the statistics of the dataset."""
         pass

     @abstractmethod
-    def statistics_tendencies(self, delta:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Return the tendencies of the statistics in the dataset.

         Parameters
@@ -1071,7 +1066,7 @@ class Dataset(ABC, Sized):
         pass

     @abstractmethod
-    def collect_input_sources(self, sources:
+    def collect_input_sources(self, sources: list[Any]) -> None:
         """Collect the input sources of the dataset.

         Parameters
@@ -1082,7 +1077,7 @@ class Dataset(ABC, Sized):
         pass

     @abstractmethod
-    def get_dataset_names(self, names:
+    def get_dataset_names(self, names: set[str]) -> None:
         """Get the names of the datasets.

         Parameters

anemoi/datasets/data/debug.py
CHANGED
@@ -11,12 +11,10 @@
 import logging
 import os
 import textwrap
+from collections.abc import Callable
 from functools import wraps
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Callable
-from typing import List
-from typing import Optional

 from anemoi.utils.text import Tree
 from numpy.typing import NDArray
@@ -56,7 +54,7 @@ def css(name: str) -> str:
 class Node:
     """A class to represent a node in a dataset tree."""

-    def __init__(self, dataset: "Dataset", kids:
+    def __init__(self, dataset: "Dataset", kids: list[Any], **kwargs: Any) -> None:
         """Initializes a Node object.

         Parameters
@@ -72,7 +70,7 @@ class Node:
         self.kids = kids
         self.kwargs = kwargs

-    def _put(self, indent: int, result:
+    def _put(self, indent: int, result: list[str]) -> None:
         """Helper method to add the node representation to the result list.

         Parameters
@@ -103,11 +101,11 @@ class Node:
         str
             String representation of the node.
         """
-        result:
+        result: list[str] = []
         self._put(0, result)
         return "\n".join(result)

-    def graph(self, digraph:
+    def graph(self, digraph: list[str], nodes: dict) -> None:
         """Generates a graph representation of the node.

         Parameters
@@ -170,7 +168,7 @@ class Node:
         digraph.append("}")
         return "\n".join(digraph)

-    def _html(self, indent: str, rows:
+    def _html(self, indent: str, rows: list[list[str]]) -> None:
         """Helper method to add the node representation to the HTML rows.

         Parameters
@@ -273,7 +271,7 @@ class Node:
 class Source:
     """A class used to follow the provenance of a data point."""

-    def __init__(self, dataset: Any, index: int, source:
+    def __init__(self, dataset: Any, index: int, source: Any | None = None, info: Any | None = None) -> None:
         """Initializes a Source object.

         Parameters

anemoi/datasets/data/ensemble.py
CHANGED
@@ -10,8 +10,6 @@

 import logging
 from typing import Any
-from typing import Dict
-from typing import Tuple

 import numpy as np
 from numpy.typing import NDArray
@@ -105,7 +103,7 @@ class Number(Forwards):
         """
         return Node(self, [self.forward.tree()], numbers=[n + 1 for n in self.members])

-    def metadata_specific(self, **kwargs: Any) ->
+    def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
         """Returns metadata specific to the Number object.

         Parameters
@@ -122,7 +120,7 @@ class Number(Forwards):
             "numbers": [n + 1 for n in self.members],
         }

-    def forwards_subclass_metadata_specific(self) ->
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Returns metadata specific to the Number object."""
         return {}

@@ -140,7 +138,7 @@ class Ensemble(GivenAxis):
         """
         return Node(self, [d.tree() for d in self.datasets])

-    def forwards_subclass_metadata_specific(self) ->
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the forwards subclass.

         Returns:
@@ -149,7 +147,7 @@ class Ensemble(GivenAxis):
         return {}


-def ensemble_factory(args:
+def ensemble_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Ensemble:
     """Factory function to create an Ensemble object.

     Parameters

anemoi/datasets/data/fill_missing.py
CHANGED

@@ -10,9 +10,6 @@

 import logging
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Set

 import numpy as np
 from numpy.typing import NDArray
@@ -46,7 +43,7 @@ class MissingDatesFill(Forwards):
         """
         super().__init__(dataset)
         self._missing = set(dataset.missing)
-        self._warnings:
+        self._warnings: set[int] = set()

     @debug_indexing
     @expand_list_indexing
@@ -84,7 +81,7 @@ class MissingDatesFill(Forwards):
         return np.stack([self[i] for i in range(*s.indices(self._len))])

     @property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Get the set of missing dates."""
         return set()

@@ -153,7 +150,7 @@ class MissingDatesClosest(MissingDatesFill):
         self.closest = closest
         self._closest = {}

-    def _fill_missing(self, n: int, a:
+    def _fill_missing(self, n: int, a: int | None, b: int | None) -> NDArray[Any]:
         """Fill the missing date at the given index.

         Parameters
@@ -189,7 +186,7 @@ class MissingDatesClosest(MissingDatesFill):

         return self.forward[self._closest[n]]

-    def forwards_subclass_metadata_specific(self) ->
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get metadata specific to the subclass.

         Returns
@@ -224,7 +221,7 @@ class MissingDatesInterpolate(MissingDatesFill):
         super().__init__(dataset)
         self._alpha = {}

-    def _fill_missing(self, n: int, a:
+    def _fill_missing(self, n: int, a: int | None, b: int | None) -> NDArray[Any]:
         """Fill the missing date at the given index using interpolation.

         Parameters
@@ -264,7 +261,7 @@ class MissingDatesInterpolate(MissingDatesFill):
         alpha = self._alpha[n]
         return self.forward[a] * (1 - alpha) + self.forward[b] * alpha

-    def forwards_subclass_metadata_specific(self) ->
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get metadata specific to the subclass.

         Returns
@@ -285,7 +282,7 @@ class MissingDatesInterpolate(MissingDatesFill):
         return Node(self, [self.forward.tree()])


-def fill_missing_dates_factory(dataset: Any, method: str, kwargs:
+def fill_missing_dates_factory(dataset: Any, method: str, kwargs: dict[str, Any]) -> Dataset:
     """Factory function to create an instance of a class to fill missing dates.

     Parameters