anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
anemoi/datasets/create/statistics/__init__.py
CHANGED
|
@@ -17,9 +17,6 @@ import pickle
|
|
|
17
17
|
import shutil
|
|
18
18
|
import socket
|
|
19
19
|
from typing import Any
|
|
20
|
-
from typing import List
|
|
21
|
-
from typing import Optional
|
|
22
|
-
from typing import Union
|
|
23
20
|
|
|
24
21
|
import numpy as np
|
|
25
22
|
import tqdm
|
|
@@ -77,7 +74,7 @@ def default_statistics_dates(dates: list[datetime.datetime]) -> tuple[datetime.d
|
|
|
77
74
|
return dates[0], end
|
|
78
75
|
|
|
79
76
|
|
|
80
|
-
def to_datetime(date: Union[str, datetime.datetime]) -> np.datetime64:
|
|
77
|
+
def to_datetime(date: str | datetime.datetime) -> np.datetime64:
|
|
81
78
|
"""Convert a date to numpy datetime64 format.
|
|
82
79
|
|
|
83
80
|
Parameters
|
|
@@ -97,7 +94,7 @@ def to_datetime(date: Union[str, datetime.datetime]) -> np.datetime64:
|
|
|
97
94
|
return date
|
|
98
95
|
|
|
99
96
|
|
|
100
|
-
def to_datetimes(dates: list[
|
|
97
|
+
def to_datetimes(dates: list[str | datetime.datetime]) -> list[np.datetime64]:
|
|
101
98
|
"""Convert a list of dates to numpy datetime64 format.
|
|
102
99
|
|
|
103
100
|
Parameters
|
|
@@ -221,7 +218,7 @@ def check_variance(
|
|
|
221
218
|
|
|
222
219
|
|
|
223
220
|
def compute_statistics(
|
|
224
|
-
array: NDArray[Any], check_variables_names:
|
|
221
|
+
array: NDArray[Any], check_variables_names: list[str] | None = None, allow_nans: bool = False
|
|
225
222
|
) -> dict[str, np.ndarray]:
|
|
226
223
|
"""Compute statistics for a given array, provides minimum, maximum, sum, squares, count and has_nans as a dictionary.
|
|
227
224
|
|
anemoi/datasets/create/typing.py
CHANGED
anemoi/datasets/create/utils.py
CHANGED
|
@@ -13,7 +13,6 @@ import os
|
|
|
13
13
|
import warnings
|
|
14
14
|
from contextlib import contextmanager
|
|
15
15
|
from typing import Any
|
|
16
|
-
from typing import Union
|
|
17
16
|
|
|
18
17
|
import numpy as np
|
|
19
18
|
from earthkit.data import settings
|
|
@@ -97,48 +96,6 @@ def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
|
|
|
97
96
|
return to_datetime_(*args, **kwargs)
|
|
98
97
|
|
|
99
98
|
|
|
100
|
-
def make_list_int(value: Union[str, list, tuple, int]) -> list[int]:
|
|
101
|
-
"""Convert a string, list, tuple, or integer to a list of integers.
|
|
102
|
-
|
|
103
|
-
Parameters
|
|
104
|
-
----------
|
|
105
|
-
value : str or list or tuple or int
|
|
106
|
-
The value to convert.
|
|
107
|
-
|
|
108
|
-
Returns
|
|
109
|
-
-------
|
|
110
|
-
list[int]
|
|
111
|
-
A list of integers.
|
|
112
|
-
|
|
113
|
-
Raises
|
|
114
|
-
------
|
|
115
|
-
ValueError
|
|
116
|
-
If the value cannot be converted to a list of integers.
|
|
117
|
-
"""
|
|
118
|
-
# Convert a string like "1/2/3" or "1/to/3" or "1/to/10/by/2" to a list of integers.
|
|
119
|
-
# Moved to anemoi.utils.humanize
|
|
120
|
-
# replace with from anemoi.utils.humanize import make_list_int
|
|
121
|
-
# when anemoi-utils is released and pyproject.toml is updated
|
|
122
|
-
if isinstance(value, str):
|
|
123
|
-
if "/" not in value:
|
|
124
|
-
return [value]
|
|
125
|
-
bits = value.split("/")
|
|
126
|
-
if len(bits) == 3 and bits[1].lower() == "to":
|
|
127
|
-
value = list(range(int(bits[0]), int(bits[2]) + 1, 1))
|
|
128
|
-
|
|
129
|
-
elif len(bits) == 5 and bits[1].lower() == "to" and bits[3].lower() == "by":
|
|
130
|
-
value = list(range(int(bits[0]), int(bits[2]) + int(bits[4]), int(bits[4])))
|
|
131
|
-
|
|
132
|
-
if isinstance(value, list):
|
|
133
|
-
return value
|
|
134
|
-
if isinstance(value, tuple):
|
|
135
|
-
return value
|
|
136
|
-
if isinstance(value, int):
|
|
137
|
-
return [value]
|
|
138
|
-
|
|
139
|
-
raise ValueError(f"Cannot make list from {value}")
|
|
140
|
-
|
|
141
|
-
|
|
142
99
|
def normalize_and_check_dates(
|
|
143
100
|
dates: list[datetime.datetime],
|
|
144
101
|
start: datetime.datetime,
|
anemoi/datasets/create/zarr.py
CHANGED
|
@@ -11,7 +11,6 @@ import datetime
|
|
|
11
11
|
import logging
|
|
12
12
|
import shutil
|
|
13
13
|
from typing import Any
|
|
14
|
-
from typing import Optional
|
|
15
14
|
|
|
16
15
|
import numpy as np
|
|
17
16
|
import zarr
|
|
@@ -120,7 +119,7 @@ class ZarrBuiltRegistry:
|
|
|
120
119
|
flags = None
|
|
121
120
|
z = None
|
|
122
121
|
|
|
123
|
-
def __init__(self, path: str, synchronizer_path:
|
|
122
|
+
def __init__(self, path: str, synchronizer_path: str | None = None, use_threads: bool = False):
|
|
124
123
|
"""Initialize the ZarrBuiltRegistry.
|
|
125
124
|
|
|
126
125
|
Parameters
|
|
@@ -154,6 +153,12 @@ class ZarrBuiltRegistry:
|
|
|
154
153
|
except FileNotFoundError:
|
|
155
154
|
pass
|
|
156
155
|
|
|
156
|
+
_build = self.zarr_path + "/_build"
|
|
157
|
+
try:
|
|
158
|
+
shutil.rmtree(_build)
|
|
159
|
+
except FileNotFoundError:
|
|
160
|
+
pass
|
|
161
|
+
|
|
157
162
|
def _open_write(self) -> zarr.Group:
|
|
158
163
|
"""Open the Zarr store in write mode."""
|
|
159
164
|
import zarr
|
anemoi/datasets/data/__init__.py
CHANGED
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
|
+
import os
|
|
11
12
|
from typing import TYPE_CHECKING
|
|
12
13
|
from typing import Any
|
|
13
|
-
from typing import Set
|
|
14
14
|
|
|
15
15
|
# from .dataset import FullIndex
|
|
16
16
|
# from .dataset import Shape
|
|
@@ -82,6 +82,9 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
|
|
|
82
82
|
Dataset
|
|
83
83
|
The opened dataset.
|
|
84
84
|
"""
|
|
85
|
+
|
|
86
|
+
trace = int(os.environ.get("ANEMOI_DATASETS_TRACE", 0))
|
|
87
|
+
|
|
85
88
|
# That will get rid of OmegaConf objects
|
|
86
89
|
|
|
87
90
|
args, kwargs = _convert(args), _convert(kwargs)
|
|
@@ -90,22 +93,28 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
|
|
|
90
93
|
ds = ds.mutate()
|
|
91
94
|
ds.arguments = {"args": args, "kwargs": kwargs}
|
|
92
95
|
ds._check()
|
|
96
|
+
|
|
97
|
+
if trace:
|
|
98
|
+
from anemoi.datasets.testing import Trace
|
|
99
|
+
|
|
100
|
+
ds = Trace(ds)
|
|
101
|
+
|
|
93
102
|
return ds
|
|
94
103
|
|
|
95
104
|
|
|
96
|
-
def save_dataset(
|
|
105
|
+
def save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
|
|
97
106
|
"""Open a dataset and save it to disk.
|
|
98
107
|
|
|
99
108
|
Parameters
|
|
100
109
|
----------
|
|
101
|
-
|
|
102
|
-
|
|
110
|
+
dataset : Dataset
|
|
111
|
+
anemoi-dataset opened from python to save to Zarr store
|
|
103
112
|
zarr_path : str
|
|
104
113
|
Path to store the obtained anemoi dataset to disk.
|
|
105
114
|
n_workers : int
|
|
106
115
|
Number of workers to use for parallel processing. If none, sequential processing will be performed.
|
|
107
116
|
"""
|
|
108
|
-
_save_dataset(
|
|
117
|
+
_save_dataset(dataset, zarr_path, n_workers)
|
|
109
118
|
|
|
110
119
|
|
|
111
120
|
def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
|
|
@@ -124,6 +133,6 @@ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
|
|
|
124
133
|
The list of dataset names.
|
|
125
134
|
"""
|
|
126
135
|
ds = _open_dataset(*args, **kwargs)
|
|
127
|
-
names:
|
|
136
|
+
names: set[str] = set()
|
|
128
137
|
ds.get_dataset_names(names)
|
|
129
138
|
return sorted(names)
|
|
anemoi/datasets/data/complement.py
CHANGED
|
@@ -12,11 +12,6 @@ import logging
|
|
|
12
12
|
from abc import abstractmethod
|
|
13
13
|
from functools import cached_property
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Optional
|
|
18
|
-
from typing import Set
|
|
19
|
-
from typing import Tuple
|
|
20
15
|
|
|
21
16
|
import numpy as np
|
|
22
17
|
from numpy.typing import NDArray
|
|
@@ -91,26 +86,26 @@ class Complement(Combined):
|
|
|
91
86
|
raise ValueError("Augment: no missing variables")
|
|
92
87
|
|
|
93
88
|
@property
|
|
94
|
-
def variables(self) ->
|
|
89
|
+
def variables(self) -> list[str]:
|
|
95
90
|
"""Returns the list of variables to be added to the target dataset."""
|
|
96
91
|
return self._variables
|
|
97
92
|
|
|
98
93
|
@property
|
|
99
|
-
def statistics(self) ->
|
|
94
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
100
95
|
datasets = [self._source, self._target]
|
|
101
96
|
return {
|
|
102
97
|
k: [d.statistics[k][d.name_to_index[i]] for d in datasets for i in d.variables if i in self.variables]
|
|
103
98
|
for k in datasets[0].statistics
|
|
104
99
|
}
|
|
105
100
|
|
|
106
|
-
def statistics_tendencies(self, delta:
|
|
101
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
107
102
|
index = [self._source.name_to_index[v] for v in self._variables]
|
|
108
103
|
if delta is None:
|
|
109
104
|
delta = self.frequency
|
|
110
105
|
return {k: v[index] for k, v in self._source.statistics_tendencies(delta).items()}
|
|
111
106
|
|
|
112
107
|
@property
|
|
113
|
-
def name_to_index(self) ->
|
|
108
|
+
def name_to_index(self) -> dict[str, int]:
|
|
114
109
|
"""Returns a dictionary mapping variable names to their indices."""
|
|
115
110
|
return {v: i for i, v in enumerate(self.variables)}
|
|
116
111
|
|
|
@@ -121,7 +116,7 @@ class Complement(Combined):
|
|
|
121
116
|
return (shape[0], len(self._variables)) + shape[2:]
|
|
122
117
|
|
|
123
118
|
@property
|
|
124
|
-
def variables_metadata(self) ->
|
|
119
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
125
120
|
"""Returns the metadata of the variables to be added to the target dataset."""
|
|
126
121
|
# Merge the two dicts first
|
|
127
122
|
all_meta = {**self._source.variables_metadata, **self._target.variables_metadata}
|
|
@@ -142,7 +137,7 @@ class Complement(Combined):
|
|
|
142
137
|
pass
|
|
143
138
|
|
|
144
139
|
@cached_property
|
|
145
|
-
def missing(self) ->
|
|
140
|
+
def missing(self) -> set[int]:
|
|
146
141
|
"""Returns the set of missing indices in the source and target datasets."""
|
|
147
142
|
missing = self._source.missing.copy()
|
|
148
143
|
missing = missing | self._target.missing
|
|
@@ -317,7 +312,7 @@ class ComplementNearest(Complement):
|
|
|
317
312
|
return apply_index_to_slices_changes(result, changes)
|
|
318
313
|
|
|
319
314
|
|
|
320
|
-
def complement_factory(args:
|
|
315
|
+
def complement_factory(args: tuple, kwargs: dict) -> Dataset:
|
|
321
316
|
"""Factory function to create a Complement instance based on the provided arguments.
|
|
322
317
|
|
|
323
318
|
Parameters
|
anemoi/datasets/data/concat.py
CHANGED
|
@@ -11,9 +11,6 @@
|
|
|
11
11
|
import logging
|
|
12
12
|
from functools import cached_property
|
|
13
13
|
from typing import Any
|
|
14
|
-
from typing import List
|
|
15
|
-
from typing import Set
|
|
16
|
-
from typing import Tuple
|
|
17
14
|
|
|
18
15
|
import numpy as np
|
|
19
16
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
@@ -123,12 +120,12 @@ class ConcatMixin:
|
|
|
123
120
|
return np.concatenate(result)
|
|
124
121
|
|
|
125
122
|
@cached_property
|
|
126
|
-
def missing(self) ->
|
|
123
|
+
def missing(self) -> set[int]:
|
|
127
124
|
"""Returns the set of missing indices in the concatenated datasets."""
|
|
128
|
-
result:
|
|
125
|
+
result: set[int] = set()
|
|
129
126
|
offset = 0
|
|
130
127
|
for d in self.datasets:
|
|
131
|
-
result = result |
|
|
128
|
+
result = result | {m + offset for m in d.missing}
|
|
132
129
|
offset += len(d)
|
|
133
130
|
return result
|
|
134
131
|
|
|
@@ -195,7 +192,7 @@ class Concat(ConcatMixin, Combined):
|
|
|
195
192
|
return Node(self, [d.tree() for d in self.datasets])
|
|
196
193
|
|
|
197
194
|
@classmethod
|
|
198
|
-
def check_dataset_compatibility(cls, datasets:
|
|
195
|
+
def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: bool = False) -> list[Any]:
|
|
199
196
|
"""Checks the compatibility of the datasets for concatenation and fills missing gaps if required.
|
|
200
197
|
|
|
201
198
|
Parameters
|
|
@@ -259,7 +256,7 @@ class Concat(ConcatMixin, Combined):
|
|
|
259
256
|
return {}
|
|
260
257
|
|
|
261
258
|
|
|
262
|
-
def concat_factory(args:
|
|
259
|
+
def concat_factory(args: tuple[Any, ...], kwargs: dict) -> Concat:
|
|
263
260
|
"""Factory function to create a Concat object.
|
|
264
261
|
|
|
265
262
|
Parameters
|
anemoi/datasets/data/dataset.py
CHANGED
|
@@ -22,15 +22,10 @@ try:
|
|
|
22
22
|
except ImportError:
|
|
23
23
|
# Python 3.9
|
|
24
24
|
EllipsisType = type(Ellipsis)
|
|
25
|
+
from collections.abc import Sequence
|
|
26
|
+
from collections.abc import Sized
|
|
25
27
|
from typing import TYPE_CHECKING
|
|
26
28
|
from typing import Any
|
|
27
|
-
from typing import Dict
|
|
28
|
-
from typing import List
|
|
29
|
-
from typing import Optional
|
|
30
|
-
from typing import Sequence
|
|
31
|
-
from typing import Set
|
|
32
|
-
from typing import Sized
|
|
33
|
-
from typing import Tuple
|
|
34
29
|
from typing import Union
|
|
35
30
|
|
|
36
31
|
import numpy as np
|
|
@@ -48,8 +43,8 @@ if TYPE_CHECKING:
|
|
|
48
43
|
LOG = logging.getLogger(__name__)
|
|
49
44
|
|
|
50
45
|
|
|
51
|
-
Shape =
|
|
52
|
-
TupleIndex =
|
|
46
|
+
Shape = tuple[int, ...]
|
|
47
|
+
TupleIndex = tuple[Union[int, slice, EllipsisType], ...]
|
|
53
48
|
FullIndex = Union[int, slice, TupleIndex]
|
|
54
49
|
|
|
55
50
|
|
|
@@ -92,8 +87,8 @@ def _tidy(v: Any) -> Any:
|
|
|
92
87
|
|
|
93
88
|
|
|
94
89
|
class Dataset(ABC, Sized):
|
|
95
|
-
arguments:
|
|
96
|
-
_name:
|
|
90
|
+
arguments: dict[str, Any] = {}
|
|
91
|
+
_name: str | None = None
|
|
97
92
|
|
|
98
93
|
def mutate(self) -> "Dataset":
|
|
99
94
|
"""Give an opportunity to a subclass to return a new Dataset object of a different class, if needed.
|
|
@@ -148,7 +143,7 @@ class Dataset(ABC, Sized):
|
|
|
148
143
|
return result
|
|
149
144
|
|
|
150
145
|
@property
|
|
151
|
-
def name(self) ->
|
|
146
|
+
def name(self) -> str | None:
|
|
152
147
|
"""Return the name of the dataset."""
|
|
153
148
|
return self._name
|
|
154
149
|
|
|
@@ -298,6 +293,12 @@ class Dataset(ABC, Sized):
|
|
|
298
293
|
if skip_missing_dates:
|
|
299
294
|
return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
|
|
300
295
|
|
|
296
|
+
if "rolling_average" in kwargs:
|
|
297
|
+
from .rolling_average import RollingAverage
|
|
298
|
+
|
|
299
|
+
rolling_average = kwargs.pop("rolling_average")
|
|
300
|
+
return RollingAverage(self, rolling_average)._subset(**kwargs).mutate()
|
|
301
|
+
|
|
301
302
|
if "interpolate_frequency" in kwargs:
|
|
302
303
|
from .interpolate import InterpolateFrequency
|
|
303
304
|
|
|
@@ -360,9 +361,9 @@ class Dataset(ABC, Sized):
|
|
|
360
361
|
|
|
361
362
|
def _dates_to_indices(
|
|
362
363
|
self,
|
|
363
|
-
start:
|
|
364
|
-
end:
|
|
365
|
-
) ->
|
|
364
|
+
start: None | str | datetime.datetime,
|
|
365
|
+
end: None | str | datetime.datetime,
|
|
366
|
+
) -> list[int]:
|
|
366
367
|
"""Convert date range to a list of indices.
|
|
367
368
|
|
|
368
369
|
Parameters
|
|
@@ -387,7 +388,7 @@ class Dataset(ABC, Sized):
|
|
|
387
388
|
|
|
388
389
|
return [i for i, date in enumerate(self.dates) if start <= date <= end]
|
|
389
390
|
|
|
390
|
-
def _select_to_columns(self, vars:
|
|
391
|
+
def _select_to_columns(self, vars: str | list[str] | tuple[str] | set) -> list[int]:
|
|
391
392
|
"""Convert variable names to a list of column indices.
|
|
392
393
|
|
|
393
394
|
Parameters
|
|
@@ -411,7 +412,7 @@ class Dataset(ABC, Sized):
|
|
|
411
412
|
|
|
412
413
|
return [self.name_to_index[v] for v in vars]
|
|
413
414
|
|
|
414
|
-
def _drop_to_columns(self, vars:
|
|
415
|
+
def _drop_to_columns(self, vars: str | Sequence[str]) -> list[int]:
|
|
415
416
|
"""Convert variable names to a list of column indices to drop.
|
|
416
417
|
|
|
417
418
|
Parameters
|
|
@@ -432,7 +433,7 @@ class Dataset(ABC, Sized):
|
|
|
432
433
|
|
|
433
434
|
return sorted([v for k, v in self.name_to_index.items() if k not in vars])
|
|
434
435
|
|
|
435
|
-
def _reorder_to_columns(self, vars:
|
|
436
|
+
def _reorder_to_columns(self, vars: str | list[str] | tuple[str] | dict[str, int]) -> list[int]:
|
|
436
437
|
"""Convert variable names to a list of reordered column indices.
|
|
437
438
|
|
|
438
439
|
Parameters
|
|
@@ -465,8 +466,8 @@ class Dataset(ABC, Sized):
|
|
|
465
466
|
return indices
|
|
466
467
|
|
|
467
468
|
def dates_interval_to_indices(
|
|
468
|
-
self, start:
|
|
469
|
-
) ->
|
|
469
|
+
self, start: None | str | datetime.datetime, end: None | str | datetime.datetime
|
|
470
|
+
) -> list[int]:
|
|
470
471
|
"""Convert date interval to a list of indices.
|
|
471
472
|
|
|
472
473
|
Parameters
|
|
@@ -483,7 +484,7 @@ class Dataset(ABC, Sized):
|
|
|
483
484
|
"""
|
|
484
485
|
return self._dates_to_indices(start, end)
|
|
485
486
|
|
|
486
|
-
def provenance(self) ->
|
|
487
|
+
def provenance(self) -> dict[str, Any]:
|
|
487
488
|
"""Return the provenance information of the dataset.
|
|
488
489
|
|
|
489
490
|
Returns
|
|
@@ -511,7 +512,7 @@ class Dataset(ABC, Sized):
|
|
|
511
512
|
return tuple(shape)
|
|
512
513
|
|
|
513
514
|
@property
|
|
514
|
-
def typed_variables(self) ->
|
|
515
|
+
def typed_variables(self) -> dict[str, Any]:
|
|
515
516
|
"""Return the variables with their types."""
|
|
516
517
|
from anemoi.transform.variables import Variable
|
|
517
518
|
|
|
@@ -532,7 +533,7 @@ class Dataset(ABC, Sized):
|
|
|
532
533
|
|
|
533
534
|
return result
|
|
534
535
|
|
|
535
|
-
def _input_sources(self) ->
|
|
536
|
+
def _input_sources(self) -> list[Any]:
|
|
536
537
|
"""Return the input sources of the dataset.
|
|
537
538
|
|
|
538
539
|
Returns
|
|
@@ -544,7 +545,7 @@ class Dataset(ABC, Sized):
|
|
|
544
545
|
self.collect_input_sources(sources)
|
|
545
546
|
return sources
|
|
546
547
|
|
|
547
|
-
def metadata(self) ->
|
|
548
|
+
def metadata(self) -> dict[str, Any]:
|
|
548
549
|
"""Return the metadata of the dataset.
|
|
549
550
|
|
|
550
551
|
Returns
|
|
@@ -588,7 +589,7 @@ class Dataset(ABC, Sized):
|
|
|
588
589
|
"""Return the end date of the dataset."""
|
|
589
590
|
return self.dates[-1]
|
|
590
591
|
|
|
591
|
-
def dataset_metadata(self) ->
|
|
592
|
+
def dataset_metadata(self) -> dict[str, Any]:
|
|
592
593
|
"""Return the metadata of the dataset.
|
|
593
594
|
|
|
594
595
|
Returns
|
|
@@ -608,7 +609,7 @@ class Dataset(ABC, Sized):
|
|
|
608
609
|
name=self.name,
|
|
609
610
|
)
|
|
610
611
|
|
|
611
|
-
def _supporting_arrays(self, *path: str) ->
|
|
612
|
+
def _supporting_arrays(self, *path: str) -> dict[str, NDArray[Any]]:
|
|
612
613
|
"""Return the supporting arrays of the dataset.
|
|
613
614
|
|
|
614
615
|
Parameters
|
|
@@ -646,7 +647,7 @@ class Dataset(ABC, Sized):
|
|
|
646
647
|
|
|
647
648
|
return result
|
|
648
649
|
|
|
649
|
-
def supporting_arrays(self) ->
|
|
650
|
+
def supporting_arrays(self) -> dict[str, NDArray[Any]]:
|
|
650
651
|
"""Return the supporting arrays to be saved in the checkpoints.
|
|
651
652
|
|
|
652
653
|
Returns
|
|
@@ -657,7 +658,7 @@ class Dataset(ABC, Sized):
|
|
|
657
658
|
arrays, _ = self._supporting_arrays_and_sources()
|
|
658
659
|
return arrays
|
|
659
660
|
|
|
660
|
-
def _supporting_arrays_and_sources(self) ->
|
|
661
|
+
def _supporting_arrays_and_sources(self) -> tuple[dict[str, NDArray], dict[int, list[str]]]:
|
|
661
662
|
"""Return the supporting arrays and their sources.
|
|
662
663
|
|
|
663
664
|
Returns
|
|
@@ -684,7 +685,7 @@ class Dataset(ABC, Sized):
|
|
|
684
685
|
|
|
685
686
|
return result, source_to_arrays
|
|
686
687
|
|
|
687
|
-
def collect_supporting_arrays(self, collected:
|
|
688
|
+
def collect_supporting_arrays(self, collected: list[tuple[tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
|
|
688
689
|
"""Collect supporting arrays.
|
|
689
690
|
|
|
690
691
|
Parameters
|
|
@@ -697,7 +698,7 @@ class Dataset(ABC, Sized):
|
|
|
697
698
|
# Override this method to add more arrays
|
|
698
699
|
pass
|
|
699
700
|
|
|
700
|
-
def metadata_specific(self, **kwargs: Any) ->
|
|
701
|
+
def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
|
|
701
702
|
"""Return specific metadata of the dataset.
|
|
702
703
|
|
|
703
704
|
Parameters
|
|
@@ -764,7 +765,7 @@ class Dataset(ABC, Sized):
|
|
|
764
765
|
"""Return the label of the dataset."""
|
|
765
766
|
return self.__class__.__name__.lower()
|
|
766
767
|
|
|
767
|
-
def computed_constant_fields(self) ->
|
|
768
|
+
def computed_constant_fields(self) -> list[str]:
|
|
768
769
|
"""Return the computed constant fields of the dataset.
|
|
769
770
|
|
|
770
771
|
Returns
|
|
@@ -781,7 +782,7 @@ class Dataset(ABC, Sized):
|
|
|
781
782
|
|
|
782
783
|
return sorted(self._compute_constant_fields_from_a_few_samples())
|
|
783
784
|
|
|
784
|
-
def _compute_constant_fields_from_a_few_samples(self) ->
|
|
785
|
+
def _compute_constant_fields_from_a_few_samples(self) -> list[str]:
|
|
785
786
|
"""Compute constant fields from a few samples.
|
|
786
787
|
|
|
787
788
|
Returns
|
|
@@ -822,7 +823,7 @@ class Dataset(ABC, Sized):
|
|
|
822
823
|
|
|
823
824
|
return [v for i, v in enumerate(self.variables) if constants[i]]
|
|
824
825
|
|
|
825
|
-
def _compute_constant_fields_from_statistics(self) ->
|
|
826
|
+
def _compute_constant_fields_from_statistics(self) -> list[str]:
|
|
826
827
|
"""Compute constant fields from statistics.
|
|
827
828
|
|
|
828
829
|
Returns
|
|
@@ -842,8 +843,8 @@ class Dataset(ABC, Sized):
|
|
|
842
843
|
|
|
843
844
|
def plot(
|
|
844
845
|
self,
|
|
845
|
-
date:
|
|
846
|
-
variable:
|
|
846
|
+
date: int | datetime.datetime | np.datetime64 | str,
|
|
847
|
+
variable: int | str,
|
|
847
848
|
member: int = 0,
|
|
848
849
|
**kwargs: Any,
|
|
849
850
|
) -> "matplotlib.pyplot.Axes":
|
|
@@ -873,10 +874,10 @@ class Dataset(ABC, Sized):
|
|
|
873
874
|
|
|
874
875
|
def to_index(
|
|
875
876
|
self,
|
|
876
|
-
date:
|
|
877
|
-
variable:
|
|
877
|
+
date: int | datetime.datetime | np.datetime64 | str,
|
|
878
|
+
variable: int | str,
|
|
878
879
|
member: int = 0,
|
|
879
|
-
) ->
|
|
880
|
+
) -> tuple[int, int, int]:
|
|
880
881
|
"""Convert date, variable, and member to indices.
|
|
881
882
|
|
|
882
883
|
Parameters
|
|
@@ -945,7 +946,7 @@ class Dataset(ABC, Sized):
|
|
|
945
946
|
|
|
946
947
|
@property
|
|
947
948
|
@abstractmethod
|
|
948
|
-
def variables(self) ->
|
|
949
|
+
def variables(self) -> list[str]:
|
|
949
950
|
"""Return the list of variables in the dataset."""
|
|
950
951
|
pass
|
|
951
952
|
|
|
@@ -969,7 +970,7 @@ class Dataset(ABC, Sized):
|
|
|
969
970
|
|
|
970
971
|
@property
|
|
971
972
|
@abstractmethod
|
|
972
|
-
def name_to_index(self) ->
|
|
973
|
+
def name_to_index(self) -> dict[str, int]:
|
|
973
974
|
"""Return the mapping of variable names to indices."""
|
|
974
975
|
pass
|
|
975
976
|
|
|
@@ -1005,30 +1006,30 @@ class Dataset(ABC, Sized):
|
|
|
1005
1006
|
|
|
1006
1007
|
@property
|
|
1007
1008
|
@abstractmethod
|
|
1008
|
-
def variables_metadata(self) ->
|
|
1009
|
+
def variables_metadata(self) -> dict[str, Any]:
|
|
1009
1010
|
"""Return the metadata of the variables in the dataset."""
|
|
1010
1011
|
pass
|
|
1011
1012
|
|
|
1012
1013
|
@abstractmethod
|
|
1013
1014
|
@cached_property
|
|
1014
|
-
def missing(self) ->
|
|
1015
|
+
def missing(self) -> set[int]:
|
|
1015
1016
|
"""Return the set of missing indices in the dataset."""
|
|
1016
1017
|
pass
|
|
1017
1018
|
|
|
1018
1019
|
@abstractmethod
|
|
1019
1020
|
@cached_property
|
|
1020
|
-
def constant_fields(self) ->
|
|
1021
|
+
def constant_fields(self) -> list[str]:
|
|
1021
1022
|
"""Return the list of constant fields in the dataset."""
|
|
1022
1023
|
pass
|
|
1023
1024
|
|
|
1024
1025
|
@abstractmethod
|
|
1025
1026
|
@cached_property
|
|
1026
|
-
def statistics(self) ->
|
|
1027
|
+
def statistics(self) -> dict[str, NDArray[Any]]:
|
|
1027
1028
|
"""Return the statistics of the dataset."""
|
|
1028
1029
|
pass
|
|
1029
1030
|
|
|
1030
1031
|
@abstractmethod
|
|
1031
|
-
def statistics_tendencies(self, delta:
|
|
1032
|
+
def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
|
|
1032
1033
|
"""Return the tendencies of the statistics in the dataset.
|
|
1033
1034
|
|
|
1034
1035
|
Parameters
|
|
@@ -1071,7 +1072,7 @@ class Dataset(ABC, Sized):
|
|
|
1071
1072
|
pass
|
|
1072
1073
|
|
|
1073
1074
|
@abstractmethod
|
|
1074
|
-
def collect_input_sources(self, sources:
|
|
1075
|
+
def collect_input_sources(self, sources: list[Any]) -> None:
|
|
1075
1076
|
"""Collect the input sources of the dataset.
|
|
1076
1077
|
|
|
1077
1078
|
Parameters
|
|
@@ -1082,7 +1083,7 @@ class Dataset(ABC, Sized):
|
|
|
1082
1083
|
pass
|
|
1083
1084
|
|
|
1084
1085
|
@abstractmethod
|
|
1085
|
-
def get_dataset_names(self, names:
|
|
1086
|
+
def get_dataset_names(self, names: set[str]) -> None:
|
|
1086
1087
|
"""Get the names of the datasets.
|
|
1087
1088
|
|
|
1088
1089
|
Parameters
|