anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only. The most visible change in this release is a reorganisation of the anemoi.datasets.create package; a short import sketch follows the file list below.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
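Most of the renames above flatten the anemoi.datasets.create.functions package: filter modules move to anemoi.datasets.create.filters and source modules to anemoi.datasets.create.sources, with the xarray helpers gathered under sources/xarray_support. A minimal sketch of what this can mean for downstream code that imported these modules by path, using rotate_winds purely as an example read off the rename list (the diff does not show which names are re-exported, so treat the exact import paths as an assumption):

```python
# Hypothetical compatibility shim based only on the file moves listed above;
# module paths are inferred from the renamed files, not from a documented API.
try:
    # 0.5.17 layout: filter modules sit directly under anemoi.datasets.create.filters
    from anemoi.datasets.create.filters import rotate_winds
except ImportError:
    # 0.5.16 layout: filter modules lived under anemoi.datasets.create.functions.filters
    from anemoi.datasets.create.functions.filters import rotate_winds
```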
anemoi/datasets/data/subset.py
CHANGED
```diff
@@ -8,12 +8,24 @@
 # nor does it submit to any jurisdiction.


+import datetime
 import logging
 from functools import cached_property
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Sequence
+from typing import Set
+from typing import Union

 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta
+from numpy.typing import NDArray

+from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
+from .dataset import TupleIndex
 from .debug import Node
 from .debug import Source
 from .debug import debug_indexing
@@ -27,11 +39,31 @@ from .indexing import update_tuple
 LOG = logging.getLogger(__name__)


-def _default(a, b, dates):
+def _default(a: int, b: int, dates: NDArray[np.datetime64]) -> list[int]:
+    """Default combination function for reasons.
+
+    Parameters:
+    a (int): First integer value.
+    b (int): Second integer value.
+    dates (NDArray[np.datetime64]): Array of datetime64 dates.
+
+    Returns:
+    list[int]: List containing the two input integers.
+    """
     return [a, b]


-def _start(a, b, dates):
+def _start(a: int, b: int, dates: NDArray[np.datetime64]) -> int:
+    """Determine the start date between two dates.
+
+    Parameters:
+    a (int): First integer value.
+    b (int): Second integer value.
+    dates (NDArray[np.datetime64]): Array of datetime64 dates.
+
+    Returns:
+    int: The index of the start date.
+    """
     from .misc import as_first_date

     c = as_first_date(a, dates)
@@ -42,7 +74,17 @@ def _start(a, b, dates):
     return a


-def _end(a, b, dates):
+def _end(a: int, b: int, dates: NDArray[np.datetime64]) -> int:
+    """Determine the end date between two dates.
+
+    Parameters:
+    a (int): First integer value.
+    b (int): Second integer value.
+    dates (NDArray[np.datetime64]): Array of datetime64 dates.
+
+    Returns:
+    int: The index of the end date.
+    """
     from .misc import as_last_date

     c = as_last_date(a, dates)
@@ -53,7 +95,17 @@ def _end(a, b, dates):
     return b


-def _combine_reasons(reason1, reason2, dates):
+def _combine_reasons(reason1: Dict[str, Any], reason2: Dict[str, Any], dates: NDArray[np.datetime64]) -> Dict[str, Any]:
+    """Combine two reason dictionaries.
+
+    Parameters:
+    reason1 (Dict[str, Any]): First reason dictionary.
+    reason2 (Dict[str, Any]): Second reason dictionary.
+    dates (NDArray[np.datetime64]): Array of datetime64 dates.
+
+    Returns:
+    Dict[str, Any]: Combined reason dictionary.
+    """

     reason = reason1.copy()
     for k, v in reason2.items():
@@ -66,29 +118,63 @@ def _combine_reasons(reason1, reason2, dates):


 class Subset(Forwards):
-    """Select a subset of the dates."""
-
-    def __init__(self, dataset, indices, reason):
+    """Select a subset of the dates.
+
+    Attributes:
+    dataset (Dataset): The dataset.
+    indices (List[int]): List of indices.
+    reason (Dict[str, Any]): Dictionary of reasons.
+    """
+
+    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: Dict[str, Any]) -> None:
+        """Initialize the Subset.
+
+        Parameters:
+        dataset (Dataset | Subset): The dataset or subset.
+        indices (Sequence[int]): Sequence of indices.
+        reason (Dict[str, Any]): Dictionary of reasons.
+        """
         while isinstance(dataset, Subset):
             indices = [dataset.indices[i] for i in indices]
             reason = _combine_reasons(reason, dataset.reason, dataset.dates)
             dataset = dataset.dataset

-        self.dataset = dataset
-        self.indices = list(indices)
-        self.reason = {k: v for k, v in reason.items() if v is not None}
+        self.dataset: Dataset = dataset
+        self.indices: List[int] = list(indices)
+        self.reason: Dict[str, Any] = {k: v for k, v in reason.items() if v is not None}

         # Forward other properties to the super dataset
         super().__init__(dataset)

-    def clone(self, dataset):
+    def clone(self, dataset: Dataset) -> Dataset:
+        """Clone the subset with a new dataset.
+
+        Parameters:
+        dataset (Dataset): The new dataset.
+
+        Returns:
+        Dataset: The cloned subset.
+        """
         return self.__class__(dataset, self.indices, self.reason).mutate()

-    def mutate(self):
+    def mutate(self) -> Dataset:
+        """Mutate the subset.
+
+        Returns:
+        Dataset: The mutated subset.
+        """
         return self.forward.swap_with_parent(parent=self)

     @debug_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Get item by index.
+
+        Parameters:
+        n (FullIndex): The index.
+
+        Returns:
+        NDArray[Any]: The indexed data.
+        """
         if isinstance(n, tuple):
             return self._get_tuple(n)

@@ -100,7 +186,15 @@ class Subset(Forwards):
         return self.dataset[n]

     @debug_indexing
-    def _get_slice(self, s):
+    def _get_slice(self, s: slice) -> NDArray[Any]:
+        """Get slice of data.
+
+        Parameters:
+        s (slice): The slice.
+
+        Returns:
+        NDArray[Any]: The sliced data.
+        """
         # TODO: check if the indices can be simplified to a slice
         # the time checking maybe be longer than the time saved
         # using a slice
@@ -112,7 +206,15 @@ class Subset(Forwards):

     @debug_indexing
     @expand_list_indexing
-    def _get_tuple(self, n):
+    def _get_tuple(self, n: TupleIndex) -> NDArray[Any]:
+        """Get tuple of data.
+
+        Parameters:
+        n (TupleIndex): The tuple index.
+
+        Returns:
+        NDArray[Any]: The indexed data.
+        """
         index, changes = index_to_slices(n, self.shape)
         indices = [self.indices[i] for i in range(*index[0].indices(self._len))]
         indices = make_slice_or_index_from_list_or_tuple(indices)
@@ -121,43 +223,75 @@ class Subset(Forwards):
         result = apply_index_to_slices_changes(result, changes)
         return result

-    def __len__(self):
+    def __len__(self) -> int:
+        """Get the length of the subset.
+
+        Returns:
+        int: The length of the subset.
+        """
         return len(self.indices)

     @cached_property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Get the shape of the subset."""
         return (len(self),) + self.dataset.shape[1:]

     @cached_property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Get the dates of the subset."""
         return self.dataset.dates[self.indices]

     @cached_property
-    def frequency(self):
+    def frequency(self) -> datetime.timedelta:
+        """Get the frequency of the subset."""
         dates = self.dates
         if len(dates) < 2:
             raise ValueError(f"Cannot determine frequency of a subset with less than two dates ({self.dates}).")
         return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))

-    def source(self, index):
+    def source(self, index: int) -> Source:
+        """Get the source of the subset.
+
+        Parameters:
+        index (int): The index.
+
+        Returns:
+        Source: The source of the subset.
+        """
         return Source(self, index, self.forward.source(index))

-    def __repr__(self):
+    def __repr__(self) -> str:
+        """Get the string representation of the subset.
+
+        Returns:
+        str: The string representation of the subset.
+        """
         return f"Subset({self.dataset},{self.dates[0]}...{self.dates[-1]}/{self.frequency})"

     @cached_property
-    def missing(self):
+    def missing(self) -> Set[int]:
+        """Get the missing indices of the subset."""
         missing = self.dataset.missing
-        result = set()
+        result: Set[int] = set()
         for j, i in enumerate(self.indices):
             if i in missing:
                 result.add(j)
         return result

-    def tree(self):
+    def tree(self) -> Node:
+        """Get the tree representation of the subset.
+
+        Returns:
+        Node: The tree representation of the subset.
+        """
         return Node(self, [self.dataset.tree()], **self.reason)

-    def forwards_subclass_metadata_specific(self):
+    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+        """Get the metadata specific to the forwards subclass.
+
+        Returns:
+        Dict[str, Any]: The metadata specific to the forwards subclass.
+        """
         return {
             # "indices": self.indices,
             "reason": self.reason,
```
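The Subset changes are almost entirely additive: type hints, docstrings, and the typed aliases (FullIndex, Shape, TupleIndex) imported from .dataset. The behaviour of collapsing nested subsets in __init__, so that indices always refer to the underlying dataset, is unchanged. The following self-contained sketch reproduces that flattening pattern with toy stand-in classes (not the real anemoi.datasets types) to show what the while loop above does:

```python
# Self-contained sketch of the index-flattening pattern used in Subset.__init__
# (stand-in classes only; not the real anemoi.datasets implementation).
from typing import Any, Dict, List, Sequence


class ToyDataset:
    """Minimal stand-in for a dataset indexed by date position."""

    def __init__(self, values: List[Any]) -> None:
        self.values = values

    def __getitem__(self, i: int) -> Any:
        return self.values[i]


class ToySubset:
    """Selects a subset of indices; nested subsets are collapsed eagerly."""

    def __init__(self, dataset, indices: Sequence[int], reason: Dict[str, Any]) -> None:
        # If we are subsetting a subset, remap our indices through the inner
        # subset and keep a single level of indirection to the base dataset.
        while isinstance(dataset, ToySubset):
            indices = [dataset.indices[i] for i in indices]
            reason = {**dataset.reason, **reason}  # simplified reason merge
            dataset = dataset.dataset
        self.dataset = dataset
        self.indices = list(indices)
        self.reason = {k: v for k, v in reason.items() if v is not None}

    def __getitem__(self, i: int) -> Any:
        return self.dataset[self.indices[i]]


base = ToyDataset(["d0", "d1", "d2", "d3", "d4"])
first = ToySubset(base, [1, 2, 3], {"start": "d1"})
second = ToySubset(first, [0, 2], {"end": "d3"})
assert second.dataset is base    # indirection collapsed
assert second.indices == [1, 3]  # indices remapped to the base dataset
print(second[1])                 # -> "d3"
```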
anemoi/datasets/data/unchecked.py
CHANGED
```diff
@@ -8,11 +8,24 @@
 # nor does it submit to any jurisdiction.


+import datetime
 import logging
 from functools import cached_property
 from functools import wraps
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Set
+
+import numpy as np
+from numpy.typing import NDArray

 from .concat import ConcatMixin
+from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
 from .debug import Node
 from .forwards import Combined
 from .misc import _auto_adjust
@@ -22,18 +35,48 @@ LOG = logging.getLogger(__name__)


 class check:
+    """A decorator class to perform checks before calling a method."""
+
+    def __init__(self, check: str) -> None:
+        """Initialize the check decorator.

-    def __init__(self, check):
+        Parameters
+        ----------
+        check : str
+            The name of the check method.
+        """
         self.check = check

-    def __call__(self, method):
+    def __call__(self, method: Callable) -> Callable:
+        """Call the check decorator.
+
+        Parameters
+        ----------
+        method : Callable
+            The method to decorate.
+
+        Returns
+        -------
+        Callable
+            The decorated method.
+        """
         name = method.__name__
         check = self.check

         @wraps(method)
-        def wrapper(obj):
-            """
-
+        def wrapper(obj: "Unchecked") -> Any:
+            """Wrapper function to check compatibility before calling the method.
+
+            Parameters
+            ----------
+            obj : Unchecked
+                The Unchecked object.
+
+            Returns
+            -------
+            Any
+                The result of the method.
+            """
             for d in obj.datasets[1:]:
                 getattr(obj, check)(obj.datasets[0], d)

@@ -43,126 +86,194 @@ class check:


 class Unchecked(Combined):
+    """A class representing a dataset without compatibility checks."""
+
+    def tree(self) -> Node:
+        """Get the tree representation of the dataset.

-    def tree(self):
+        Returns
+        -------
+        Node
+            The tree representation.
+        """
         return Node(self, [d.tree() for d in self.datasets])

-    def _subset(self, **kwargs):
+    def _subset(self, **kwargs: dict) -> "Unchecked":
+        """Get a subset of the dataset.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Subset parameters.
+
+        Returns
+        -------
+        Unchecked
+            The subset of the dataset.
+        """
         assert not kwargs
         return self

-    def check_compatibility(self, d1, d2):
+    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
+        """Check compatibility between two datasets.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         pass

-    ###########################################
     @property
     @check("check_same_dates")
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Get the dates of the dataset."""
         pass

     @property
     @check("check_same_resolution")
-    def resolution(self):
+    def resolution(self) -> Any:
+        """Get the resolution of the dataset."""
         pass

     @property
-    def field_shape(self):
+    def field_shape(self) -> tuple:
+        """Get the field shape of the dataset."""
         raise NotImplementedError()

     @property
     @check("check_same_frequency")
-    def frequency(self):
+    def frequency(self) -> datetime.timedelta:
+        """Get the frequency of the dataset."""
         raise NotImplementedError()

     @property
     @check("check_same_grid")
-    def latitudes(self):
+    def latitudes(self) -> NDArray[Any]:
+        """Get the latitudes of the dataset."""
         raise NotImplementedError()

     @property
     @check("check_same_grid")
-    def longitudes(self):
+    def longitudes(self) -> NDArray[Any]:
+        """Get the longitudes of the dataset."""
         raise NotImplementedError()

-    @property
     @check("check_same_variables")
-    def name_to_index(self):
+    @property
+    def name_to_index(self) -> Dict[str, int]:
+        """Get the mapping of variable names to their indices."""
         raise NotImplementedError()

-    @property
     @check("check_same_variables")
-    def variables(self):
+    @property
+    def variables(self) -> List[str]:
+        """Get the list of variables in the dataset."""
         raise NotImplementedError()

-    @property
     @check("check_same_variables")
-    def variables_metadata(self):
+    @property
+    def variables_metadata(self) -> dict:
+        """Get the metadata for the variables."""
         raise NotImplementedError()

-    @property
     @check("check_same_variables")
-    def statistics(self):
+    @property
+    def statistics(self) -> Dict[str, NDArray[Any]]:
+        """Get the statistics of the dataset."""
         raise NotImplementedError()

     @check("check_same_variables")
-    def statistics_tendencies(self, delta=None):
+    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+        """Get the statistics tendencies of the dataset.
+
+        Parameters
+        ----------
+        delta : Optional[datetime.timedelta]
+            The time delta for tendencies.
+
+        Returns
+        -------
+        Dict[str, NDArray[Any]]
+            The statistics tendencies.
+        """
         raise NotImplementedError()

     @property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Get the shape of the dataset."""
         raise NotImplementedError()

-    # @property
-    # def field_shape(self):
-    #     return tuple(d.shape for d in self.datasets)
-
-    # @property
-    # def latitudes(self):
-    #     return tuple(d.latitudes for d in self.datasets)
-
-    # @property
-    # def longitudes(self):
-    #     return tuple(d.longitudes for d in self.datasets)
-
-    # @property
-    # def statistics(self):
-    #     return tuple(d.statistics for d in self.datasets)
-
-    # @property
-    # def resolution(self):
-    #     return tuple(d.resolution for d in self.datasets)
-
-    # @property
-    # def name_to_index(self):
-    #     return tuple(d.name_to_index for d in self.datasets)
-
     @cached_property
-    def missing(self):
-        result = set()
+    def missing(self) -> Set[int]:
+        """Get the missing data indices."""
+        result: Set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result


 class Chain(ConcatMixin, Unchecked):
-    """
+    """A class representing a chain of datasets without compatibility checks."""

-    def __len__(self):
+    def __len__(self) -> int:
+        """Get the length of the dataset.
+
+        Returns
+        -------
+        int
+            The length of the dataset.
+        """
         return sum(len(d) for d in self.datasets)

-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> tuple:
+        """Get an item from the dataset.
+
+        Parameters
+        ----------
+        n : FullIndex
+            The index of the item.
+
+        Returns
+        -------
+        tuple
+            The item at the specified index.
+        """
         return tuple(d[n] for d in self.datasets)

     @property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Get the dates of the dataset."""
         raise NotImplementedError()

-    def dataset_metadata(self):
+    def dataset_metadata(self) -> dict:
+        """Get the metadata of the dataset.
+
+        Returns
+        -------
+        dict
+            The metadata of the dataset.
+        """
         return {"multiple": [d.dataset_metadata() for d in self.datasets]}


-def chain_factory(args, kwargs):
+def chain_factory(args: tuple, kwargs: dict) -> Dataset:
+    """Factory function to create a Chain dataset.
+
+    Parameters
+    ----------
+    args : tuple
+        Positional arguments.
+    kwargs : dict
+        Keyword arguments.

+    Returns
+    -------
+    Dataset
+        The Chain dataset.
+    """
     chain = kwargs.pop("chain")
     assert len(args) == 0
     assert isinstance(chain, (list, tuple))
```
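As in subset.py, the changes here are type hints and docstrings; the check decorator keeps its shape: it wraps a method or property so that a named pairwise check runs across obj.datasets before the wrapped body executes. A self-contained sketch of that pattern, using toy classes rather than the real Combined/Unchecked hierarchy (the final return of method(obj) inside the wrapper is assumed, since the hunk above is cut off before it):

```python
# Sketch of the "check before call" decorator pattern used by Unchecked
# (illustrative stand-ins only; not the real anemoi.datasets classes).
from functools import wraps
from typing import Any, Callable, List


class check:
    """Run a named pairwise check on obj.datasets before calling the method."""

    def __init__(self, check: str) -> None:
        self.check = check

    def __call__(self, method: Callable) -> Callable:
        name = self.check

        @wraps(method)
        def wrapper(obj: Any) -> Any:
            # Compare every dataset against the first one using the named check.
            for d in obj.datasets[1:]:
                getattr(obj, name)(obj.datasets[0], d)
            return method(obj)  # assumed: call through once the checks pass

        return wrapper


class ToyCombined:
    def __init__(self, datasets: List[list]) -> None:
        self.datasets = datasets

    def check_same_length(self, d1: list, d2: list) -> None:
        if len(d1) != len(d2):
            raise ValueError("datasets have different lengths")

    @property
    @check("check_same_length")
    def length(self) -> int:
        return len(self.datasets[0])


combined = ToyCombined([[1, 2, 3], [4, 5, 6]])
print(combined.length)  # 3; raises ValueError if the member lengths differ
```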