anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
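Most of this release is a reorganisation of the create package: filter and source modules move from anemoi.datasets.create.functions.filters and anemoi.datasets.create.functions.sources to anemoi.datasets.create.filters and anemoi.datasets.create.sources (with the xarray helpers under sources/xarray_support), and both new packages gain a legacy.py. A minimal sketch of what the move means for code that imports these modules directly; the module paths are taken from the rename entries above, rotate_winds is just one example of a moved module, and this diff does not say whether these paths are supported public API:

import importlib
from types import ModuleType

# Paths from the rename list above: 0.5.16 used create/functions/filters,
# 0.5.17 uses create/filters.
NEW = "anemoi.datasets.create.filters.rotate_winds"
OLD = "anemoi.datasets.create.functions.filters.rotate_winds"

def import_rotate_winds() -> ModuleType:
    """Import the moved filter module under whichever layout is installed."""
    try:
        return importlib.import_module(NEW)  # 0.5.17 layout
    except ModuleNotFoundError:
        return importlib.import_module(OLD)  # 0.5.16 layout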
anemoi/datasets/data/complement.py CHANGED

@@ -9,9 +9,21 @@
 
 
 import logging
+from abc import abstractmethod
 from functools import cached_property
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Set
+from typing import Tuple
+
+from numpy.typing import NDArray
 
 from ..grids import nearest_grid_points
+from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
+from .dataset import TupleIndex
 from .debug import Node
 from .forwards import Combined
 from .indexing import apply_index_to_slices_changes
@@ -24,101 +36,226 @@ LOG = logging.getLogger(__name__)
 
 
 class Complement(Combined):
-
-
+    """A class to complement a target dataset with variables from a source dataset,
+    interpolated on the grid of the target dataset.
+
+    Attributes
+    ----------
+    target : Dataset
+        The target dataset.
+    source : Dataset
+        The source dataset.
+    variables : List[str]
+        List of variables to be added to the target dataset.
+    """
+
+    def __init__(
+        self,
+        target: Dataset,
+        source: Dataset,
+        what: str = "variables",
+        interpolation: str = "nearest",
+    ) -> None:
+        """Initializes the Complement class.
+
+        Parameters
+        ----------
+        target : Dataset
+            The target dataset.
+        source : Dataset
+            The source dataset.
+        what : str, optional
+            What to complement, default is "variables".
+        interpolation : str, optional
+            Interpolation method, default is "nearest".
+        """
         super().__init__([target, source])
 
         # We had the variables of dataset[1] to dataset[0]
         # interpoated on the grid of dataset[0]
 
-        self.
-        self.
+        self._target: Dataset = target
+        self._source: Dataset = source
 
         self._variables = []
 
         # Keep the same order as the original dataset
-        for v in self.
-            if v not in self.
+        for v in self._source.variables:
+            if v not in self._target.variables:
                 self._variables.append(v)
 
         if not self._variables:
             raise ValueError("Augment: no missing variables")
 
     @property
-    def variables(self):
+    def variables(self) -> List[str]:
+        """Returns the list of variables to be added to the target dataset."""
        return self._variables
 
     @property
-    def name_to_index(self):
+    def name_to_index(self) -> Dict[str, int]:
+        """Returns a dictionary mapping variable names to their indices."""
         return {v: i for i, v in enumerate(self.variables)}
 
     @property
-    def shape(self):
-        shape
+    def shape(self) -> Shape:
+        """Returns the shape of the complemented dataset."""
+        shape = self._target.shape
         return (shape[0], len(self._variables)) + shape[2:]
 
     @property
-    def variables_metadata(self):
-
-
-
+    def variables_metadata(self) -> Dict[str, Any]:
+        """Returns the metadata of the variables to be added to the target dataset."""
+        return {k: v for k, v in self._source.variables_metadata.items() if k in self._variables}
+
+    def check_same_variables(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the variables in two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         pass
 
     @cached_property
-    def missing(self):
-        missing
-        missing =
+    def missing(self) -> Set[int]:
+        """Returns the set of missing indices in the source and target datasets."""
+        missing = self._source.missing.copy()
+        missing = missing | self._target.missing
         return set(missing)
 
-    def tree(self):
-        """Generates a hierarchical tree structure for the
-        its associated datasets.
+    def tree(self) -> Node:
+        """Generates a hierarchical tree structure for the Complement instance and its associated datasets.
 
-        Returns
-
-
-        node.
+        Returns
+        -------
+        Node
+            A Node object representing the Complement instance as the root node, with each dataset in self.datasets represented as a child node.
         """
-        return Node(self, [d.tree() for d in (self.
+        return Node(self, [d.tree() for d in (self._target, self._source)])
+
+    def __getitem__(self, index: FullIndex) -> NDArray[Any]:
+        """Gets the data at the specified index.
+
+        Parameters
+        ----------
+        index : FullIndex
+            The index to retrieve data from.
 
-
+        Returns
+        -------
+        NDArray[Any]
+            The data at the specified index.
+        """
         if isinstance(index, (int, slice)):
             index = (index, slice(None), slice(None), slice(None))
         return self._get_tuple(index)
 
+    @abstractmethod
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Abstract method to get the data at the specified tuple index.
+
+        Parameters
+        ----------
+        index : TupleIndex
+            The tuple index to retrieve data from.
+
+        Returns
+        -------
+        NDArray[Any]
+            The data at the specified tuple index.
+        """
+        pass
+
 
 class ComplementNone(Complement):
+    """A class to complement a target dataset with variables from a source dataset without interpolation."""
+
+    def __init__(self, target: Any, source: Any) -> None:
+        """Initializes the ComplementNone class.
 
-
+        Parameters
+        ----------
+        target : Any
+            The target dataset.
+        source : Any
+            The source dataset.
+        """
         super().__init__(target, source)
 
-    def _get_tuple(self, index):
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Gets the data at the specified tuple index without interpolation.
+
+        Parameters
+        ----------
+        index : TupleIndex
+            The tuple index to retrieve data from.
+
+        Returns
+        -------
+        NDArray[Any]
+            The data at the specified tuple index.
+        """
         index, changes = index_to_slices(index, self.shape)
-        result = self.
+        result = self._source[index]
         return apply_index_to_slices_changes(result, changes)
 
 
 class ComplementNearest(Complement):
+    """A class to complement a target dataset with variables from a source dataset using nearest neighbor interpolation."""
 
-    def __init__(self, target, source):
+    def __init__(self, target: Any, source: Any) -> None:
+        """Initializes the ComplementNearest class.
+
+        Parameters
+        ----------
+        target : Any
+            The target dataset.
+        source : Any
+            The source dataset.
+        """
         super().__init__(target, source)
 
         self._nearest_grid_points = nearest_grid_points(
-            self.
-            self.
-            self.
-            self.
+            self._source.latitudes,
+            self._source.longitudes,
+            self._target.latitudes,
+            self._target.longitudes,
         )
 
-    def check_compatibility(self, d1, d2):
+    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks the compatibility of two datasets for nearest neighbor interpolation.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         pass
 
-    def _get_tuple(self, index):
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Gets the data at the specified tuple index using nearest neighbor interpolation.
+
+        Parameters
+        ----------
+        index : TupleIndex
+            The tuple index to retrieve data from.
+
+        Returns
+        -------
+        NDArray[Any]
+            The data at the specified tuple index.
+        """
         variable_index = 1
         index, changes = index_to_slices(index, self.shape)
         index, previous = update_tuple(index, variable_index, slice(None))
-        source_index = [self.
-        source_data = self.
+        source_index = [self._source.name_to_index[x] for x in self.variables[previous]]
+        source_data = self._source[index[0], source_index, index[2], ...]
         target_data = source_data[..., self._nearest_grid_points]
 
         result = target_data[..., index[3]]
@@ -126,7 +263,21 @@ class ComplementNearest(Complement):
         return apply_index_to_slices_changes(result, changes)
 
 
-def complement_factory(args, kwargs):
+def complement_factory(args: Tuple, kwargs: dict) -> Dataset:
+    """Factory function to create a Complement instance based on the provided arguments.
+
+    Parameters
+    ----------
+    args : Tuple
+        Positional arguments.
+    kwargs : dict
+        Keyword arguments.
+
+    Returns
+    -------
+    Dataset
+        The complemented dataset.
+    """
     from .select import Select
 
     assert len(args) == 0, args
@@ -159,6 +310,6 @@ def complement_factory(args, kwargs):
     reorder = source.variables
     complemented = _open([target, complement])
     ordered = (
-        Select(complemented, complemented._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate()
+        Select(complemented, complemented._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate(),
    )
     return ordered
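The complement.py changes above are almost entirely additive: type hints built on the new anemoi.datasets.data.dataset aliases (FullIndex, Shape, TupleIndex), numpydoc docstrings, an explicit @abstractmethod for _get_tuple, and a concrete variables_metadata derived from the source dataset. A minimal sketch of how these classes are reached through open_dataset; the keyword names mirror Complement.__init__ and the factory shown in the diff, the dataset paths are hypothetical, and the exact supported call form should be checked against the anemoi-datasets documentation:

from anemoi.datasets import open_dataset

# "what" and "interpolation" take the defaults shown in Complement.__init__;
# interpolation="nearest" selects ComplementNearest, which maps each target
# grid point to its nearest source grid point.
ds = open_dataset(
    complement="target-dataset.zarr",  # hypothetical: dataset to be completed
    source="source-dataset.zarr",      # hypothetical: dataset providing the extra variables
    what="variables",
    interpolation="nearest",
)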
anemoi/datasets/data/concat.py CHANGED

@@ -10,10 +10,19 @@
 
 import logging
 from functools import cached_property
+from typing import Any
+from typing import List
+from typing import Set
+from typing import Tuple
 
 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta
+from numpy.typing import NDArray
 
+from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
+from .dataset import TupleIndex
 from .debug import Node
 from .debug import debug_indexing
 from .forwards import Combined
@@ -29,13 +38,31 @@ LOG = logging.getLogger(__name__)
 
 
 class ConcatMixin:
-
-
+    def __len__(self) -> int:
+        """Returns the total length of the concatenated datasets.
+
+        Returns
+        -------
+        int
+            Total length of the concatenated datasets.
+        """
         return sum(len(i) for i in self.datasets)
 
     @debug_indexing
     @expand_list_indexing
-    def _get_tuple(self, index):
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Retrieves a tuple of data from the concatenated datasets based on the given index.
+
+        Parameters
+        ----------
+        index : TupleIndex
+            Index specifying the data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Concatenated data array from the specified index.
+        """
         index, changes = index_to_slices(index, self.shape)
         # print(index, changes)
         lengths = [d.shape[0] for d in self.datasets]
@@ -46,7 +73,19 @@ class ConcatMixin:
         return apply_index_to_slices_changes(result, changes)
 
     @debug_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieves data from the concatenated datasets based on the given index.
+
+        Parameters
+        ----------
+        n : FullIndex
+            Index specifying the data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Data array from the concatenated datasets based on the index.
+        """
         if isinstance(n, tuple):
             return self._get_tuple(n)
 
@@ -61,7 +100,19 @@ class ConcatMixin:
         return self.datasets[k][n]
 
     @debug_indexing
-    def _get_slice(self, s):
+    def _get_slice(self, s: slice) -> NDArray[Any]:
+        """Retrieves a slice of data from the concatenated datasets.
+
+        Parameters
+        ----------
+        s : slice
+            Slice object specifying the range of data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Concatenated data array from the specified slice.
+        """
         result = []
 
         lengths = [d.shape[0] for d in self.datasets]
@@ -72,8 +123,9 @@ class ConcatMixin:
         return np.concatenate(result)
 
     @cached_property
-    def missing(self):
-
+    def missing(self) -> Set[int]:
+        """Returns the set of missing indices in the concatenated datasets."""
+        result: Set[int] = set()
         offset = 0
         for d in self.datasets:
             result = result | set(m + offset for m in d.missing)
@@ -82,32 +134,82 @@ class ConcatMixin:
 
 
 class Concat(ConcatMixin, Combined):
-
-
+    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks the compatibility of two datasets for concatenation.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         super().check_compatibility(d1, d2)
         self.check_same_sub_shapes(d1, d2, drop_axis=0)
 
-    def check_same_lengths(self, d1, d2):
+    def check_same_lengths(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the lengths of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         # Turned off because we are concatenating along the first axis
         pass
 
-    def check_same_dates(self, d1, d2):
+    def check_same_dates(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the dates of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Dataset
+            The first dataset.
+        d2 : Dataset
+            The second dataset.
+        """
         # Turned off because we are concatenating along the dates axis
         pass
 
     @property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Returns the concatenated dates of all datasets."""
         return np.concatenate([d.dates for d in self.datasets])
 
     @property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Returns the shape of the concatenated datasets."""
         return (len(self),) + self.datasets[0].shape[1:]
 
-    def tree(self):
+    def tree(self) -> Node:
+        """Generates a hierarchical tree structure for the concatenated datasets.
+
+        Returns
+        -------
+
+        Node
+            A Node object representing the concatenated datasets.
+        """
         return Node(self, [d.tree() for d in self.datasets])
 
     @classmethod
-    def check_dataset_compatibility(cls, datasets, fill_missing_gaps=False):
+    def check_dataset_compatibility(cls, datasets: List[Any], fill_missing_gaps: bool = False) -> List[Any]:
+        """Checks the compatibility of the datasets for concatenation and fills missing gaps if required.
+
+        Parameters
+        ----------
+        datasets : List[Any]
+            List of datasets to check.
+        fill_missing_gaps : bool, optional
+            Whether to fill missing gaps between datasets, by default False.
+
+        Returns
+        -------
+        List[Any]
+            List of compatible datasets.
+        """
         # Study the dates
         ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets]
 
@@ -146,9 +248,32 @@ class Concat(ConcatMixin, Combined):
 
         return result
 
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        """Get the metadata specific to the forwards subclass.
+
+        Returns
+        -------
+        dict[str, Any]
+            The metadata specific to the forwards subclass.
+        """
+        return {}
+
+
+def concat_factory(args: Tuple[Any, ...], kwargs: dict) -> Concat:
+    """Factory function to create a Concat object.
 
-
+    Parameters
+    ----------
+    args : Tuple[Any, ...]
+        Positional arguments.
+    kwargs : dict
+        Keyword arguments.
 
+    Returns
+    -------
+    Concat
+        A Concat object.
+    """
     datasets = kwargs.pop("concat")
     fill_missing_gaps = kwargs.pop("fill_missing_gaps", False)
|